sa1100ir [NET]
See drivers/net/irda/sa1100_ir.c.
- sbni= [NET] Granch SBNI12 leased line adapter
-
sched_verbose [KNL] Enables verbose scheduler debug messages.
schedstats= [KNL,X86] Enable or disable scheduled statistics.
1. User addresses not accessed by the kernel but used for address space
management (e.g. ``mprotect()``, ``madvise()``). The use of valid
- tagged pointers in this context is allowed with the exception of
- ``brk()``, ``mmap()`` and the ``new_address`` argument to
- ``mremap()`` as these have the potential to alias with existing
- user addresses.
-
- NOTE: This behaviour changed in v5.6 and so some earlier kernels may
- incorrectly accept valid tagged pointers for the ``brk()``,
- ``mmap()`` and ``mremap()`` system calls.
+ tagged pointers in this context is allowed with these exceptions:
+
+ - ``brk()``, ``mmap()`` and the ``new_address`` argument to
+ ``mremap()`` as these have the potential to alias with existing
+ user addresses.
+
+ NOTE: This behaviour changed in v5.6 and so some earlier kernels may
+ incorrectly accept valid tagged pointers for the ``brk()``,
+ ``mmap()`` and ``mremap()`` system calls.
+
+ - The ``range.start``, ``start`` and ``dst`` arguments to the
+ ``UFFDIO_*`` ``ioctl()``s used on a file descriptor obtained from
+ ``userfaultfd()``, as fault addresses subsequently obtained by reading
+ the file descriptor will be untagged, which may otherwise confuse
+ tag-unaware programs.
+
+ NOTE: This behaviour changed in v5.14 and so some earlier kernels may
+ incorrectly accept valid tagged pointers for this system call.
2. User addresses accessed by the kernel (e.g. ``write()``). This ABI
relaxation is disabled by default and the application thread needs to
maximum: 32
maxItems: 1
+ power-domains:
+ description:
+ Power domain provider node and an args specifier containing
+ the can device id value.
+ maxItems: 1
+
can-transceiver:
$ref: can-transceiver.yaml#
- const: ipa-setup-ready
interconnects:
- minItems: 2
items:
- - description: Path leading to system memory
- - description: Path between the AP and IPA config space
- - description: Path leading to internal memory
+ - description: Interconnect path between IPA and main memory
+ - description: Interconnect path between IPA and internal memory
+ - description: Interconnect path between IPA and the AP subsystem
interconnect-names:
- minItems: 2
items:
- const: memory
- - const: config
- const: imem
+ - const: config
qcom,smem-states:
$ref: /schemas/types.yaml#/definitions/phandle-array
interconnects =
<&rsc_hlos MASTER_IPA &rsc_hlos SLAVE_EBI1>,
- <&rsc_hlos MASTER_APPSS_PROC &rsc_hlos SLAVE_IPA_CFG>,
- <&rsc_hlos MASTER_IPA &rsc_hlos SLAVE_IMEM>;
+ <&rsc_hlos MASTER_IPA &rsc_hlos SLAVE_IMEM>,
+ <&rsc_hlos MASTER_APPSS_PROC &rsc_hlos SLAVE_IPA_CFG>;
interconnect-names = "memory",
- "config",
- "imem";
+ "imem",
+ "config";
qcom,smem-states = <&ipa_smp2p_out 0>,
<&ipa_smp2p_out 1>;
ports:
$ref: /schemas/graph.yaml#/properties/ports
- properties:
+ patternProperties:
port(@[0-9a-f]+)?:
$ref: audio-graph-port.yaml#
unevaluatedProperties: false
This option was added in bonding version 3.2.0. The "follow"
policy was added in bonding version 3.3.0.
+lacp_active
+ Option specifying whether to send LACPDU frames periodically.
+
+ off or 0
+ LACPDU frames acts as "speak when spoken to".
+
+ on or 1
+ LACPDU frames are sent along the configured links
+ periodically. See lacp_rate for more details.
+
+ The default is on.
+
lacp_rate
Option specifying the rate in which we'll ask our link partner
ret #-1
drop: ret #0
-**(Accelerated) VLAN w/ id 10**::
-
- ld vlan_tci
- jneq #10, drop
- ret #-1
- drop: ret #0
-
**icmp random packet sampling, 1 in 4**::
ldh [12]
bad: ret #0 /* SECCOMP_RET_KILL_THREAD */
good: ret #0x7fff0000 /* SECCOMP_RET_ALLOW */
+Examples for low-level BPF extension:
+
+**Packet for interface index 13**::
+
+ ld ifidx
+ jneq #13, drop
+ ret #-1
+ drop: ret #0
+
+**(Accelerated) VLAN w/ id 10**::
+
+ ld vlan_tci
+ jneq #10, drop
+ ret #-1
+ drop: ret #0
+
The above example code can be placed into a file (here called "foo"), and
then be passed to the bpf_asm tool for generating opcodes, output that xt_bpf
and cls_bpf understands and can directly be loaded with. Example with above
gets overloaded very easily and netdev@vger really doesn't need more
traffic if we can help it.
+netdevsim is great, can I extend it for my out-of-tree tests?
+-------------------------------------------------------------
+
+No, `netdevsim` is a test vehicle solely for upstream tests.
+(Please add your tests under tools/testing/selftests/.)
+
+We also give no guarantees that `netdevsim` won't change in the future
+in a way which would break what would normally be considered uAPI.
+
+Is netdevsim considered a "user" of an API?
+-------------------------------------------
+
+Linux kernel has a long standing rule that no API should be added unless
+it has a real, in-tree user. Mock-ups and tests based on `netdevsim` are
+strongly encouraged when adding new APIs, but `netdevsim` in itself
+is **not** considered a use case/user.
+
Any other tips to help ensure my net/net-next patch gets OK'd?
--------------------------------------------------------------
Attention to detail. Re-read your own work as if you were the
state (f.e. VLAN).
IF_OPER_TESTING (4):
- Unused in current kernel.
+ Interface is in testing mode, for example executing driver self-tests
+ or media (cable) test. It can't be used for normal traffic until tests
+ complete.
IF_OPER_DORMANT (5):
Interface is L1 up, but waiting for an external event, f.e. for a
Note that for certain kind of soft-devices, which are not managing any
real hardware, it is possible to set this bit from userspace. One
-should use TVL IFLA_CARRIER to do so.
+should use TLV IFLA_CARRIER to do so.
netif_carrier_ok() can be used to query that bit.
with the event, in nanoseconds. May be
modified by .usecs to have timestamps
interpreted as microseconds.
- cpu int the cpu on which the event occurred.
+ common_cpu int the cpu on which the event occurred.
====================== ==== =======================================
Extended error information
use PPIs designated for specific cpus. The irq field is interpreted
like this::
- Â bits: | 31 ... 28 | 27 ... 24 | 23 ... 16 | 15 ... 0 |
+ bits: | 31 ... 28 | 27 ... 24 | 23 ... 16 | 15 ... 0 |
field: | vcpu2_index | irq_type | vcpu_index | irq_id |
The irq_type field has the following values:
Errors:
====== ============================================================
- Â ENOENT Â Â no such register
- Â EINVAL Â Â invalid register ID, or no such register or used with VMs in
+ ENOENT no such register
+ EINVAL invalid register ID, or no such register or used with VMs in
protected virtualization mode on s390
- Â EPERM Â Â Â (arm64) register access not allowed before vcpu finalization
+ EPERM (arm64) register access not allowed before vcpu finalization
====== ============================================================
(These error codes are indicative only: do not rely on a specific error
Errors include:
======== ============================================================
- Â ENOENT Â Â no such register
- Â EINVAL Â Â invalid register ID, or no such register or used with VMs in
+ ENOENT no such register
+ EINVAL invalid register ID, or no such register or used with VMs in
protected virtualization mode on s390
- Â EPERM Â Â Â (arm64) register access not allowed before vcpu finalization
+ EPERM (arm64) register access not allowed before vcpu finalization
======== ============================================================
(These error codes are indicative only: do not rely on a specific error
Errors:
====== =================================================================
- Â EINVAL Â Â Â the target is unknown, or the combination of features is invalid.
- Â ENOENT Â Â Â a features bit specified is unknown.
+ EINVAL the target is unknown, or the combination of features is invalid.
+ ENOENT a features bit specified is unknown.
====== =================================================================
This tells KVM what type of CPU to present to the guest, and what
-optional features it should have. Â This will cause a reset of the cpu
-registers to their initial values. Â If this is not called, KVM_RUN will
+optional features it should have. This will cause a reset of the cpu
+registers to their initial values. If this is not called, KVM_RUN will
return ENOEXEC for that vcpu.
The initial values are defined as:
Errors:
===== ==============================================================
- Â E2BIG Â Â Â Â the reg index list is too big to fit in the array specified by
- Â Â Â Â Â Â Â Â Â Â Â Â the user (the number required will be written into n).
+ E2BIG the reg index list is too big to fit in the array specified by
+ the user (the number required will be written into n).
===== ==============================================================
::
ARM/arm64 divides the id field into two parts, a device id and an
address type id specific to the individual device::
- Â bits: | 63 ... 32 | 31 ... 16 | 15 ... 0 |
+ bits: | 63 ... 32 | 31 ... 16 | 15 ... 0 |
field: | 0x00000000 | device id | addr type id |
ARM/arm64 currently only require this when using the in-kernel GIC
trap and emulate MSRs that are outside of the scope of KVM as well as
limit the attack surface on KVM's MSR emulation code.
-8.28 KVM_CAP_ENFORCE_PV_CPUID
+8.28 KVM_CAP_ENFORCE_PV_FEATURE_CPUID
-----------------------------
Architectures: x86
F: include/uapi/linux/wmi.h
ACRN HYPERVISOR SERVICE MODULE
-M: Shuo Liu <shuo.a.liu@intel.com>
+M: Fei Li <fei1.li@intel.com>
L: acrn-dev@lists.projectacrn.org (subscribers-only)
S: Supported
W: https://projectacrn.org
F: drivers/input/touchscreen/goodix.c
GOOGLE ETHERNET DRIVERS
-M: Catherine Sullivan <csully@google.com>
-R: Sagi Shahar <sagis@google.com>
-R: Jon Olson <jonolson@google.com>
+M: Jeroen de Borst <jeroendb@google.com>
+R: Catherine Sullivan <csully@google.com>
+R: David Awogbemila <awogbemila@google.com>
L: netdev@vger.kernel.org
S: Supported
F: Documentation/networking/device_drivers/ethernet/google/gve.rst
S: Supported
F: drivers/net/phy/mxl-gpy.c
+MCAB MICROCHIP CAN BUS ANALYZER TOOL DRIVER
+R: Yasushi SHOJI <yashi@spacecubics.com>
+L: linux-can@vger.kernel.org
+S: Maintained
+F: drivers/net/can/usb/mcba_usb.c
+
MCAN MMIO DEVICE DRIVER
M: Chandrasekar Ramakrishnan <rcsekar@samsung.com>
L: linux-can@vger.kernel.org
L: amd-gfx@lists.freedesktop.org
S: Supported
T: git https://gitlab.freedesktop.org/agd5f/linux.git
+B: https://gitlab.freedesktop.org/drm/amd/-/issues
+C: irc://irc.oftc.net/radeon
F: drivers/gpu/drm/amd/
F: drivers/gpu/drm/radeon/
F: include/uapi/drm/amdgpu_drm.h
L: linux-usb@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/phy/hisilicon,hi3670-usb3.yaml
-F: drivers/phy/hisilicon/phy-kirin970-usb3.c
+F: drivers/phy/hisilicon/phy-hi3670-usb3.c
USB ISP116X DRIVER
M: Olav Kongas <ok@artecdesign.ee>
S: Supported
F: drivers/ptp/ptp_vmw.c
+VMWARE VMCI DRIVER
+M: Jorgen Hansen <jhansen@vmware.com>
+M: Vishnu Dasa <vdasa@vmware.com>
+L: linux-kernel@vger.kernel.org
+L: pv-drivers@vmware.com (private)
+S: Maintained
+F: drivers/misc/vmw_vmci/
+
VMWARE VMMOUSE SUBDRIVER
M: "VMware Graphics" <linux-graphics-maintainer@vmware.com>
M: "VMware, Inc." <pv-drivers@vmware.com>
VERSION = 5
PATCHLEVEL = 14
SUBLEVEL = 0
-EXTRAVERSION = -rc2
+EXTRAVERSION = -rc4
NAME = Opossums on Parade
# *DOCUMENTATION*
PHONY += scripts_basic
scripts_basic:
$(Q)$(MAKE) $(build)=scripts/basic
- $(Q)rm -f .tmp_quiet_recordmcount
PHONY += outputmakefile
ifdef building_out_of_srctree
select PCI_SYSCALL if PCI
select HAVE_AOUT
select HAVE_ASM_MODVERSIONS
- select HAVE_IDE
select HAVE_PCSPKR_PLATFORM
select HAVE_PERF_EVENTS
select NEED_DMA_MAP_STATE
will run faster if you say N here.
See also the SMP-HOWTO available at
- <http://www.tldp.org/docs.html#howto>.
+ <https://www.tldp.org/docs.html#howto>.
If you don't know what to do here, say N.
#include "ksize.h"
extern unsigned long switch_to_osf_pal(unsigned long nr,
- struct pcb_struct * pcb_va, struct pcb_struct * pcb_pa,
+ struct pcb_struct *pcb_va, struct pcb_struct *pcb_pa,
unsigned long *vptb);
extern void move_stack(unsigned long new_stack);
START_ADDR KSEG address of the entry point of kernel code.
ZERO_PGE KSEG address of page full of zeroes, but
- upon entry to kerne cvan be expected
+ upon entry to kernel, it can be expected
to hold the parameter list and possible
INTRD information.
__attribute__ ((format (printf, 1, 2)));
/*
- * gzip delarations
+ * gzip declarations
*/
#define OF(args) args
#define STATIC static
CONFIG_ALPHA_LEGACY_START_ADDRESS=y
CONFIG_MATHEMU=y
CONFIG_CRYPTO_HMAC=y
+CONFIG_DEVTMPFS=y
#include <uapi/asm/compiler.h>
-/* Some idiots over in <linux/compiler.h> thought inline should imply
- always_inline. This breaks stuff. We'll include this file whenever
- we run into such problems. */
-
-#include <linux/compiler.h>
-#undef inline
-#undef __inline__
-#undef __inline
-#undef __always_inline
-#define __always_inline inline __attribute__((always_inline))
-
#endif /* __ALPHA_COMPILER_H */
return AUDIT_ARCH_ALPHA;
}
+static inline long syscall_get_return_value(struct task_struct *task,
+ struct pt_regs *regs)
+{
+ return regs->r0;
+}
+
#endif /* _ASM_ALPHA_SYSCALL_H */
#define SO_NETNS_COOKIE 71
+#define SO_BUF_LOCK 72
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64
return -EFAULT;
state = ¤t_thread_info()->ieee_state;
- /* Update softare trap enable bits. */
+ /* Update software trap enable bits. */
*state = (*state & ~IEEE_SW_MASK) | (swcr & IEEE_SW_MASK);
/* Update the real fpcr. */
state = ¤t_thread_info()->ieee_state;
exc &= IEEE_STATUS_MASK;
- /* Update softare trap enable bits. */
+ /* Update software trap enable bits. */
swcr = (*state & IEEE_SW_MASK) | exc;
*state |= exc;
* Check that CPU performance counters are supported.
* - currently support EV67 and later CPUs.
* - actually some later revisions of the EV6 have the same PMC model as the
- * EV67 but we don't do suffiently deep CPU detection to detect them.
+ * EV67 but we don't do sufficiently deep CPU detection to detect them.
* Bad luck to the very few people who might have one, I guess.
*/
static int supported_cpu(void)
childstack->r26 = (unsigned long) ret_from_kernel_thread;
childstack->r9 = usp; /* function */
childstack->r10 = kthread_arg;
- childregs->hae = alpha_mv.hae_cache,
+ childregs->hae = alpha_mv.hae_cache;
childti->pcb.usp = 0;
return 0;
}
i, cluster->usage, cluster->start_pfn,
cluster->start_pfn + cluster->numpages);
- /* Bit 0 is console/PALcode reserved. Bit 1 is
- non-volatile memory -- we might want to mark
- this for later. */
- if (cluster->usage & 3)
- continue;
-
end = cluster->start_pfn + cluster->numpages;
if (end > max_low_pfn)
max_low_pfn = end;
memblock_add(PFN_PHYS(cluster->start_pfn),
cluster->numpages << PAGE_SHIFT);
+
+ /* Bit 0 is console/PALcode reserved. Bit 1 is
+ non-volatile memory -- we might want to mark
+ this for later. */
+ if (cluster->usage & 3)
+ memblock_reserve(PFN_PHYS(cluster->start_pfn),
+ cluster->numpages << PAGE_SHIFT);
}
/*
smp_send_stop(void)
{
cpumask_t to_whom;
- cpumask_copy(&to_whom, cpu_possible_mask);
+ cpumask_copy(&to_whom, cpu_online_mask);
cpumask_clear_cpu(smp_processor_id(), &to_whom);
#ifdef DEBUG_IPI_MSG
if (hard_smp_processor_id() != boot_cpu_id)
/* Use default IO. */
pci_add_resource(&bridge->windows, &ioport_resource);
- /* Irongate PCI memory aperture, calculate requred size before
+ /* Irongate PCI memory aperture, calculate required size before
setting it up. */
pci_add_resource(&bridge->windows, &irongate_mem);
long error;
/* Check the UAC bits to decide what the user wants us to do
- with the unaliged access. */
+ with the unaligned access. */
if (!(current_thread_info()->status & TS_UAC_NOPRINT)) {
if (__ratelimit(&ratelimit)) {
long do_alpha_fp_emul_imprecise(struct pt_regs *, unsigned long);
long do_alpha_fp_emul(unsigned long);
-int init_module(void)
+static int alpha_fp_emul_init_module(void)
{
save_emul_imprecise = alpha_fp_emul_imprecise;
save_emul = alpha_fp_emul;
alpha_fp_emul = do_alpha_fp_emul;
return 0;
}
+module_init(alpha_fp_emul_init_module);
-void cleanup_module(void)
+static void alpha_fp_emul_cleanup_module(void)
{
alpha_fp_emul_imprecise = save_emul_imprecise;
alpha_fp_emul = save_emul;
}
+module_exit(alpha_fp_emul_cleanup_module);
#undef alpha_fp_emul_imprecise
#define alpha_fp_emul_imprecise do_alpha_fp_emul_imprecise
egress:
return si_code;
}
+
+EXPORT_SYMBOL(__udiv_qrnnd);
select HAVE_FUNCTION_TRACER if !XIP_KERNEL
select HAVE_GCC_PLUGINS
select HAVE_HW_BREAKPOINT if PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7)
- select HAVE_IDE if PCI || ISA || PCMCIA
select HAVE_IRQ_TIME_ACCOUNTING
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_LZ4
bool "FootBridge"
select CPU_SA110
select FOOTBRIDGE
- select HAVE_IDE
select NEED_MACH_IO_H if !MMU
select NEED_MACH_MEMORY_H
help
select GENERIC_IRQ_MULTI_HANDLER
select GPIO_PXA
select GPIOLIB
- select HAVE_IDE
select IRQ_DOMAIN
select PLAT_PXA
select SPARSE_IRQ
select ARM_HAS_SG_CHAIN
select CPU_SA110
select FIQ
- select HAVE_IDE
select HAVE_PATA_PLATFORM
select ISA_DMA_API
select LEGACY_TIMER_TICK
select CPU_SA1100
select GENERIC_IRQ_MULTI_HANDLER
select GPIOLIB
- select HAVE_IDE
select IRQ_DOMAIN
select ISA
select NEED_MACH_MEMORY_H
select GENERIC_IRQ_CHIP
select GENERIC_IRQ_MULTI_HANDLER
select GPIOLIB
- select HAVE_IDE
select HAVE_LEGACY_CLK
select IRQ_DOMAIN
select NEED_MACH_IO_H if PCCARD
select PM_GENERIC_DOMAINS_OF if PM && OF
select REGMAP_MMIO
select RESET_CONTROLLER
- select HAVE_IDE
select PINCTRL_SINGLE
if ARCH_DAVINCI
fallthrough; /* ??? */
case 256:
vram_size += PAGE_SIZE * 256;
+ break;
default:
break;
}
rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx);
emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code));
break;
+ /* speculation barrier */
+ case BPF_ST | BPF_NOSPEC:
+ break;
/* ST: *(size *)(dst + off) = imm */
case BPF_ST | BPF_MEM | BPF_W:
case BPF_ST | BPF_MEM | BPF_H:
};
flexcan1: can@308c0000 {
- compatible = "fsl,imx8mp-flexcan", "fsl,imx6q-flexcan";
+ compatible = "fsl,imx8mp-flexcan";
reg = <0x308c0000 0x10000>;
interrupts = <GIC_SPI 142 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clk IMX8MP_CLK_IPG_ROOT>,
};
flexcan2: can@308d0000 {
- compatible = "fsl,imx8mp-flexcan", "fsl,imx6q-flexcan";
+ compatible = "fsl,imx8mp-flexcan";
reg = <0x308d0000 0x10000>;
interrupts = <GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clk IMX8MP_CLK_IPG_ROOT>,
status = "okay";
extcon = <&usb2_id>;
- usb@7600000 {
+ dwc3@7600000 {
extcon = <&usb2_id>;
dr_mode = "otg";
maximum-speed = "high-speed";
status = "okay";
extcon = <&usb3_id>;
- usb@6a00000 {
+ dwc3@6a00000 {
extcon = <&usb3_id>;
dr_mode = "otg";
};
resets = <&gcc GCC_USB0_BCR>;
status = "disabled";
- dwc_0: usb@8a00000 {
+ dwc_0: dwc3@8a00000 {
compatible = "snps,dwc3";
reg = <0x8a00000 0xcd00>;
interrupts = <GIC_SPI 140 IRQ_TYPE_LEVEL_HIGH>;
resets = <&gcc GCC_USB1_BCR>;
status = "disabled";
- dwc_1: usb@8c00000 {
+ dwc_1: dwc3@8c00000 {
compatible = "snps,dwc3";
reg = <0x8c00000 0xcd00>;
interrupts = <GIC_SPI 99 IRQ_TYPE_LEVEL_HIGH>;
power-domains = <&gcc USB30_GDSC>;
status = "disabled";
- usb@6a00000 {
+ dwc3@6a00000 {
compatible = "snps,dwc3";
reg = <0x06a00000 0xcc00>;
interrupts = <0 131 IRQ_TYPE_LEVEL_HIGH>;
qcom,select-utmi-as-pipe-clk;
status = "disabled";
- usb@7600000 {
+ dwc3@7600000 {
compatible = "snps,dwc3";
reg = <0x07600000 0xcc00>;
interrupts = <0 138 IRQ_TYPE_LEVEL_HIGH>;
resets = <&gcc GCC_USB_30_BCR>;
- usb3_dwc3: usb@a800000 {
+ usb3_dwc3: dwc3@a800000 {
compatible = "snps,dwc3";
reg = <0x0a800000 0xcd00>;
interrupts = <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>;
&usb3 {
status = "okay";
- usb@7580000 {
+ dwc3@7580000 {
dr_mode = "host";
};
};
assigned-clock-rates = <19200000>, <200000000>;
status = "disabled";
- usb@7580000 {
+ dwc3@7580000 {
compatible = "snps,dwc3";
reg = <0x07580000 0xcd00>;
interrupts = <GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>;
assigned-clock-rates = <19200000>, <133333333>;
status = "disabled";
- usb@78c0000 {
+ dwc3@78c0000 {
compatible = "snps,dwc3";
reg = <0x078c0000 0xcc00>;
interrupts = <GIC_SPI 44 IRQ_TYPE_LEVEL_HIGH>;
no-map;
};
- ipa_fw_mem: memory@8b700000 {
- reg = <0 0x8b700000 0 0x10000>;
- no-map;
- };
-
rmtfs_mem: memory@94600000 {
compatible = "qcom,rmtfs-mem";
reg = <0x0 0x94600000 0x0 0x200000>;
<&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_USB3 0>;
interconnect-names = "usb-ddr", "apps-usb";
- usb_1_dwc3: usb@a600000 {
+ usb_1_dwc3: dwc3@a600000 {
compatible = "snps,dwc3";
reg = <0 0x0a600000 0 0xe000>;
interrupts = <GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>;
#include <dt-bindings/clock/qcom,gcc-sc7280.h>
#include <dt-bindings/clock/qcom,rpmh.h>
-#include <dt-bindings/interconnect/qcom,sc7280.h>
#include <dt-bindings/interrupt-controller/arm-gic.h>
#include <dt-bindings/mailbox/qcom-ipcc.h>
#include <dt-bindings/power/qcom-aoss-qmp.h>
no-map;
reg = <0x0 0x80b00000 0x0 0x100000>;
};
-
- ipa_fw_mem: memory@8b700000 {
- reg = <0 0x8b700000 0 0x10000>;
- no-map;
- };
};
cpus {
qcom,bcm-voters = <&apps_bcm_voter>;
};
- ipa: ipa@1e40000 {
- compatible = "qcom,sc7280-ipa";
-
- iommus = <&apps_smmu 0x480 0x0>,
- <&apps_smmu 0x482 0x0>;
- reg = <0 0x1e40000 0 0x8000>,
- <0 0x1e50000 0 0x4ad0>,
- <0 0x1e04000 0 0x23000>;
- reg-names = "ipa-reg",
- "ipa-shared",
- "gsi";
-
- interrupts-extended = <&intc 0 654 IRQ_TYPE_EDGE_RISING>,
- <&intc 0 432 IRQ_TYPE_LEVEL_HIGH>,
- <&ipa_smp2p_in 0 IRQ_TYPE_EDGE_RISING>,
- <&ipa_smp2p_in 1 IRQ_TYPE_EDGE_RISING>;
- interrupt-names = "ipa",
- "gsi",
- "ipa-clock-query",
- "ipa-setup-ready";
-
- clocks = <&rpmhcc RPMH_IPA_CLK>;
- clock-names = "core";
-
- interconnects = <&aggre2_noc MASTER_IPA 0 &mc_virt SLAVE_EBI1 0>,
- <&gem_noc MASTER_APPSS_PROC 0 &cnoc2 SLAVE_IPA_CFG 0>;
- interconnect-names = "memory",
- "config";
-
- qcom,smem-states = <&ipa_smp2p_out 0>,
- <&ipa_smp2p_out 1>;
- qcom,smem-state-names = "ipa-clock-enabled-valid",
- "ipa-clock-enabled";
-
- status = "disabled";
- };
-
tcsr_mutex: hwlock@1f40000 {
compatible = "qcom,tcsr-mutex", "syscon";
reg = <0 0x01f40000 0 0x40000>;
<&gladiator_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_USB3_0 0>;
interconnect-names = "usb-ddr", "apps-usb";
- usb_1_dwc3: usb@a600000 {
+ usb_1_dwc3: dwc3@a600000 {
compatible = "snps,dwc3";
reg = <0 0x0a600000 0 0xcd00>;
interrupts = <GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>;
<&gladiator_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_USB3_1 0>;
interconnect-names = "usb-ddr", "apps-usb";
- usb_2_dwc3: usb@a800000 {
+ usb_2_dwc3: dwc3@a800000 {
compatible = "snps,dwc3";
reg = <0 0x0a800000 0 0xcd00>;
interrupts = <GIC_SPI 138 IRQ_TYPE_LEVEL_HIGH>;
resets = <&gcc GCC_USB30_PRIM_BCR>;
- usb_1_dwc3: usb@a600000 {
+ usb_1_dwc3: dwc3@a600000 {
compatible = "snps,dwc3";
reg = <0 0x0a600000 0 0xcd00>;
interrupts = <GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>;
vma_shift = get_vma_page_shift(vma, hva);
}
- shared = (vma->vm_flags & VM_PFNMAP);
+ shared = (vma->vm_flags & VM_SHARED);
switch (vma_shift) {
#ifndef __PAGETABLE_PMD_FOLDED
return ret;
break;
+ /* speculation barrier */
+ case BPF_ST | BPF_NOSPEC:
+ /*
+ * Nothing required here.
+ *
+ * In case of arm64, we rely on the firmware mitigation of
+ * Speculative Store Bypass as controlled via the ssbd kernel
+ * parameter. Whenever the mitigation is enabled, it works
+ * for all of the kernel code with no need to provide any
+ * additional instructions.
+ */
+ break;
+
/* ST: *(size *)(dst + off) = imm */
case BPF_ST | BPF_MEM | BPF_W:
case BPF_ST | BPF_MEM | BPF_H:
bool "H8MAX"
select H83069
select RAMKERNEL
- select HAVE_IDE
help
H8MAX Evaluation Board Support
More Information. (Japanese Only)
select HAVE_ASM_MODVERSIONS
select HAVE_UNSTABLE_SCHED_CLOCK
select HAVE_EXIT_THREAD
- select HAVE_IDE
select HAVE_KPROBES
select HAVE_KRETPROBES
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_DEBUG_BUGVERBOSE
select HAVE_EFFICIENT_UNALIGNED_ACCESS if !CPU_HAS_NO_UNALIGNED
select HAVE_FUTEX_CMPXCHG if MMU && FUTEX
- select HAVE_IDE
select HAVE_MOD_ARCH_SPECIFIC
select HAVE_UID16
select MMU_GATHER_NO_RANGE if MMU
depends on MMU
select MMU_MOTOROLA if MMU
select HAVE_ARCH_NVRAM_OPS
+ select HAVE_PATA_PLATFORM
select LEGACY_TIMER_TICK
help
This option enables support for the Apple Macintosh series of
DEFINE_CLK(sys, "sys.0", MCF_BUSCLK);
static struct clk_lookup m525x_clk_lookup[] = {
- CLKDEV_INIT(NULL, "pll.0", &pll),
+ CLKDEV_INIT(NULL, "pll.0", &clk_pll),
CLKDEV_INIT(NULL, "sys.0", &clk_sys),
CLKDEV_INIT("mcftmr.0", NULL, &clk_sys),
CLKDEV_INIT("mcftmr.1", NULL, &clk_sys),
select HAVE_FUNCTION_TRACER
select HAVE_GCC_PLUGINS
select HAVE_GENERIC_VDSO
- select HAVE_IDE
select HAVE_IOREMAP_PROT
select HAVE_IRQ_EXIT_ON_IRQ_STACK
select HAVE_IRQ_TIME_ACCOUNTING
#define SO_NETNS_COOKIE 71
+#define SO_BUF_LOCK 72
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64
}
break;
+ case BPF_ST | BPF_NOSPEC: /* speculation barrier */
+ break;
+
case BPF_ST | BPF_B | BPF_MEM:
case BPF_ST | BPF_H | BPF_MEM:
case BPF_ST | BPF_W | BPF_MEM:
vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr &&
- (!vma || addr + len <= vma->vm_start))
+ (!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
def_bool y
select ARCH_32BIT_OFF_T if !64BIT
select ARCH_MIGHT_HAVE_PC_PARPORT
- select HAVE_IDE
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_SYSCALL_TRACEPOINTS
#define SO_NETNS_COOKIE 0x4045
+#define SO_BUF_LOCK 0x4046
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64
select HAVE_HARDLOCKUP_DETECTOR_ARCH if PPC_BOOK3S_64 && SMP
select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH
select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx)
- select HAVE_IDE
select HAVE_IOREMAP_PROT
select HAVE_IRQ_EXIT_ON_IRQ_STACK
select HAVE_IRQ_TIME_ACCOUNTING
ccflags-y := -shared -fno-common -fno-builtin -nostdlib \
-Wl,-soname=linux-vdso64.so.1 -Wl,--hash-style=both
+
+# Go prior to 1.16.x assumes r30 is not clobbered by any VDSO code. That used to be true
+# by accident when the VDSO was hand-written asm code, but may not be now that the VDSO is
+# compiler generated. To avoid breaking Go tell GCC not to use r30. Impact on code
+# generation is minimal, it will just use r29 instead.
+ccflags-y += $(call cc-option, -ffixed-r30)
+
asflags-y := -D__VDSO64__ -s
targets += vdso64.lds
HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP | HFSCR_PREFIX;
if (cpu_has_feature(CPU_FTR_HVMODE)) {
vcpu->arch.hfscr &= mfspr(SPRN_HFSCR);
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
vcpu->arch.hfscr |= HFSCR_TM;
+#endif
}
if (cpu_has_feature(CPU_FTR_TM_COMP))
vcpu->arch.hfscr |= HFSCR_TM;
if (vcpu->kvm->arch.l1_ptcr == 0)
return H_NOT_AVAILABLE;
+ if (MSR_TM_TRANSACTIONAL(vcpu->arch.shregs.msr))
+ return H_BAD_MODE;
+
/* copy parameters in */
hv_ptr = kvmppc_get_gpr(vcpu, 4);
regs_ptr = kvmppc_get_gpr(vcpu, 5);
if (l2_hv.vcpu_token >= NR_CPUS)
return H_PARAMETER;
+ /*
+ * L1 must have set up a suspended state to enter the L2 in a
+ * transactional state, and only in that case. These have to be
+ * filtered out here to prevent causing a TM Bad Thing in the
+ * host HRFID. We could synthesize a TM Bad Thing back to the L1
+ * here but there doesn't seem like much point.
+ */
+ if (MSR_TM_SUSPENDED(vcpu->arch.shregs.msr)) {
+ if (!MSR_TM_ACTIVE(l2_regs.msr))
+ return H_BAD_MODE;
+ } else {
+ if (l2_regs.msr & MSR_TS_MASK)
+ return H_BAD_MODE;
+ if (WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_TS_MASK))
+ return H_BAD_MODE;
+ }
+
/* translate lpid */
l2 = kvmhv_get_nested(vcpu->kvm, l2_hv.lpid, true);
if (!l2)
*/
mtspr(SPRN_HDEC, hdec);
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+tm_return_to_guest:
+#endif
mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
* is in real suspend mode and is trying to transition to
* transactional mode.
*/
- if (local_paca->kvm_hstate.fake_suspend &&
+ if (!local_paca->kvm_hstate.fake_suspend &&
(vcpu->arch.shregs.msr & MSR_TS_S)) {
if (kvmhv_p9_tm_emulation_early(vcpu)) {
- /* Prevent it being handled again. */
- trap = 0;
+ /*
+ * Go straight back into the guest with the
+ * new NIP/MSR as set by TM emulation.
+ */
+ mtspr(SPRN_HSRR0, vcpu->arch.regs.nip);
+ mtspr(SPRN_HSRR1, vcpu->arch.shregs.msr);
+
+ /*
+ * tm_return_to_guest re-loads SRR0/1, DAR,
+ * DSISR after RI is cleared, in case they had
+ * been clobbered by a MCE.
+ */
+ __mtmsrd(0, 1); /* clear RI */
+ goto tm_return_to_guest;
}
}
#endif
* If we are in real mode, only switch MMU on after the MMU is
* switched to host, to avoid the P9_RADIX_PREFETCH_BUG.
*/
+ if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
+ vcpu->arch.shregs.msr & MSR_TS_MASK)
+ msr |= MSR_TS_S;
+
__mtmsrd(msr, 0);
end_timing(vcpu);
* value so we can restore it on the way out.
*/
orig_rets = args.rets;
+ if (be32_to_cpu(args.nargs) >= ARRAY_SIZE(args.args)) {
+ /*
+ * Don't overflow our args array: ensure there is room for
+ * at least rets[0] (even if the call specifies 0 nret).
+ *
+ * Each handler must then check for the correct nargs and nret
+ * values, but they may always return failure in rets[0].
+ */
+ rc = -EINVAL;
+ goto fail;
+ }
args.rets = &args.args[be32_to_cpu(args.nargs)];
mutex_lock(&vcpu->kvm->arch.rtas_token_lock);
fail:
/*
* We only get here if the guest has called RTAS with a bogus
- * args pointer. That means we can't get to the args, and so we
- * can't fail the RTAS call. So fail right out to userspace,
- * which should kill the guest.
+ * args pointer or nargs/nret values that would overflow the
+ * array. That means we can't get to the args, and so we can't
+ * fail the RTAS call. So fail right out to userspace, which
+ * should kill the guest.
+ *
+ * SLOF should actually pass the hcall return value from the
+ * rtas handler call in r3, so enter_rtas could be modified to
+ * return a failure indication in r3 and we could return such
+ * errors to the guest rather than failing to host userspace.
+ * However old guests that don't test for failure could then
+ * continue silently after errors, so for now we won't do this.
*/
return rc;
}
{
struct kvm_enable_cap cap;
r = -EFAULT;
- vcpu_load(vcpu);
if (copy_from_user(&cap, argp, sizeof(cap)))
goto out;
+ vcpu_load(vcpu);
r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
vcpu_put(vcpu);
break;
case KVM_DIRTY_TLB: {
struct kvm_dirty_tlb dirty;
r = -EFAULT;
- vcpu_load(vcpu);
if (copy_from_user(&dirty, argp, sizeof(dirty)))
goto out;
+ vcpu_load(vcpu);
r = kvm_vcpu_ioctl_dirty_tlb(vcpu, &dirty);
vcpu_put(vcpu);
break;
}
break;
+ /*
+ * BPF_ST NOSPEC (speculation barrier)
+ */
+ case BPF_ST | BPF_NOSPEC:
+ break;
+
/*
* BPF_ST(X)
*/
}
break;
+ /*
+ * BPF_ST NOSPEC (speculation barrier)
+ */
+ case BPF_ST | BPF_NOSPEC:
+ break;
+
/*
* BPF_ST(X)
*/
switch (regs->msr & SRR1_WAKEMASK) {
case SRR1_WAKEDEC:
set_dec(1);
+ break;
case SRR1_WAKEEE:
/*
* Handle these when interrupts get re-enabled and we take
#include "../../../../drivers/pci/pci.h"
DEFINE_STATIC_KEY_FALSE(shared_processor);
-EXPORT_SYMBOL_GPL(shared_processor);
+EXPORT_SYMBOL(shared_processor);
int CMO_PrPSP = -1;
int CMO_SecPSP = -1;
#define ARCH_EFI_IRQ_FLAGS_MASK (SR_IE | SR_SPIE)
-/* Load initrd at enough distance from DRAM start */
+/* Load initrd anywhere in system RAM */
static inline unsigned long efi_get_max_initrd_addr(unsigned long image_addr)
{
- return image_addr + SZ_256M;
+ return ULONG_MAX;
}
#define alloc_screen_info(x...) (&screen_info)
{
unsigned long pc = 0;
- if (likely(task && task != current && !task_is_running(task)))
+ if (likely(task && task != current && !task_is_running(task))) {
+ if (!try_get_task_stack(task))
+ return 0;
walk_stackframe(task, NULL, save_wchan, &pc);
+ put_task_stack(task);
+ }
return pc;
}
* t0 - end of uncopied dst
*/
add t0, a0, a2
- bgtu a0, t0, 5f
/*
* Use byte copy only if too small.
+ * SZREG holds 4 for RV32 and 8 for RV64
*/
- li a3, 8*SZREG /* size must be larger than size in word_copy */
+ li a3, 9*SZREG /* size must be larger than size in word_copy */
bltu a2, a3, .Lbyte_copy_tail
/*
- * Copy first bytes until dst is align to word boundary.
+ * Copy first bytes until dst is aligned to word boundary.
* a0 - start of dst
* t1 - start of aligned dst
*/
addi t1, a0, SZREG-1
andi t1, t1, ~(SZREG-1)
/* dst is already aligned, skip */
- beq a0, t1, .Lskip_first_bytes
+ beq a0, t1, .Lskip_align_dst
1:
/* a5 - one byte for copying data */
fixup lb a5, 0(a1), 10f
addi a0, a0, 1 /* dst */
bltu a0, t1, 1b /* t1 - start of aligned dst */
-.Lskip_first_bytes:
+.Lskip_align_dst:
/*
* Now dst is aligned.
* Use shift-copy if src is misaligned.
*
* a0 - start of aligned dst
* a1 - start of aligned src
- * a3 - a1 & mask:(SZREG-1)
* t0 - end of aligned dst
*/
- addi t0, t0, -(8*SZREG-1) /* not to over run */
+ addi t0, t0, -(8*SZREG) /* not to over run */
2:
fixup REG_L a4, 0(a1), 10f
fixup REG_L a5, SZREG(a1), 10f
addi a1, a1, 8*SZREG
bltu a0, t0, 2b
- addi t0, t0, 8*SZREG-1 /* revert to original value */
+ addi t0, t0, 8*SZREG /* revert to original value */
j .Lbyte_copy_tail
.Lshift_copy:
* For misaligned copy we still perform aligned word copy, but
* we need to use the value fetched from the previous iteration and
* do some shifts.
- * This is safe because reading less than a word size.
+ * This is safe because reading is less than a word size.
*
* a0 - start of aligned dst
* a1 - start of src
*/
/* calculating aligned word boundary for dst */
andi t1, t0, ~(SZREG-1)
- /* Converting unaligned src to aligned arc */
+ /* Converting unaligned src to aligned src */
andi a1, a1, ~(SZREG-1)
/*
* t3 - prev shift
* t4 - current shift
*/
- slli t3, a3, LGREG
+ slli t3, a3, 3 /* converting bytes in a3 to bits */
li a5, SZREG*8
sub t4, a5, t3
- /* Load the first word to combine with seceond word */
+ /* Load the first word to combine with second word */
fixup REG_L a5, 0(a1), 10f
3:
* a1 - start of remaining src
* t0 - end of remaining dst
*/
- bgeu a0, t0, 5f
+ bgeu a0, t0, .Lout_copy_user /* check if end of copy */
4:
fixup lb a5, 0(a1), 10f
addi a1, a1, 1 /* src */
addi a0, a0, 1 /* dst */
bltu a0, t0, 4b /* t0 - end of dst */
-5:
+.Lout_copy_user:
/* Disable access to user memory */
csrc CSR_STATUS, t6
li a0, 0
}
/*
- * The default maximal physical memory size is -PAGE_OFFSET,
- * limit the memory size via mem.
+ * The default maximal physical memory size is -PAGE_OFFSET for 32-bit kernel,
+ * whereas for 64-bit kernel, the end of the virtual address space is occupied
+ * by the modules/BPF/kernel mappings which reduces the available size of the
+ * linear mapping.
+ * Limit the memory size via mem.
*/
+#ifdef CONFIG_64BIT
+static phys_addr_t memory_limit = -PAGE_OFFSET - SZ_4G;
+#else
static phys_addr_t memory_limit = -PAGE_OFFSET;
+#endif
static int __init early_mem(char *p)
{
{
phys_addr_t vmlinux_end = __pa_symbol(&_end);
phys_addr_t vmlinux_start = __pa_symbol(&_start);
- phys_addr_t max_mapped_addr = __pa(~(ulong)0);
+ phys_addr_t __maybe_unused max_mapped_addr;
phys_addr_t dram_end;
#ifdef CONFIG_XIP_KERNEL
memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start);
dram_end = memblock_end_of_DRAM();
+
+#ifndef CONFIG_64BIT
/*
* memblock allocator is not aware of the fact that last 4K bytes of
* the addressable memory can not be mapped because of IS_ERR_VALUE
* macro. Make sure that last 4k bytes are not usable by memblock
- * if end of dram is equal to maximum addressable memory.
+ * if end of dram is equal to maximum addressable memory. For 64-bit
+ * kernel, this problem can't happen here as the end of the virtual
+ * address space is occupied by the kernel mapping then this check must
+ * be done in create_kernel_page_table.
*/
+ max_mapped_addr = __pa(~(ulong)0);
if (max_mapped_addr == (dram_end - 1))
memblock_set_current_limit(max_mapped_addr - 4096);
+#endif
min_low_pfn = PFN_UP(memblock_start_of_DRAM());
max_low_pfn = max_pfn = PFN_DOWN(dram_end);
BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0);
BUG_ON((kernel_map.phys_addr % map_size) != 0);
+#ifdef CONFIG_64BIT
+ /*
+ * The last 4K bytes of the addressable memory can not be mapped because
+ * of IS_ERR_VALUE macro.
+ */
+ BUG_ON((kernel_map.virt_addr + kernel_map.size) > ADDRESS_SPACE_END - SZ_4K);
+#endif
+
pt_ops.alloc_pte = alloc_pte_early;
pt_ops.get_pte_virt = get_pte_virt_early;
#ifndef __PAGETABLE_PMD_FOLDED
if (start <= __pa(PAGE_OFFSET) &&
__pa(PAGE_OFFSET) < end)
start = __pa(PAGE_OFFSET);
+ if (end >= __pa(PAGE_OFFSET) + memory_limit)
+ end = __pa(PAGE_OFFSET) + memory_limit;
map_size = best_map_size(start, end - start);
for (pa = start; pa < end; pa += map_size) {
return -1;
break;
+ /* speculation barrier */
+ case BPF_ST | BPF_NOSPEC:
+ break;
+
case BPF_ST | BPF_MEM | BPF_B:
case BPF_ST | BPF_MEM | BPF_H:
case BPF_ST | BPF_MEM | BPF_W:
emit_ld(rd, 0, RV_REG_T1, ctx);
break;
+ /* speculation barrier */
+ case BPF_ST | BPF_NOSPEC:
+ break;
+
/* ST: *(size *)(dst + off) = imm */
case BPF_ST | BPF_MEM | BPF_B:
emit_imm(RV_REG_T1, imm, ctx);
KASAN_SANITIZE := n
obj-y := $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o
+obj-$(CONFIG_KERNEL_ZSTD) += clz_ctz.o
obj-all := $(obj-y) piggy.o syms.o
targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2
targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+#include "../../../../lib/clz_ctz.c"
CONFIG_L2TP_V3=y
CONFIG_L2TP_IP=m
CONFIG_L2TP_ETH=m
-CONFIG_BRIDGE=m
+CONFIG_BRIDGE=y
CONFIG_BRIDGE_MRP=y
CONFIG_VLAN_8021Q=m
CONFIG_VLAN_8021Q_GVRP=y
CONFIG_L2TP_V3=y
CONFIG_L2TP_IP=m
CONFIG_L2TP_ETH=m
-CONFIG_BRIDGE=m
+CONFIG_BRIDGE=y
CONFIG_BRIDGE_MRP=y
CONFIG_VLAN_8021Q=m
CONFIG_VLAN_8021Q_GVRP=y
u64 instruction_sigp_init_cpu_reset;
u64 instruction_sigp_cpu_reset;
u64 instruction_sigp_unknown;
- u64 diagnose_10;
- u64 diagnose_44;
- u64 diagnose_9c;
- u64 diagnose_9c_ignored;
- u64 diagnose_9c_forward;
- u64 diagnose_258;
- u64 diagnose_308;
- u64 diagnose_500;
- u64 diagnose_other;
+ u64 instruction_diagnose_10;
+ u64 instruction_diagnose_44;
+ u64 instruction_diagnose_9c;
+ u64 diag_9c_ignored;
+ u64 diag_9c_forward;
+ u64 instruction_diagnose_258;
+ u64 instruction_diagnose_308;
+ u64 instruction_diagnose_500;
+ u64 instruction_diagnose_other;
u64 pfault_sync;
};
.rela.dyn ALIGN(8) : { *(.rela.dyn) }
.got ALIGN(8) : { *(.got .toc) }
+ .got.plt ALIGN(8) : { *(.got.plt) }
_end = .;
PROVIDE(end = .);
.rela.dyn ALIGN(8) : { *(.rela.dyn) }
.got ALIGN(8) : { *(.got .toc) }
+ .got.plt ALIGN(8) : { *(.got.plt) }
_end = .;
PROVIDE(end = .);
start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + PAGE_SIZE;
- vcpu->stat.diagnose_10++;
+ vcpu->stat.instruction_diagnose_10++;
if (start & ~PAGE_MASK || end & ~PAGE_MASK || start >= end
|| start < 2 * PAGE_SIZE)
VCPU_EVENT(vcpu, 3, "diag page reference parameter block at 0x%llx",
vcpu->run->s.regs.gprs[rx]);
- vcpu->stat.diagnose_258++;
+ vcpu->stat.instruction_diagnose_258++;
if (vcpu->run->s.regs.gprs[rx] & 7)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], rx, &parm, sizeof(parm));
static int __diag_time_slice_end(struct kvm_vcpu *vcpu)
{
VCPU_EVENT(vcpu, 5, "%s", "diag time slice end");
- vcpu->stat.diagnose_44++;
+ vcpu->stat.instruction_diagnose_44++;
kvm_vcpu_on_spin(vcpu, true);
return 0;
}
int tid;
tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
- vcpu->stat.diagnose_9c++;
+ vcpu->stat.instruction_diagnose_9c++;
/* yield to self */
if (tid == vcpu->vcpu_id)
VCPU_EVENT(vcpu, 5,
"diag time slice end directed to %d: yield forwarded",
tid);
- vcpu->stat.diagnose_9c_forward++;
+ vcpu->stat.diag_9c_forward++;
return 0;
}
return 0;
no_yield:
VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d: ignored", tid);
- vcpu->stat.diagnose_9c_ignored++;
+ vcpu->stat.diag_9c_ignored++;
return 0;
}
unsigned long subcode = vcpu->run->s.regs.gprs[reg] & 0xffff;
VCPU_EVENT(vcpu, 3, "diag ipl functions, subcode %lx", subcode);
- vcpu->stat.diagnose_308++;
+ vcpu->stat.instruction_diagnose_308++;
switch (subcode) {
case 3:
vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR;
{
int ret;
- vcpu->stat.diagnose_500++;
+ vcpu->stat.instruction_diagnose_500++;
/* No virtio-ccw notification? Get out quickly. */
if (!vcpu->kvm->arch.css_support ||
(vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY))
case 0x500:
return __diag_virtio_hypercall(vcpu);
default:
- vcpu->stat.diagnose_other++;
+ vcpu->stat.instruction_diagnose_other++;
return -EOPNOTSUPP;
}
}
STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
- STATS_DESC_COUNTER(VCPU, diagnose_10),
- STATS_DESC_COUNTER(VCPU, diagnose_44),
- STATS_DESC_COUNTER(VCPU, diagnose_9c),
- STATS_DESC_COUNTER(VCPU, diagnose_9c_ignored),
- STATS_DESC_COUNTER(VCPU, diagnose_9c_forward),
- STATS_DESC_COUNTER(VCPU, diagnose_258),
- STATS_DESC_COUNTER(VCPU, diagnose_308),
- STATS_DESC_COUNTER(VCPU, diagnose_500),
- STATS_DESC_COUNTER(VCPU, diagnose_other),
+ STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
+ STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
+ STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
+ STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
+ STATS_DESC_COUNTER(VCPU, diag_9c_forward),
+ STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
+ STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
+ STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
+ STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
STATS_DESC_COUNTER(VCPU, pfault_sync)
};
static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
break;
}
break;
+ /*
+ * BPF_NOSPEC (speculation barrier)
+ */
+ case BPF_ST | BPF_NOSPEC:
+ break;
/*
* BPF_ST(X)
*/
select HAVE_FUTEX_CMPXCHG if FUTEX
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_HW_BREAKPOINT
- select HAVE_IDE if HAS_IOPORT_MAP
select HAVE_IOREMAP_PROT if MMU && !X2TLB
select HAVE_KERNEL_BZIP2
select HAVE_KERNEL_GZIP
select OF
select OF_PROMTREE
select HAVE_ASM_MODVERSIONS
- select HAVE_IDE
select HAVE_ARCH_KGDB if !SMP || SPARC64
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_SECCOMP if SPARC64
#define SO_NETNS_COOKIE 0x0050
+#define SO_BUF_LOCK 0x0051
+
#if !defined(__KERNEL__)
return 1;
break;
}
+ /* speculation barrier */
+ case BPF_ST | BPF_NOSPEC:
+ break;
/* ST: *(size *)(dst + off) = imm */
case BPF_ST | BPF_MEM | BPF_W:
case BPF_ST | BPF_MEM | BPF_H:
select HAVE_FUNCTION_TRACER
select HAVE_GCC_PLUGINS
select HAVE_HW_BREAKPOINT
- select HAVE_IDE
select HAVE_IOREMAP_PROT
select HAVE_IRQ_EXIT_ON_IRQ_STACK if X86_64
select HAVE_IRQ_TIME_ACCOUNTING
return (struct jump_label_patch){.code = code, .size = size};
}
-static inline void __jump_label_transform(struct jump_entry *entry,
- enum jump_label_type type,
- int init)
+static __always_inline void
+__jump_label_transform(struct jump_entry *entry,
+ enum jump_label_type type,
+ int init)
{
const struct jump_label_patch jlp = __jump_label_patch(entry, type);
static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result)
{
+ trace_kvm_hv_hypercall_done(result);
kvm_hv_hypercall_set_result(vcpu, result);
++vcpu->stat.hypercalls;
return kvm_skip_emulated_instruction(vcpu);
int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
{
+ struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
struct kvm_hv_hcall hc;
u64 ret = HV_STATUS_SUCCESS;
hc.rep_idx = (hc.param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff;
hc.rep = !!(hc.rep_cnt || hc.rep_idx);
- if (hc.fast && is_xmm_fast_hypercall(&hc))
- kvm_hv_hypercall_read_xmm(&hc);
-
trace_kvm_hv_hypercall(hc.code, hc.fast, hc.rep_cnt, hc.rep_idx,
hc.ingpa, hc.outgpa);
- if (unlikely(!hv_check_hypercall_access(to_hv_vcpu(vcpu), hc.code))) {
+ if (unlikely(!hv_check_hypercall_access(hv_vcpu, hc.code))) {
ret = HV_STATUS_ACCESS_DENIED;
goto hypercall_complete;
}
+ if (hc.fast && is_xmm_fast_hypercall(&hc)) {
+ if (unlikely(hv_vcpu->enforce_cpuid &&
+ !(hv_vcpu->cpuid_cache.features_edx &
+ HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE))) {
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return 1;
+ }
+
+ kvm_hv_hypercall_read_xmm(&hc);
+ }
+
switch (hc.code) {
case HVCALL_NOTIFY_LONG_SPIN_WAIT:
if (unlikely(hc.rep)) {
static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic)
{
ioapic->rtc_status.pending_eoi = 0;
- bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPU_ID);
+ bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPU_ID + 1);
}
static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic);
struct dest_map {
/* vcpu bitmap where IRQ has been sent */
- DECLARE_BITMAP(map, KVM_MAX_VCPU_ID);
+ DECLARE_BITMAP(map, KVM_MAX_VCPU_ID + 1);
/*
* Vector sent to a given vcpu, only valid when
* the vcpu's bit in map is set
*/
- u8 vectors[KVM_MAX_VCPU_ID];
+ u8 vectors[KVM_MAX_VCPU_ID + 1];
};
* aggregate version in order to make the slab shrinker
* faster
*/
-static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, unsigned long nr)
+static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, long nr)
{
kvm->arch.n_used_mmu_pages += nr;
percpu_counter_add(&kvm_total_used_mmu_pages, nr);
void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- struct vmcb *vmcb = svm->vmcb;
+ struct vmcb *vmcb = svm->vmcb01.ptr;
bool activated = kvm_vcpu_apicv_active(vcpu);
if (!enable_apicv)
* Also covers avic_vapic_bar, avic_backing_page, avic_logical_id,
* avic_physical_id.
*/
- WARN_ON(svm->vmcb01.ptr->control.int_ctl & AVIC_ENABLE_MASK);
+ WARN_ON(kvm_apicv_activated(svm->vcpu.kvm));
/* Copied from vmcb01. msrpm_base can be overwritten later. */
svm->vmcb->control.nested_ctl = svm->vmcb01.ptr->control.nested_ctl;
}
/* Copy state save area fields which are handled by VMRUN */
-void svm_copy_vmrun_state(struct vmcb_save_area *from_save,
- struct vmcb_save_area *to_save)
+void svm_copy_vmrun_state(struct vmcb_save_area *to_save,
+ struct vmcb_save_area *from_save)
{
to_save->es = from_save->es;
to_save->cs = from_save->cs;
to_save->cpl = 0;
}
-void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
+void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
{
to_vmcb->save.fs = from_vmcb->save.fs;
to_vmcb->save.gs = from_vmcb->save.gs;
svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa;
- svm_copy_vmrun_state(save, &svm->vmcb01.ptr->save);
+ svm_copy_vmrun_state(&svm->vmcb01.ptr->save, save);
nested_load_control_from_vmcb12(svm, ctl);
svm_switch_vmcb(svm, &svm->nested.vmcb02);
unsigned int max_sev_asid;
static unsigned int min_sev_asid;
static unsigned long sev_me_mask;
+static unsigned int nr_asids;
static unsigned long *sev_asid_bitmap;
static unsigned long *sev_reclaim_asid_bitmap;
/* Called with the sev_bitmap_lock held, or on shutdown */
static int sev_flush_asids(int min_asid, int max_asid)
{
- int ret, pos, error = 0;
+ int ret, asid, error = 0;
/* Check if there are any ASIDs to reclaim before performing a flush */
- pos = find_next_bit(sev_reclaim_asid_bitmap, max_asid, min_asid);
- if (pos >= max_asid)
+ asid = find_next_bit(sev_reclaim_asid_bitmap, nr_asids, min_asid);
+ if (asid > max_asid)
return -EBUSY;
/*
/* The flush process will flush all reclaimable SEV and SEV-ES ASIDs */
bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap,
- max_sev_asid);
- bitmap_zero(sev_reclaim_asid_bitmap, max_sev_asid);
+ nr_asids);
+ bitmap_zero(sev_reclaim_asid_bitmap, nr_asids);
return true;
}
static int sev_asid_new(struct kvm_sev_info *sev)
{
- int pos, min_asid, max_asid, ret;
+ int asid, min_asid, max_asid, ret;
bool retry = true;
enum misc_res_type type;
* SEV-enabled guests must use asid from min_sev_asid to max_sev_asid.
* SEV-ES-enabled guest can use from 1 to min_sev_asid - 1.
*/
- min_asid = sev->es_active ? 0 : min_sev_asid - 1;
+ min_asid = sev->es_active ? 1 : min_sev_asid;
max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid;
again:
- pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_asid);
- if (pos >= max_asid) {
+ asid = find_next_zero_bit(sev_asid_bitmap, max_asid + 1, min_asid);
+ if (asid > max_asid) {
if (retry && __sev_recycle_asids(min_asid, max_asid)) {
retry = false;
goto again;
goto e_uncharge;
}
- __set_bit(pos, sev_asid_bitmap);
+ __set_bit(asid, sev_asid_bitmap);
mutex_unlock(&sev_bitmap_lock);
- return pos + 1;
+ return asid;
e_uncharge:
misc_cg_uncharge(type, sev->misc_cg, 1);
put_misc_cg(sev->misc_cg);
static void sev_asid_free(struct kvm_sev_info *sev)
{
struct svm_cpu_data *sd;
- int cpu, pos;
+ int cpu;
enum misc_res_type type;
mutex_lock(&sev_bitmap_lock);
- pos = sev->asid - 1;
- __set_bit(pos, sev_reclaim_asid_bitmap);
+ __set_bit(sev->asid, sev_reclaim_asid_bitmap);
for_each_possible_cpu(cpu) {
sd = per_cpu(svm_data, cpu);
- sd->sev_vmcbs[pos] = NULL;
+ sd->sev_vmcbs[sev->asid] = NULL;
}
mutex_unlock(&sev_bitmap_lock);
min_sev_asid = edx;
sev_me_mask = 1UL << (ebx & 0x3f);
- /* Initialize SEV ASID bitmaps */
- sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
+ /*
+ * Initialize SEV ASID bitmaps. Allocate space for ASID 0 in the bitmap,
+ * even though it's never used, so that the bitmap is indexed by the
+ * actual ASID.
+ */
+ nr_asids = max_sev_asid + 1;
+ sev_asid_bitmap = bitmap_zalloc(nr_asids, GFP_KERNEL);
if (!sev_asid_bitmap)
goto out;
- sev_reclaim_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
+ sev_reclaim_asid_bitmap = bitmap_zalloc(nr_asids, GFP_KERNEL);
if (!sev_reclaim_asid_bitmap) {
bitmap_free(sev_asid_bitmap);
sev_asid_bitmap = NULL;
return;
/* No need to take sev_bitmap_lock, all VMs have been destroyed. */
- sev_flush_asids(0, max_sev_asid);
+ sev_flush_asids(1, max_sev_asid);
bitmap_free(sev_asid_bitmap);
bitmap_free(sev_reclaim_asid_bitmap);
if (!sev_enabled)
return 0;
- sd->sev_vmcbs = kcalloc(max_sev_asid + 1, sizeof(void *), GFP_KERNEL);
+ sd->sev_vmcbs = kcalloc(nr_asids, sizeof(void *), GFP_KERNEL);
if (!sd->sev_vmcbs)
return -ENOMEM;
goto error_free_vmsa_page;
}
- svm_vcpu_init_msrpm(vcpu, svm->msrpm);
-
svm->vmcb01.ptr = page_address(vmcb01_page);
svm->vmcb01.pa = __sme_set(page_to_pfn(vmcb01_page) << PAGE_SHIFT);
svm_switch_vmcb(svm, &svm->vmcb01);
init_vmcb(vcpu);
+ svm_vcpu_init_msrpm(vcpu, svm->msrpm);
+
svm_init_osvw(vcpu);
vcpu->arch.microcode_version = 0x01000065;
{
struct vmcb_control_area *control;
- /* The following fields are ignored when AVIC is enabled */
- WARN_ON(kvm_vcpu_apicv_active(&svm->vcpu));
+ /*
+ * The following fields are ignored when AVIC is enabled
+ */
+ WARN_ON(kvm_apicv_activated(svm->vcpu.kvm));
+
svm_set_intercept(svm, INTERCEPT_VINTR);
/*
ret = kvm_skip_emulated_instruction(vcpu);
if (vmload) {
- nested_svm_vmloadsave(vmcb12, svm->vmcb);
+ svm_copy_vmloadsave_state(svm->vmcb, vmcb12);
svm->sysenter_eip_hi = 0;
svm->sysenter_esp_hi = 0;
- } else
- nested_svm_vmloadsave(svm->vmcb, vmcb12);
+ } else {
+ svm_copy_vmloadsave_state(vmcb12, svm->vmcb);
+ }
kvm_vcpu_unmap(vcpu, &map, true);
BUILD_BUG_ON(offsetof(struct vmcb, save) != 0x400);
- svm_copy_vmrun_state(&svm->vmcb01.ptr->save,
- map_save.hva + 0x400);
+ svm_copy_vmrun_state(map_save.hva + 0x400,
+ &svm->vmcb01.ptr->save);
kvm_vcpu_unmap(vcpu, &map_save, true);
}
&map_save) == -EINVAL)
return 1;
- svm_copy_vmrun_state(map_save.hva + 0x400,
- &svm->vmcb01.ptr->save);
+ svm_copy_vmrun_state(&svm->vmcb01.ptr->save,
+ map_save.hva + 0x400);
kvm_vcpu_unmap(vcpu, &map_save, true);
}
void svm_free_nested(struct vcpu_svm *svm);
int svm_allocate_nested(struct vcpu_svm *svm);
int nested_svm_vmrun(struct kvm_vcpu *vcpu);
-void svm_copy_vmrun_state(struct vmcb_save_area *from_save,
- struct vmcb_save_area *to_save);
-void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb);
+void svm_copy_vmrun_state(struct vmcb_save_area *to_save,
+ struct vmcb_save_area *from_save);
+void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb);
int nested_svm_vmexit(struct vcpu_svm *svm);
static inline int nested_svm_simple_vmexit(struct vcpu_svm *svm, u32 exit_code)
* as we mark it dirty unconditionally towards end of vcpu
* init phase.
*/
- if (vmcb && vmcb_is_clean(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS) &&
+ if (vmcb_is_clean(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS) &&
hve->hv_enlightenments_control.msr_bitmap)
vmcb_mark_dirty(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS);
}
__entry->outgpa)
);
+TRACE_EVENT(kvm_hv_hypercall_done,
+ TP_PROTO(u64 result),
+ TP_ARGS(result),
+
+ TP_STRUCT__entry(
+ __field(__u64, result)
+ ),
+
+ TP_fast_assign(
+ __entry->result = result;
+ ),
+
+ TP_printk("result 0x%llx", __entry->result)
+);
+
/*
* Tracepoint for Xen hypercall.
*/
return 1;
break;
case MSR_KVM_ASYNC_PF_ACK:
- if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
+ if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
return 1;
if (data & 0x1) {
vcpu->arch.apf.pageready_pending = false;
msr_info->data = vcpu->arch.apf.msr_int_val;
break;
case MSR_KVM_ASYNC_PF_ACK:
- if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
+ if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
return 1;
msr_info->data = 0;
static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu)
{
- return kvm_arch_interrupt_allowed(vcpu) &&
- kvm_cpu_accept_dm_intr(vcpu);
+ /*
+ * Do not cause an interrupt window exit if an exception
+ * is pending or an event needs reinjection; userspace
+ * might want to inject the interrupt manually using KVM_SET_REGS
+ * or KVM_SET_SREGS. For that to work, we must be at an
+ * instruction boundary and with no events half-injected.
+ */
+ return (kvm_arch_interrupt_allowed(vcpu) &&
+ kvm_cpu_accept_dm_intr(vcpu) &&
+ !kvm_event_needs_reinjection(vcpu) &&
+ !vcpu->arch.exception.pending);
}
static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
}
break;
+ /* speculation barrier */
+ case BPF_ST | BPF_NOSPEC:
+ if (boot_cpu_has(X86_FEATURE_XMM2))
+ /* Emit 'lfence' */
+ EMIT3(0x0F, 0xAE, 0xE8);
+ break;
+
/* ST: *(u8*)(dst_reg + off) = imm */
case BPF_ST | BPF_MEM | BPF_B:
if (is_ereg(dst_reg))
i++;
break;
}
+ /* speculation barrier */
+ case BPF_ST | BPF_NOSPEC:
+ if (boot_cpu_has(X86_FEATURE_XMM2))
+ /* Emit 'lfence' */
+ EMIT3(0x0F, 0xAE, 0xE8);
+ break;
/* ST: *(u8*)(dst_reg + off) = imm */
case BPF_ST | BPF_MEM | BPF_H:
case BPF_ST | BPF_MEM | BPF_B:
config XTENSA_PLATFORM_XT2000
bool "XT2000"
- select HAVE_IDE
help
XT2000 is the name of Tensilica's feature-rich emulation platform.
This hardware is capable of running a full Linux distribution.
return -1;
iocg_commit_bio(ctx->iocg, wait->bio, wait->abs_cost, cost);
+ wait->committed = true;
/*
* autoremove_wake_function() removes the wait entry only when it
- * actually changed the task state. We want the wait always
- * removed. Remove explicitly and use default_wake_function().
+ * actually changed the task state. We want the wait always removed.
+ * Remove explicitly and use default_wake_function(). Note that the
+ * order of operations is important as finish_wait() tests whether
+ * @wq_entry is removed without grabbing the lock.
*/
- list_del_init(&wq_entry->entry);
- wait->committed = true;
-
default_wake_function(wq_entry, mode, flags, key);
+ list_del_init_careful(&wq_entry->entry);
return 0;
}
percpu_ref_put(&q->q_usage_counter);
}
-static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
- struct blk_mq_hw_ctx *hctx,
- unsigned int hctx_idx)
-{
- if (hctx->sched_tags) {
- blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx);
- blk_mq_free_rq_map(hctx->sched_tags, set->flags);
- hctx->sched_tags = NULL;
- }
-}
-
static int blk_mq_sched_alloc_tags(struct request_queue *q,
struct blk_mq_hw_ctx *hctx,
unsigned int hctx_idx)
return -ENOMEM;
ret = blk_mq_alloc_rqs(set, hctx->sched_tags, hctx_idx, q->nr_requests);
- if (ret)
- blk_mq_sched_free_tags(set, hctx, hctx_idx);
+ if (ret) {
+ blk_mq_free_rq_map(hctx->sched_tags, set->flags);
+ hctx->sched_tags = NULL;
+ }
return ret;
}
disk_release_events(disk);
kfree(disk->random);
xa_destroy(&disk->part_tbl);
- bdput(disk->part0);
if (test_bit(GD_QUEUE_REF, &disk->state) && disk->queue)
blk_put_queue(disk->queue);
- kfree(disk);
+ bdput(disk->part0); /* frees the disk */
}
struct class block_class = {
.name = "block",
config ACPI_TABLE_OVERRIDE_VIA_BUILTIN_INITRD
bool "Override ACPI tables from built-in initrd"
depends on ACPI_TABLE_UPGRADE
- depends on INITRAMFS_SOURCE!="" && INITRAMFS_COMPRESSION=""
+ depends on INITRAMFS_SOURCE!="" && INITRAMFS_COMPRESSION_NONE
help
This option provides functionality to override arbitrary ACPI tables
from built-in uncompressed initrd.
#include <linux/module.h>
#include <linux/platform_device.h>
+struct pch_fivr_resp {
+ u64 status;
+ u64 result;
+};
+
+static int pch_fivr_read(acpi_handle handle, char *method, struct pch_fivr_resp *fivr_resp)
+{
+ struct acpi_buffer resp = { sizeof(struct pch_fivr_resp), fivr_resp};
+ struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+ struct acpi_buffer format = { sizeof("NN"), "NN" };
+ union acpi_object *obj;
+ acpi_status status;
+ int ret = -EFAULT;
+
+ status = acpi_evaluate_object(handle, method, NULL, &buffer);
+ if (ACPI_FAILURE(status))
+ return ret;
+
+ obj = buffer.pointer;
+ if (!obj || obj->type != ACPI_TYPE_PACKAGE)
+ goto release_buffer;
+
+ status = acpi_extract_package(obj, &format, &resp);
+ if (ACPI_FAILURE(status))
+ goto release_buffer;
+
+ if (fivr_resp->status)
+ goto release_buffer;
+
+ ret = 0;
+
+release_buffer:
+ kfree(buffer.pointer);
+ return ret;
+}
+
/*
* Presentation of attributes which are defined for INT1045
* They are:
char *buf)\
{\
struct acpi_device *acpi_dev = dev_get_drvdata(dev);\
- unsigned long long val;\
- acpi_status status;\
+ struct pch_fivr_resp fivr_resp;\
+ int status;\
\
- status = acpi_evaluate_integer(acpi_dev->handle, #method,\
- NULL, &val);\
- if (ACPI_SUCCESS(status))\
- return sprintf(buf, "%d\n", (int)val);\
- else\
- return -EINVAL;\
+ status = pch_fivr_read(acpi_dev->handle, #method, &fivr_resp);\
+ if (status)\
+ return status;\
+\
+ return sprintf(buf, "%llu\n", fivr_resp.result);\
}
#define PCH_FIVR_STORE(name, method) \
}
}
-static bool irq_is_legacy(struct acpi_resource_irq *irq)
-{
- return irq->triggering == ACPI_EDGE_SENSITIVE &&
- irq->polarity == ACPI_ACTIVE_HIGH &&
- irq->shareable == ACPI_EXCLUSIVE;
-}
-
/**
* acpi_dev_resource_interrupt - Extract ACPI interrupt resource information.
* @ares: Input ACPI resource object.
}
acpi_dev_get_irqresource(res, irq->interrupts[index],
irq->triggering, irq->polarity,
- irq->shareable, irq_is_legacy(irq));
+ irq->shareable, true);
break;
case ACPI_RESOURCE_TYPE_EXTENDED_IRQ:
ext_irq = &ares->data.extended_irq;
* Return the next match of ACPI device if another matching device was present
* at the moment of invocation, or NULL otherwise.
*
- * FIXME: The function does not tolerate the sudden disappearance of @adev, e.g.
- * in the case of a hotplug event. That said, the caller should ensure that
- * this will never happen.
- *
* The caller is responsible for invoking acpi_dev_put() on the returned device.
+ * On the other hand the function invokes acpi_dev_put() on the given @adev
+ * assuming that its reference counter had been increased beforehand.
*
* See additional information in acpi_dev_present() as well.
*/
match.hrv = hrv;
dev = bus_find_device(&acpi_bus_type, start, &match, acpi_dev_match_cb);
+ acpi_dev_put(adev);
return dev ? to_acpi_device(dev) : NULL;
}
EXPORT_SYMBOL(acpi_dev_get_next_match_dev);
* AMDI0006:
* - should use rev_id 0x0
* - function mask = 0x3: Should use Microsoft method
+ * AMDI0007:
+ * - Should use rev_id 0x2
+ * - Should only use AMD method
*/
const char *hid = acpi_device_hid(adev);
- rev_id = 0;
+ rev_id = strcmp(hid, "AMDI0007") ? 0 : 2;
lps0_dsm_func_mask = validate_dsm(adev->handle,
ACPI_LPS0_DSM_UUID_AMD, rev_id, &lps0_dsm_guid);
lps0_dsm_func_mask_microsoft = validate_dsm(adev->handle,
- ACPI_LPS0_DSM_UUID_MICROSOFT, rev_id,
+ ACPI_LPS0_DSM_UUID_MICROSOFT, 0,
&lps0_dsm_guid_microsoft);
if (lps0_dsm_func_mask > 0x3 && (!strcmp(hid, "AMD0004") ||
!strcmp(hid, "AMDI0005"))) {
lps0_dsm_func_mask = (lps0_dsm_func_mask << 1) | 0x1;
acpi_handle_debug(adev->handle, "_DSM UUID %s: Adjusted function mask: 0x%x\n",
ACPI_LPS0_DSM_UUID_AMD, lps0_dsm_func_mask);
+ } else if (lps0_dsm_func_mask_microsoft > 0 && !strcmp(hid, "AMDI0007")) {
+ lps0_dsm_func_mask_microsoft = -EINVAL;
+ acpi_handle_debug(adev->handle, "_DSM Using AMD method\n");
}
} else {
rev_id = 1;
}
EXPORT_SYMBOL_GPL(ata_sff_data_xfer32);
+static void ata_pio_xfer(struct ata_queued_cmd *qc, struct page *page,
+ unsigned int offset, size_t xfer_size)
+{
+ bool do_write = (qc->tf.flags & ATA_TFLAG_WRITE);
+ unsigned char *buf;
+
+ buf = kmap_atomic(page);
+ qc->ap->ops->sff_data_xfer(qc, buf + offset, xfer_size, do_write);
+ kunmap_atomic(buf);
+
+ if (!do_write && !PageSlab(page))
+ flush_dcache_page(page);
+}
+
/**
* ata_pio_sector - Transfer a sector of data.
* @qc: Command on going
*/
static void ata_pio_sector(struct ata_queued_cmd *qc)
{
- int do_write = (qc->tf.flags & ATA_TFLAG_WRITE);
struct ata_port *ap = qc->ap;
struct page *page;
unsigned int offset;
- unsigned char *buf;
if (!qc->cursg) {
qc->curbytes = qc->nbytes;
DPRINTK("data %s\n", qc->tf.flags & ATA_TFLAG_WRITE ? "write" : "read");
- /* do the actual data transfer */
- buf = kmap_atomic(page);
- ap->ops->sff_data_xfer(qc, buf + offset, qc->sect_size, do_write);
- kunmap_atomic(buf);
+ /*
+ * Split the transfer when it splits a page boundary. Note that the
+ * split still has to be dword aligned like all ATA data transfers.
+ */
+ WARN_ON_ONCE(offset % 4);
+ if (offset + qc->sect_size > PAGE_SIZE) {
+ unsigned int split_len = PAGE_SIZE - offset;
- if (!do_write && !PageSlab(page))
- flush_dcache_page(page);
+ ata_pio_xfer(qc, page, offset, split_len);
+ ata_pio_xfer(qc, nth_page(page, 1), 0,
+ qc->sect_size - split_len);
+ } else {
+ ata_pio_xfer(qc, page, offset, qc->sect_size);
+ }
qc->curbytes += qc->sect_size;
qc->cursg_ofs += qc->sect_size;
// Part of the job is done by atm_pcr_goal which gives us a PCR
// specification which says: EITHER grab the maximum available PCR
- // (and perhaps a lower bound which we musn't pass), OR grab this
+ // (and perhaps a lower bound which we must not pass), OR grab this
// amount, rounding down if you have to (and perhaps a lower bound
- // which we musn't pass) OR grab this amount, rounding up if you
- // have to (and perhaps an upper bound which we musn't pass). If any
+ // which we must not pass) OR grab this amount, rounding up if you
+ // have to (and perhaps an upper bound which we must not pass). If any
// bounds ARE passed we fail. Note that rounding is only rounding to
// match device limitations, we do not round down to satisfy
// bandwidth availability even if this would not violate any given
int __auxiliary_driver_register(struct auxiliary_driver *auxdrv,
struct module *owner, const char *modname)
{
+ int ret;
+
if (WARN_ON(!auxdrv->probe) || WARN_ON(!auxdrv->id_table))
return -EINVAL;
auxdrv->driver.bus = &auxiliary_bus_type;
auxdrv->driver.mod_name = modname;
- return driver_register(&auxdrv->driver);
+ ret = driver_register(&auxdrv->driver);
+ if (ret)
+ kfree(auxdrv->driver.name);
+
+ return ret;
}
EXPORT_SYMBOL_GPL(__auxiliary_driver_register);
return;
}
- snprintf(buf, len, "supplier:%s:%s", dev_bus_name(sup), dev_name(sup));
- sysfs_remove_link(&con->kobj, buf);
+ if (device_is_registered(con)) {
+ snprintf(buf, len, "supplier:%s:%s", dev_bus_name(sup), dev_name(sup));
+ sysfs_remove_link(&con->kobj, buf);
+ }
snprintf(buf, len, "consumer:%s:%s", dev_bus_name(con), dev_name(con));
sysfs_remove_link(&sup->kobj, buf);
kfree(buf);
static DEFINE_IDR(loop_index_idr);
static DEFINE_MUTEX(loop_ctl_mutex);
+static DEFINE_MUTEX(loop_validate_mutex);
+
+/**
+ * loop_global_lock_killable() - take locks for safe loop_validate_file() test
+ *
+ * @lo: struct loop_device
+ * @global: true if @lo is about to bind another "struct loop_device", false otherwise
+ *
+ * Returns 0 on success, -EINTR otherwise.
+ *
+ * Since loop_validate_file() traverses on other "struct loop_device" if
+ * is_loop_device() is true, we need a global lock for serializing concurrent
+ * loop_configure()/loop_change_fd()/__loop_clr_fd() calls.
+ */
+static int loop_global_lock_killable(struct loop_device *lo, bool global)
+{
+ int err;
+
+ if (global) {
+ err = mutex_lock_killable(&loop_validate_mutex);
+ if (err)
+ return err;
+ }
+ err = mutex_lock_killable(&lo->lo_mutex);
+ if (err && global)
+ mutex_unlock(&loop_validate_mutex);
+ return err;
+}
+
+/**
+ * loop_global_unlock() - release locks taken by loop_global_lock_killable()
+ *
+ * @lo: struct loop_device
+ * @global: true if @lo was about to bind another "struct loop_device", false otherwise
+ */
+static void loop_global_unlock(struct loop_device *lo, bool global)
+{
+ mutex_unlock(&lo->lo_mutex);
+ if (global)
+ mutex_unlock(&loop_validate_mutex);
+}
static int max_part;
static int part_shift;
while (is_loop_device(f)) {
struct loop_device *l;
+ lockdep_assert_held(&loop_validate_mutex);
if (f->f_mapping->host->i_rdev == bdev->bd_dev)
return -EBADF;
l = I_BDEV(f->f_mapping->host)->bd_disk->private_data;
- if (l->lo_state != Lo_bound) {
+ if (l->lo_state != Lo_bound)
return -EINVAL;
- }
+ /* Order wrt setting lo->lo_backing_file in loop_configure(). */
+ rmb();
f = l->lo_backing_file;
}
if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
unsigned int arg)
{
- struct file *file = NULL, *old_file;
- int error;
- bool partscan;
+ struct file *file = fget(arg);
+ struct file *old_file;
+ int error;
+ bool partscan;
+ bool is_loop;
- error = mutex_lock_killable(&lo->lo_mutex);
+ if (!file)
+ return -EBADF;
+ is_loop = is_loop_device(file);
+ error = loop_global_lock_killable(lo, is_loop);
if (error)
- return error;
+ goto out_putf;
error = -ENXIO;
if (lo->lo_state != Lo_bound)
goto out_err;
if (!(lo->lo_flags & LO_FLAGS_READ_ONLY))
goto out_err;
- error = -EBADF;
- file = fget(arg);
- if (!file)
- goto out_err;
-
error = loop_validate_file(file, bdev);
if (error)
goto out_err;
loop_update_dio(lo);
blk_mq_unfreeze_queue(lo->lo_queue);
partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
- mutex_unlock(&lo->lo_mutex);
+ loop_global_unlock(lo, is_loop);
+
+ /*
+ * Flush loop_validate_file() before fput(), for l->lo_backing_file
+ * might be pointing at old_file which might be the last reference.
+ */
+ if (!is_loop) {
+ mutex_lock(&loop_validate_mutex);
+ mutex_unlock(&loop_validate_mutex);
+ }
/*
* We must drop file reference outside of lo_mutex as dropping
* the file ref can take open_mutex which creates circular locking
return 0;
out_err:
- mutex_unlock(&lo->lo_mutex);
- if (file)
- fput(file);
+ loop_global_unlock(lo, is_loop);
+out_putf:
+ fput(file);
return error;
}
struct block_device *bdev,
const struct loop_config *config)
{
- struct file *file;
- struct inode *inode;
+ struct file *file = fget(config->fd);
+ struct inode *inode;
struct address_space *mapping;
- int error;
- loff_t size;
- bool partscan;
- unsigned short bsize;
+ int error;
+ loff_t size;
+ bool partscan;
+ unsigned short bsize;
+ bool is_loop;
+
+ if (!file)
+ return -EBADF;
+ is_loop = is_loop_device(file);
/* This is safe, since we have a reference from open(). */
__module_get(THIS_MODULE);
- error = -EBADF;
- file = fget(config->fd);
- if (!file)
- goto out;
-
/*
* If we don't hold exclusive handle for the device, upgrade to it
* here to avoid changing device under exclusive owner.
goto out_putf;
}
- error = mutex_lock_killable(&lo->lo_mutex);
+ error = loop_global_lock_killable(lo, is_loop);
if (error)
goto out_bdev;
size = get_loop_size(lo, file);
loop_set_size(lo, size);
+ /* Order wrt reading lo_state in loop_validate_file(). */
+ wmb();
+
lo->lo_state = Lo_bound;
if (part_shift)
lo->lo_flags |= LO_FLAGS_PARTSCAN;
* put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev).
*/
bdgrab(bdev);
- mutex_unlock(&lo->lo_mutex);
+ loop_global_unlock(lo, is_loop);
if (partscan)
loop_reread_partitions(lo);
if (!(mode & FMODE_EXCL))
return 0;
out_unlock:
- mutex_unlock(&lo->lo_mutex);
+ loop_global_unlock(lo, is_loop);
out_bdev:
if (!(mode & FMODE_EXCL))
bd_abort_claiming(bdev, loop_configure);
out_putf:
fput(file);
-out:
/* This is safe: open() is still holding a reference. */
module_put(THIS_MODULE);
return error;
int lo_number;
struct loop_worker *pos, *worker;
+ /*
+ * Flush loop_configure() and loop_change_fd(). It is acceptable for
+ * loop_validate_file() to succeed, for actual clear operation has not
+ * started yet.
+ */
+ mutex_lock(&loop_validate_mutex);
+ mutex_unlock(&loop_validate_mutex);
+ /*
+ * loop_validate_file() now fails because l->lo_state != Lo_bound
+ * became visible.
+ */
+
mutex_lock(&lo->lo_mutex);
if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) {
err = -ENXIO;
static bool rbd_quiesce_lock(struct rbd_device *rbd_dev)
{
- bool need_wait;
-
dout("%s rbd_dev %p\n", __func__, rbd_dev);
lockdep_assert_held_write(&rbd_dev->lock_rwsem);
*/
rbd_dev->lock_state = RBD_LOCK_STATE_RELEASING;
rbd_assert(!completion_done(&rbd_dev->releasing_wait));
- need_wait = !list_empty(&rbd_dev->running_list);
- downgrade_write(&rbd_dev->lock_rwsem);
- if (need_wait)
- wait_for_completion(&rbd_dev->releasing_wait);
- up_read(&rbd_dev->lock_rwsem);
+ if (list_empty(&rbd_dev->running_list))
+ return true;
+
+ up_write(&rbd_dev->lock_rwsem);
+ wait_for_completion(&rbd_dev->releasing_wait);
down_write(&rbd_dev->lock_rwsem);
if (rbd_dev->lock_state != RBD_LOCK_STATE_RELEASING)
if (!rbd_cid_equal(&cid, &rbd_empty_cid)) {
down_write(&rbd_dev->lock_rwsem);
if (rbd_cid_equal(&cid, &rbd_dev->owner_cid)) {
- /*
- * we already know that the remote client is
- * the owner
- */
- up_write(&rbd_dev->lock_rwsem);
- return;
+ dout("%s rbd_dev %p cid %llu-%llu == owner_cid\n",
+ __func__, rbd_dev, cid.gid, cid.handle);
+ } else {
+ rbd_set_owner_cid(rbd_dev, &cid);
}
-
- rbd_set_owner_cid(rbd_dev, &cid);
downgrade_write(&rbd_dev->lock_rwsem);
} else {
down_read(&rbd_dev->lock_rwsem);
if (!rbd_cid_equal(&cid, &rbd_empty_cid)) {
down_write(&rbd_dev->lock_rwsem);
if (!rbd_cid_equal(&cid, &rbd_dev->owner_cid)) {
- dout("%s rbd_dev %p unexpected owner, cid %llu-%llu != owner_cid %llu-%llu\n",
+ dout("%s rbd_dev %p cid %llu-%llu != owner_cid %llu-%llu\n",
__func__, rbd_dev, cid.gid, cid.handle,
rbd_dev->owner_cid.gid, rbd_dev->owner_cid.handle);
- up_write(&rbd_dev->lock_rwsem);
- return;
+ } else {
+ rbd_set_owner_cid(rbd_dev, &rbd_empty_cid);
}
-
- rbd_set_owner_cid(rbd_dev, &rbd_empty_cid);
downgrade_write(&rbd_dev->lock_rwsem);
} else {
down_read(&rbd_dev->lock_rwsem);
disk->minors = RBD_MINORS_PER_MAJOR;
}
disk->fops = &rbd_bd_ops;
+ disk->private_data = rbd_dev;
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
/* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */
}
EXPORT_SYMBOL_GPL(fsl_mc_device_remove);
-struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev)
+struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev,
+ u16 if_id)
{
struct fsl_mc_device *mc_bus_dev, *endpoint;
struct fsl_mc_obj_desc endpoint_desc = {{ 0 }};
mc_bus_dev = to_fsl_mc_device(mc_dev->dev.parent);
strcpy(endpoint1.type, mc_dev->obj_desc.type);
endpoint1.id = mc_dev->obj_desc.id;
+ endpoint1.if_id = if_id;
err = dprc_get_connection(mc_bus_dev->mc_io, 0,
mc_bus_dev->mc_handle,
struct image_info *img_info);
void mhi_fw_load_handler(struct mhi_controller *mhi_cntrl);
int mhi_prepare_channel(struct mhi_controller *mhi_cntrl,
- struct mhi_chan *mhi_chan);
+ struct mhi_chan *mhi_chan, unsigned int flags);
int mhi_init_chan_ctxt(struct mhi_controller *mhi_cntrl,
struct mhi_chan *mhi_chan);
void mhi_deinit_chan_ctxt(struct mhi_controller *mhi_cntrl,
cmd_pkt = mhi_to_virtual(mhi_ring, ptr);
chan = MHI_TRE_GET_CMD_CHID(cmd_pkt);
- mhi_chan = &mhi_cntrl->mhi_chan[chan];
- write_lock_bh(&mhi_chan->lock);
- mhi_chan->ccs = MHI_TRE_GET_EV_CODE(tre);
- complete(&mhi_chan->completion);
- write_unlock_bh(&mhi_chan->lock);
+
+ if (chan < mhi_cntrl->max_chan &&
+ mhi_cntrl->mhi_chan[chan].configured) {
+ mhi_chan = &mhi_cntrl->mhi_chan[chan];
+ write_lock_bh(&mhi_chan->lock);
+ mhi_chan->ccs = MHI_TRE_GET_EV_CODE(tre);
+ complete(&mhi_chan->completion);
+ write_unlock_bh(&mhi_chan->lock);
+ } else {
+ dev_err(&mhi_cntrl->mhi_dev->dev,
+ "Completion packet for invalid channel ID: %d\n", chan);
+ }
mhi_del_ring_element(mhi_cntrl, mhi_ring);
}
}
int mhi_prepare_channel(struct mhi_controller *mhi_cntrl,
- struct mhi_chan *mhi_chan)
+ struct mhi_chan *mhi_chan, unsigned int flags)
{
int ret = 0;
struct device *dev = &mhi_chan->mhi_dev->dev;
if (ret)
goto error_pm_state;
+ if (mhi_chan->dir == DMA_FROM_DEVICE)
+ mhi_chan->pre_alloc = !!(flags & MHI_CH_INBOUND_ALLOC_BUFS);
+
/* Pre-allocate buffer for xfer ring */
if (mhi_chan->pre_alloc) {
int nr_el = get_nr_avail_ring_elements(mhi_cntrl,
}
/* Move channel to start state */
-int mhi_prepare_for_transfer(struct mhi_device *mhi_dev)
+int mhi_prepare_for_transfer(struct mhi_device *mhi_dev, unsigned int flags)
{
int ret, dir;
struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl;
if (!mhi_chan)
continue;
- ret = mhi_prepare_channel(mhi_cntrl, mhi_chan);
+ ret = mhi_prepare_channel(mhi_cntrl, mhi_chan, flags);
if (ret)
goto error_open_chan;
}
* @bar_num: PCI base address register to use for MHI MMIO register space
* @dma_data_width: DMA transfer word size (32 or 64 bits)
* @mru_default: default MRU size for MBIM network packets
+ * @sideband_wake: Devices using dedicated sideband GPIO for wakeup instead
+ * of inband wake support (such as sdx24)
*/
struct mhi_pci_dev_info {
const struct mhi_controller_config *config;
unsigned int bar_num;
unsigned int dma_data_width;
unsigned int mru_default;
+ bool sideband_wake;
};
#define MHI_CHANNEL_CONFIG_UL(ch_num, ch_name, el_count, ev_ring) \
.doorbell_mode_switch = false, \
}
+#define MHI_CHANNEL_CONFIG_DL_AUTOQUEUE(ch_num, ch_name, el_count, ev_ring) \
+ { \
+ .num = ch_num, \
+ .name = ch_name, \
+ .num_elements = el_count, \
+ .event_ring = ev_ring, \
+ .dir = DMA_FROM_DEVICE, \
+ .ee_mask = BIT(MHI_EE_AMSS), \
+ .pollcfg = 0, \
+ .doorbell = MHI_DB_BRST_DISABLE, \
+ .lpm_notify = false, \
+ .offload_channel = false, \
+ .doorbell_mode_switch = false, \
+ .auto_queue = true, \
+ }
+
#define MHI_EVENT_CONFIG_CTRL(ev_ring, el_count) \
{ \
.num_elements = el_count, \
MHI_CHANNEL_CONFIG_UL(14, "QMI", 4, 0),
MHI_CHANNEL_CONFIG_DL(15, "QMI", 4, 0),
MHI_CHANNEL_CONFIG_UL(20, "IPCR", 8, 0),
- MHI_CHANNEL_CONFIG_DL(21, "IPCR", 8, 0),
+ MHI_CHANNEL_CONFIG_DL_AUTOQUEUE(21, "IPCR", 8, 0),
MHI_CHANNEL_CONFIG_UL_FP(34, "FIREHOSE", 32, 0),
MHI_CHANNEL_CONFIG_DL_FP(35, "FIREHOSE", 32, 0),
MHI_CHANNEL_CONFIG_HW_UL(100, "IP_HW0", 128, 2),
.edl = "qcom/sdx65m/edl.mbn",
.config = &modem_qcom_v1_mhiv_config,
.bar_num = MHI_PCI_DEFAULT_BAR_NUM,
- .dma_data_width = 32
+ .dma_data_width = 32,
+ .sideband_wake = false,
};
static const struct mhi_pci_dev_info mhi_qcom_sdx55_info = {
.config = &modem_qcom_v1_mhiv_config,
.bar_num = MHI_PCI_DEFAULT_BAR_NUM,
.dma_data_width = 32,
- .mru_default = 32768
+ .mru_default = 32768,
+ .sideband_wake = false,
};
static const struct mhi_pci_dev_info mhi_qcom_sdx24_info = {
.edl = "qcom/prog_firehose_sdx24.mbn",
.config = &modem_qcom_v1_mhiv_config,
.bar_num = MHI_PCI_DEFAULT_BAR_NUM,
- .dma_data_width = 32
+ .dma_data_width = 32,
+ .sideband_wake = true,
};
static const struct mhi_channel_config mhi_quectel_em1xx_channels[] = {
.edl = "qcom/prog_firehose_sdx24.mbn",
.config = &modem_quectel_em1xx_config,
.bar_num = MHI_PCI_DEFAULT_BAR_NUM,
- .dma_data_width = 32
+ .dma_data_width = 32,
+ .sideband_wake = true,
};
static const struct mhi_channel_config mhi_foxconn_sdx55_channels[] = {
.edl = "qcom/sdx55m/edl.mbn",
.config = &modem_foxconn_sdx55_config,
.bar_num = MHI_PCI_DEFAULT_BAR_NUM,
- .dma_data_width = 32
+ .dma_data_width = 32,
+ .sideband_wake = false,
};
static const struct pci_device_id mhi_pci_id_table[] = {
mhi_cntrl->status_cb = mhi_pci_status_cb;
mhi_cntrl->runtime_get = mhi_pci_runtime_get;
mhi_cntrl->runtime_put = mhi_pci_runtime_put;
- mhi_cntrl->wake_get = mhi_pci_wake_get_nop;
- mhi_cntrl->wake_put = mhi_pci_wake_put_nop;
- mhi_cntrl->wake_toggle = mhi_pci_wake_toggle_nop;
mhi_cntrl->mru = info->mru_default;
+ if (info->sideband_wake) {
+ mhi_cntrl->wake_get = mhi_pci_wake_get_nop;
+ mhi_cntrl->wake_put = mhi_pci_wake_put_nop;
+ mhi_cntrl->wake_toggle = mhi_pci_wake_toggle_nop;
+ }
+
err = mhi_pci_claim(mhi_cntrl, info->bar_num, DMA_BIT_MASK(info->dma_data_width));
if (err)
return err;
}
EXPORT_SYMBOL_GPL(devm_clk_bulk_get_optional);
+static void devm_clk_bulk_release_all(struct device *dev, void *res)
+{
+ struct clk_bulk_devres *devres = res;
+
+ clk_bulk_put_all(devres->num_clks, devres->clks);
+}
+
int __must_check devm_clk_bulk_get_all(struct device *dev,
struct clk_bulk_data **clks)
{
struct clk_bulk_devres *devres;
int ret;
- devres = devres_alloc(devm_clk_bulk_release,
+ devres = devres_alloc(devm_clk_bulk_release_all,
sizeof(*devres), GFP_KERNEL);
if (!devres)
return -ENOMEM;
struct stm32f4_pll_post_div_data {
int idx;
- u8 pll_num;
+ int pll_idx;
const char *name;
const char *parent;
u8 flag;
#define MAX_POST_DIV 3
static const struct stm32f4_pll_post_div_data post_div_data[MAX_POST_DIV] = {
- { CLK_I2SQ_PDIV, PLL_I2S, "plli2s-q-div", "plli2s-q",
+ { CLK_I2SQ_PDIV, PLL_VCO_I2S, "plli2s-q-div", "plli2s-q",
CLK_SET_RATE_PARENT, STM32F4_RCC_DCKCFGR, 0, 5, 0, NULL},
- { CLK_SAIQ_PDIV, PLL_SAI, "pllsai-q-div", "pllsai-q",
+ { CLK_SAIQ_PDIV, PLL_VCO_SAI, "pllsai-q-div", "pllsai-q",
CLK_SET_RATE_PARENT, STM32F4_RCC_DCKCFGR, 8, 5, 0, NULL },
- { NO_IDX, PLL_SAI, "pllsai-r-div", "pllsai-r", CLK_SET_RATE_PARENT,
+ { NO_IDX, PLL_VCO_SAI, "pllsai-r-div", "pllsai-r", CLK_SET_RATE_PARENT,
STM32F4_RCC_DCKCFGR, 16, 2, 0, post_divr_table },
};
post_div->width,
post_div->flag_div,
post_div->div_table,
- clks[post_div->pll_num],
+ clks[post_div->pll_idx],
&stm32f4_clk_lock);
if (post_div->idx != NO_IDX)
config COMMON_CLK_HI3559A
bool "Hi3559A Clock Driver"
depends on ARCH_HISI || COMPILE_TEST
+ select RESET_HISI
default ARCH_HISI
help
Build the clock driver for hi3559a.
static struct clk_smd_rpm *msm8936_clks[] = {
[RPM_SMD_PCNOC_CLK] = &msm8916_pcnoc_clk,
- [RPM_SMD_PCNOC_A_CLK] = &msm8916_pcnoc_clk,
+ [RPM_SMD_PCNOC_A_CLK] = &msm8916_pcnoc_a_clk,
[RPM_SMD_SNOC_CLK] = &msm8916_snoc_clk,
[RPM_SMD_SNOC_A_CLK] = &msm8916_snoc_a_clk,
[RPM_SMD_BIMC_CLK] = &msm8916_bimc_clk,
gate_ops->disable(gate_hw);
}
+static void clk_sdmmc_mux_disable_unused(struct clk_hw *hw)
+{
+ struct tegra_sdmmc_mux *sdmmc_mux = to_clk_sdmmc_mux(hw);
+ const struct clk_ops *gate_ops = sdmmc_mux->gate_ops;
+ struct clk_hw *gate_hw = &sdmmc_mux->gate.hw;
+
+ gate_ops->disable_unused(gate_hw);
+}
+
static void clk_sdmmc_mux_restore_context(struct clk_hw *hw)
{
struct clk_hw *parent = clk_hw_get_parent(hw);
.is_enabled = clk_sdmmc_mux_is_enabled,
.enable = clk_sdmmc_mux_enable,
.disable = clk_sdmmc_mux_disable,
+ .disable_unused = clk_sdmmc_mux_disable_unused,
.restore_context = clk_sdmmc_mux_restore_context,
};
break;
if (!adev->pnp.unique_id && node->acpi.uid == 0)
break;
- acpi_dev_put(adev);
}
if (!adev)
return -ENODEV;
static int efi_mem_reserve_iomem(phys_addr_t addr, u64 size)
{
struct resource *res, *parent;
+ int ret;
res = kzalloc(sizeof(struct resource), GFP_ATOMIC);
if (!res)
/* we expect a conflict with a 'System RAM' region */
parent = request_resource_conflict(&iomem_resource, res);
- return parent ? request_resource(parent, res) : 0;
+ ret = parent ? request_resource(parent, res) : 0;
+
+ /*
+ * Given that efi_mem_reserve_iomem() can be called at any
+ * time, only call memblock_reserve() if the architecture
+ * keeps the infrastructure around.
+ */
+ if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK) && !ret)
+ memblock_reserve(addr, size);
+
+ return ret;
}
int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size)
* @image: EFI loaded image protocol
* @load_addr: pointer to loaded initrd
* @load_size: size of loaded initrd
- * @soft_limit: preferred size of allocated memory for loading the initrd
- * @hard_limit: minimum size of allocated memory
+ * @soft_limit: preferred address for loading the initrd
+ * @hard_limit: upper limit address for loading the initrd
*
* Return: status code
*/
pr_err("EFI MOKvar config table is not valid\n");
return;
}
- efi_mem_reserve(efi.mokvar_table, map_size_needed);
+
+ if (md.type == EFI_BOOT_SERVICES_DATA)
+ efi_mem_reserve(efi.mokvar_table, map_size_needed);
+
efi_mokvar_table_size = map_size_needed;
}
tbl_size = sizeof(*log_tbl) + log_tbl->size;
memblock_reserve(efi.tpm_log, tbl_size);
- if (efi.tpm_final_log == EFI_INVALID_TABLE_ADDR ||
- log_tbl->version != EFI_TCG2_EVENT_LOG_FORMAT_TCG_2) {
- pr_warn(FW_BUG "TPM Final Events table missing or invalid\n");
+ if (efi.tpm_final_log == EFI_INVALID_TABLE_ADDR) {
+ pr_info("TPM Final Events table not present\n");
+ goto out;
+ } else if (log_tbl->version != EFI_TCG2_EVENT_LOG_FORMAT_TCG_2) {
+ pr_warn(FW_BUG "TPM Final Events table invalid\n");
goto out;
}
ret = devm_request_irq(&pdev->dev, mpc8xxx_gc->irqn,
mpc8xxx_gpio_irq_cascade,
- IRQF_SHARED, "gpio-cascade",
+ IRQF_NO_THREAD | IRQF_SHARED, "gpio-cascade",
mpc8xxx_gc);
if (ret) {
dev_err(&pdev->dev,
struct resource *res;
int ret, irq;
- irq = platform_get_irq(pdev, 0);
- if (irq < 0)
+ irq = platform_get_irq_optional(pdev, 0);
+ if (irq < 0 && irq != -ENXIO)
return irq;
res = platform_get_resource(pdev, IORESOURCE_IO, 0);
pm_runtime_enable(&pdev->dev);
- if (irq) {
+ if (irq > 0) {
struct irq_chip *irq_chip = &gpio->irq_chip;
u8 irq_status;
u32 max_level;
};
+#define codec_info_build(type, width, height, level) \
+ .codec_type = type,\
+ .max_width = width,\
+ .max_height = height,\
+ .max_pixels_per_frame = height * width,\
+ .max_level = level,
+
struct amdgpu_video_codecs {
const u32 codec_count;
const struct amdgpu_video_codec_info *codec_array;
#include <linux/slab.h>
#include <linux/power_supply.h>
#include <linux/pm_runtime.h>
+#include <linux/suspend.h>
#include <acpi/video.h>
#include <acpi/actbl.h>
#if defined(CONFIG_AMD_PMC) || defined(CONFIG_AMD_PMC_MODULE)
if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) {
if (adev->flags & AMD_IS_APU)
- return true;
+ return pm_suspend_target_state == PM_SUSPEND_TO_IDLE;
}
#endif
return false;
r = amdgpu_device_get_job_timeout_settings(adev);
if (r) {
dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
- goto failed_unmap;
+ return r;
}
/* early init functions */
r = amdgpu_device_ip_early_init(adev);
if (r)
- goto failed_unmap;
+ return r;
/* doorbell bar mapping and doorbell index init*/
amdgpu_device_doorbell_init(adev);
failed:
amdgpu_vf_error_trans_all(adev);
-failed_unmap:
- iounmap(adev->rmmio);
- adev->rmmio = NULL;
-
return r;
}
/* Van Gogh */
{0x1002, 0x163F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VANGOGH|AMD_IS_APU},
+ /* Yellow Carp */
+ {0x1002, 0x164D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_YELLOW_CARP|AMD_IS_APU},
+ {0x1002, 0x1681, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_YELLOW_CARP|AMD_IS_APU},
+
/* Navy_Flounder */
{0x1002, 0x73C0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
{0x1002, 0x73C1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
return -EPERM;
+ /* Workaround for Thunk bug creating PROT_NONE,MAP_PRIVATE mappings
+ * for debugger access to invisible VRAM. Should have used MAP_SHARED
+ * instead. Clearing VM_MAYWRITE prevents the mapping from ever
+ * becoming writable and makes is_cow_mapping(vm_flags) false.
+ */
+ if (is_cow_mapping(vma->vm_flags) &&
+ !(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)))
+ vma->vm_flags &= ~VM_MAYWRITE;
+
return drm_gem_ttm_mmap(obj, vma);
}
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0x00010000, 0x00010020),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffbfffff, 0x00a00000)
};
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000020),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1_Vangogh, 0xffffffff, 0x00070103),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQG_CONFIG, 0x000017ff, 0x00001000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0x00010000, 0x00010020),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00400000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0x00010000, 0x00010020),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x01030000, 0x01030000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x03a00000, 0x00a00000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x00000020, 0x00000020)
#include "smuio_v11_0.h"
#include "smuio_v11_0_6.h"
-#define codec_info_build(type, width, height, level) \
- .codec_type = type,\
- .max_width = width,\
- .max_height = height,\
- .max_pixels_per_frame = height * width,\
- .max_level = level,
-
static const struct amd_ip_funcs nv_common_ip_funcs;
/* Navi */
static const struct amdgpu_video_codec_info nv_video_codecs_encode_array[] =
{
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
- .max_width = 4096,
- .max_height = 2304,
- .max_pixels_per_frame = 4096 * 2304,
- .max_level = 0,
- },
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC,
- .max_width = 4096,
- .max_height = 2304,
- .max_pixels_per_frame = 4096 * 2304,
- .max_level = 0,
- },
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
};
static const struct amdgpu_video_codecs nv_video_codecs_encode =
/* Navi1x */
static const struct amdgpu_video_codec_info nv_video_codecs_decode_array[] =
{
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
- .max_level = 3,
- },
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
- .max_level = 5,
- },
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
- .max_level = 52,
- },
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
- .max_level = 4,
- },
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC,
- .max_width = 8192,
- .max_height = 4352,
- .max_pixels_per_frame = 8192 * 4352,
- .max_level = 186,
- },
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
- .max_level = 0,
- },
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9,
- .max_width = 8192,
- .max_height = 4352,
- .max_pixels_per_frame = 8192 * 4352,
- .max_level = 0,
- },
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
};
static const struct amdgpu_video_codecs nv_video_codecs_decode =
/* Sienna Cichlid */
static const struct amdgpu_video_codec_info sc_video_codecs_decode_array[] =
{
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
- .max_level = 3,
- },
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
- .max_level = 5,
- },
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
- .max_level = 52,
- },
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
- .max_level = 4,
- },
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC,
- .max_width = 8192,
- .max_height = 4352,
- .max_pixels_per_frame = 8192 * 4352,
- .max_level = 186,
- },
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
- .max_level = 0,
- },
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9,
- .max_width = 8192,
- .max_height = 4352,
- .max_pixels_per_frame = 8192 * 4352,
- .max_level = 0,
- },
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1,
- .max_width = 8192,
- .max_height = 4352,
- .max_pixels_per_frame = 8192 * 4352,
- .max_level = 0,
- },
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
};
static const struct amdgpu_video_codecs sc_video_codecs_decode =
/* SRIOV Sienna Cichlid, not const since data is controlled by host */
static struct amdgpu_video_codec_info sriov_sc_video_codecs_encode_array[] =
{
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
- .max_width = 4096,
- .max_height = 2304,
- .max_pixels_per_frame = 4096 * 2304,
- .max_level = 0,
- },
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC,
- .max_width = 4096,
- .max_height = 2304,
- .max_pixels_per_frame = 4096 * 2304,
- .max_level = 0,
- },
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
};
static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array[] =
{
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
- .max_level = 3,
- },
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
- .max_level = 5,
- },
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
- .max_level = 52,
- },
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
- .max_level = 4,
- },
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC,
- .max_width = 8192,
- .max_height = 4352,
- .max_pixels_per_frame = 8192 * 4352,
- .max_level = 186,
- },
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
- .max_level = 0,
- },
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9,
- .max_width = 8192,
- .max_height = 4352,
- .max_pixels_per_frame = 8192 * 4352,
- .max_level = 0,
- },
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1,
- .max_width = 8192,
- .max_height = 4352,
- .max_pixels_per_frame = 8192 * 4352,
- .max_level = 0,
- },
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
};
static struct amdgpu_video_codecs sriov_sc_video_codecs_encode =
.codec_array = NULL,
};
+/* Yellow Carp*/
+static const struct amdgpu_video_codec_info yc_video_codecs_decode_array[] = {
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+};
+
+static const struct amdgpu_video_codecs yc_video_codecs_decode = {
+ .codec_count = ARRAY_SIZE(yc_video_codecs_decode_array),
+ .codec_array = yc_video_codecs_decode_array,
+};
+
static int nv_query_video_codecs(struct amdgpu_device *adev, bool encode,
const struct amdgpu_video_codecs **codecs)
{
case CHIP_NAVY_FLOUNDER:
case CHIP_DIMGREY_CAVEFISH:
case CHIP_VANGOGH:
- case CHIP_YELLOW_CARP:
if (encode)
*codecs = &nv_video_codecs_encode;
else
*codecs = &sc_video_codecs_decode;
return 0;
+ case CHIP_YELLOW_CARP:
+ if (encode)
+ *codecs = &nv_video_codecs_encode;
+ else
+ *codecs = &yc_video_codecs_decode;
+ return 0;
case CHIP_BEIGE_GOBY:
if (encode)
*codecs = &bg_video_codecs_encode;
AMD_PG_SUPPORT_VCN |
AMD_PG_SUPPORT_VCN_DPG |
AMD_PG_SUPPORT_JPEG;
- adev->external_rev_id = adev->rev_id + 0x01;
+ if (adev->pdev->device == 0x1681)
+ adev->external_rev_id = adev->rev_id + 0x19;
+ else
+ adev->external_rev_id = adev->rev_id + 0x01;
break;
default:
/* FIXME: not supported yet */
err = psp_init_asd_microcode(psp, chip_name);
if (err)
- goto out;
+ return err;
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);
err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);
} else {
err = amdgpu_ucode_validate(adev->psp.ta_fw);
if (err)
- goto out2;
+ goto out;
ta_hdr = (const struct ta_firmware_header_v1_0 *)
adev->psp.ta_fw->data;
return 0;
-out2:
+out:
release_firmware(adev->psp.ta_fw);
adev->psp.ta_fw = NULL;
-out:
if (err) {
dev_err(adev->dev,
"psp v12.0: Failed to load firmware \"%s\"\n",
/* Vega, Raven, Arcturus */
static const struct amdgpu_video_codec_info vega_video_codecs_encode_array[] =
{
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
- .max_width = 4096,
- .max_height = 2304,
- .max_pixels_per_frame = 4096 * 2304,
- .max_level = 0,
- },
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC,
- .max_width = 4096,
- .max_height = 2304,
- .max_pixels_per_frame = 4096 * 2304,
- .max_level = 0,
- },
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
};
static const struct amdgpu_video_codecs vega_video_codecs_encode =
/* Vega */
static const struct amdgpu_video_codec_info vega_video_codecs_decode_array[] =
{
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
- .max_level = 3,
- },
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
- .max_level = 5,
- },
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
- .max_level = 52,
- },
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
- .max_level = 4,
- },
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
- .max_level = 186,
- },
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
- .max_level = 0,
- },
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
};
static const struct amdgpu_video_codecs vega_video_codecs_decode =
/* Raven */
static const struct amdgpu_video_codec_info rv_video_codecs_decode_array[] =
{
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
- .max_level = 3,
- },
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
- .max_level = 5,
- },
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
- .max_level = 52,
- },
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
- .max_level = 4,
- },
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
- .max_level = 186,
- },
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
- .max_level = 0,
- },
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
- .max_level = 0,
- },
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 4096, 4096, 0)},
};
static const struct amdgpu_video_codecs rv_video_codecs_decode =
/* Renoir, Arcturus */
static const struct amdgpu_video_codec_info rn_video_codecs_decode_array[] =
{
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
- .max_level = 3,
- },
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
- .max_level = 5,
- },
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
- .max_level = 52,
- },
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
- .max_level = 4,
- },
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC,
- .max_width = 8192,
- .max_height = 4352,
- .max_pixels_per_frame = 4096 * 4096,
- .max_level = 186,
- },
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG,
- .max_width = 4096,
- .max_height = 4096,
- .max_pixels_per_frame = 4096 * 4096,
- .max_level = 0,
- },
- {
- .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9,
- .max_width = 8192,
- .max_height = 4352,
- .max_pixels_per_frame = 4096 * 4096,
- .max_level = 0,
- },
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
};
static const struct amdgpu_video_codecs rn_video_codecs_decode =
max_cll = conn_base->hdr_sink_metadata.hdmi_type1.max_cll;
min_cll = conn_base->hdr_sink_metadata.hdmi_type1.min_cll;
- if (caps->ext_caps->bits.oled == 1 ||
+ if (caps->ext_caps->bits.oled == 1 /*||
caps->ext_caps->bits.sdr_aux_backlight_control == 1 ||
- caps->ext_caps->bits.hdr_aux_backlight_control == 1)
+ caps->ext_caps->bits.hdr_aux_backlight_control == 1*/)
caps->aux_support = true;
if (amdgpu_backlight == 0)
REG_UPDATE(DENTIST_DISPCLK_CNTL,
DENTIST_DISPCLK_WDIVIDER, dispclk_wdivider);
-// REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, 1, 5, 100);
+ REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, 1, 50, 1000);
REG_UPDATE(DENTIST_DISPCLK_CNTL,
DENTIST_DPPCLK_WDIVIDER, dppclk_wdivider);
REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DPPCLK_CHG_DONE, 1, 5, 100);
&clk_mgr_base->bw_params->clk_table.entries[0].dtbclk_mhz,
&num_levels);
+ /* SOCCLK */
+ dcn3_init_single_clock(clk_mgr, PPCLK_SOCCLK,
+ &clk_mgr_base->bw_params->clk_table.entries[0].socclk_mhz,
+ &num_levels);
// DPREFCLK ???
/* DISPCLK */
#include "dc_dmub_srv.h"
+#include "yellow_carp_offset.h"
+
+#define regCLK1_CLK_PLL_REQ 0x0237
+#define regCLK1_CLK_PLL_REQ_BASE_IDX 0
+
+#define CLK1_CLK_PLL_REQ__FbMult_int__SHIFT 0x0
+#define CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc
+#define CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10
+#define CLK1_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL
+#define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L
+#define CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L
+
+#define REG(reg_name) \
+ (CLK_BASE.instance[0].segment[reg ## reg_name ## _BASE_IDX] + reg ## reg_name)
+
#define TO_CLK_MGR_DCN31(clk_mgr)\
container_of(clk_mgr, struct clk_mgr_dcn31, base)
* also if safe to lower is false, we just go in the higher state
*/
if (safe_to_lower) {
- if (new_clocks->z9_support == DCN_Z9_SUPPORT_ALLOW &&
- new_clocks->z9_support != clk_mgr_base->clks.z9_support) {
+ if (new_clocks->zstate_support == DCN_ZSTATE_SUPPORT_ALLOW &&
+ new_clocks->zstate_support != clk_mgr_base->clks.zstate_support) {
dcn31_smu_set_Z9_support(clk_mgr, true);
- clk_mgr_base->clks.z9_support = new_clocks->z9_support;
+ clk_mgr_base->clks.zstate_support = new_clocks->zstate_support;
}
if (clk_mgr_base->clks.dtbclk_en && !new_clocks->dtbclk_en) {
}
}
} else {
- if (new_clocks->z9_support == DCN_Z9_SUPPORT_DISALLOW &&
- new_clocks->z9_support != clk_mgr_base->clks.z9_support) {
+ if (new_clocks->zstate_support == DCN_ZSTATE_SUPPORT_DISALLOW &&
+ new_clocks->zstate_support != clk_mgr_base->clks.zstate_support) {
dcn31_smu_set_Z9_support(clk_mgr, false);
- clk_mgr_base->clks.z9_support = new_clocks->z9_support;
+ clk_mgr_base->clks.zstate_support = new_clocks->zstate_support;
}
if (!clk_mgr_base->clks.dtbclk_en && new_clocks->dtbclk_en) {
static int get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr)
{
- return 0;
+ /* get FbMult value */
+ struct fixed31_32 pll_req;
+ unsigned int fbmult_frac_val = 0;
+ unsigned int fbmult_int_val = 0;
+
+ /*
+ * Register value of fbmult is in 8.16 format, we are converting to 31.32
+ * to leverage the fix point operations available in driver
+ */
+
+ REG_GET(CLK1_CLK_PLL_REQ, FbMult_frac, &fbmult_frac_val); /* 16 bit fractional part*/
+ REG_GET(CLK1_CLK_PLL_REQ, FbMult_int, &fbmult_int_val); /* 8 bit integer part */
+
+ pll_req = dc_fixpt_from_int(fbmult_int_val);
+
+ /*
+ * since fractional part is only 16 bit in register definition but is 32 bit
+ * in our fix point definiton, need to shift left by 16 to obtain correct value
+ */
+ pll_req.value |= fbmult_frac_val << 16;
+
+ /* multiply by REFCLK period */
+ pll_req = dc_fixpt_mul_int(pll_req, clk_mgr->dfs_ref_freq_khz);
+
+ /* integer part is now VCO frequency in kHz */
+ return dc_fixpt_floor(pll_req);
}
static void dcn31_enable_pme_wa(struct clk_mgr *clk_mgr_base)
clk_mgr->clks.p_state_change_support = true;
clk_mgr->clks.prev_p_state_change_support = true;
clk_mgr->clks.pwr_state = DCN_PWR_STATE_UNKNOWN;
- clk_mgr->clks.z9_support = DCN_Z9_SUPPORT_UNKNOWN;
+ clk_mgr->clks.zstate_support = DCN_ZSTATE_SUPPORT_UNKNOWN;
}
static bool dcn31_are_clock_states_equal(struct dc_clocks *a,
return false;
else if (a->dcfclk_deep_sleep_khz != b->dcfclk_deep_sleep_khz)
return false;
- else if (a->z9_support != b->z9_support)
+ else if (a->zstate_support != b->zstate_support)
return false;
else if (a->dtbclk_en != b->dtbclk_en)
return false;
clk_mgr->base.dprefclk_ss_percentage = 0;
clk_mgr->base.dprefclk_ss_divider = 1000;
clk_mgr->base.ss_on_dprefclk = false;
+ clk_mgr->base.dfs_ref_freq_khz = 48000;
clk_mgr->smu_wm_set.wm_set = (struct dcn31_watermarks *)dm_helpers_allocate_gpu_mem(
clk_mgr->base.base.ctx,
#define __DCN31_CLK_MGR_H__
#include "clk_mgr_internal.h"
-//CLK1_CLK_PLL_REQ
-#ifndef CLK11_CLK1_CLK_PLL_REQ__FbMult_int__SHIFT
-#define CLK11_CLK1_CLK_PLL_REQ__FbMult_int__SHIFT 0x0
-#define CLK11_CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc
-#define CLK11_CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10
-#define CLK11_CLK1_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL
-#define CLK11_CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L
-#define CLK11_CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L
-//CLK1_CLK0_DFS_CNTL
-#define CLK11_CLK1_CLK0_DFS_CNTL__CLK0_DIVIDER__SHIFT 0x0
-#define CLK11_CLK1_CLK0_DFS_CNTL__CLK0_DIVIDER_MASK 0x0000007FL
-/*DPREF clock related*/
-#define CLK0_CLK3_DFS_CNTL__CLK3_DIVIDER__SHIFT 0x0
-#define CLK0_CLK3_DFS_CNTL__CLK3_DIVIDER_MASK 0x0000007FL
-#define CLK1_CLK3_DFS_CNTL__CLK3_DIVIDER__SHIFT 0x0
-#define CLK1_CLK3_DFS_CNTL__CLK3_DIVIDER_MASK 0x0000007FL
-#define CLK2_CLK3_DFS_CNTL__CLK3_DIVIDER__SHIFT 0x0
-#define CLK2_CLK3_DFS_CNTL__CLK3_DIVIDER_MASK 0x0000007FL
-#define CLK3_CLK3_DFS_CNTL__CLK3_DIVIDER__SHIFT 0x0
-#define CLK3_CLK3_DFS_CNTL__CLK3_DIVIDER_MASK 0x0000007FL
-
-//CLK3_0_CLK3_CLK_PLL_REQ
-#define CLK3_0_CLK3_CLK_PLL_REQ__FbMult_int__SHIFT 0x0
-#define CLK3_0_CLK3_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc
-#define CLK3_0_CLK3_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10
-#define CLK3_0_CLK3_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL
-#define CLK3_0_CLK3_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L
-#define CLK3_0_CLK3_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L
-
-#define mmCLK0_CLK3_DFS_CNTL 0x16C60
-#define mmCLK00_CLK0_CLK3_DFS_CNTL 0x16C60
-#define mmCLK01_CLK0_CLK3_DFS_CNTL 0x16E60
-#define mmCLK02_CLK0_CLK3_DFS_CNTL 0x17060
-#define mmCLK03_CLK0_CLK3_DFS_CNTL 0x17260
-
-#define mmCLK0_CLK_PLL_REQ 0x16C10
-#define mmCLK00_CLK0_CLK_PLL_REQ 0x16C10
-#define mmCLK01_CLK0_CLK_PLL_REQ 0x16E10
-#define mmCLK02_CLK0_CLK_PLL_REQ 0x17010
-#define mmCLK03_CLK0_CLK_PLL_REQ 0x17210
-
-#define mmCLK1_CLK_PLL_REQ 0x1B00D
-#define mmCLK10_CLK1_CLK_PLL_REQ 0x1B00D
-#define mmCLK11_CLK1_CLK_PLL_REQ 0x1B20D
-#define mmCLK12_CLK1_CLK_PLL_REQ 0x1B40D
-#define mmCLK13_CLK1_CLK_PLL_REQ 0x1B60D
-
-#define mmCLK2_CLK_PLL_REQ 0x17E0D
-
-/*AMCLK*/
-#define mmCLK11_CLK1_CLK0_DFS_CNTL 0x1B23F
-#define mmCLK11_CLK1_CLK_PLL_REQ 0x1B20D
-#endif
-
struct dcn31_watermarks;
struct dcn31_smu_watermark_set {
*/
panel_mode = DP_PANEL_MODE_DEFAULT;
}
- } else
- panel_mode = DP_PANEL_MODE_DEFAULT;
+ }
}
#endif
}
}
- if (link->dpcd_caps.panel_mode_edp) {
+ if (link->dpcd_caps.panel_mode_edp &&
+ (link->connector_signal == SIGNAL_TYPE_EDP ||
+ (link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT &&
+ link->is_internal_display))) {
return DP_PANEL_MODE_EDP;
}
{
uint32_t default_backlight;
- if (link &&
- (link->dpcd_sink_ext_caps.bits.hdr_aux_backlight_control == 1 ||
- link->dpcd_sink_ext_caps.bits.sdr_aux_backlight_control == 1)) {
+ if (link && link->dpcd_sink_ext_caps.bits.oled == 1) {
if (!dc_link_read_default_bl_aux(link, &default_backlight))
default_backlight = 150000;
// if < 5 nits or > 5000, it might be wrong readback
* so use only 30 bpp on DCE_VERSION_11_0. Testing with DCE 11.2 and 8.3
* did not show such problems, so this seems to be the exception.
*/
- if (plane_state->ctx->dce_version != DCE_VERSION_11_0)
+ if (plane_state->ctx->dce_version > DCE_VERSION_11_0)
pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_36BPP;
else
pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_30BPP;
};
#if defined(CONFIG_DRM_AMD_DC_DCN)
-enum dcn_z9_support_state {
- DCN_Z9_SUPPORT_UNKNOWN,
- DCN_Z9_SUPPORT_ALLOW,
- DCN_Z9_SUPPORT_DISALLOW,
+enum dcn_zstate_support_state {
+ DCN_ZSTATE_SUPPORT_UNKNOWN,
+ DCN_ZSTATE_SUPPORT_ALLOW,
+ DCN_ZSTATE_SUPPORT_DISALLOW,
};
#endif
/*
int dramclk_khz;
bool p_state_change_support;
#if defined(CONFIG_DRM_AMD_DC_DCN)
- enum dcn_z9_support_state z9_support;
+ enum dcn_zstate_support_state zstate_support;
bool dtbclk_en;
#endif
enum dcn_pwr_state pwr_state;
uint32_t ODM_MEM_PWR_CTRL3;
uint32_t DMU_MEM_PWR_CNTL;
uint32_t MMHUBBUB_MEM_PWR_CNTL;
+ uint32_t DCHUBBUB_ARB_HOSTVM_CNTL;
};
/* set field name */
#define HWS_SF(blk_name, reg_name, field_name, post_fix)\
type DOMAIN_POWER_FORCEON;\
type DOMAIN_POWER_GATE;\
type DOMAIN_PGFSM_PWR_STATUS;\
- type HPO_HDMISTREAMCLK_G_GATE_DIS;
+ type HPO_HDMISTREAMCLK_G_GATE_DIS;\
+ type DISABLE_HOSTVM_FORCE_ALLOW_PSTATE;
struct dce_hwseq_shift {
HWSEQ_REG_FIELD_LIST(uint8_t)
const struct line_buffer_params *lb_params,
enum lb_memory_config mem_size_config)
{
+ uint32_t max_partitions = 63; /* Currently hardcoded on all ASICs before DCN 3.2 */
+
/* LB */
if (dpp->base.caps->dscl_data_proc_format == DSCL_DATA_PRCESSING_FIXED_FORMAT) {
/* DSCL caps: pixel data processed in fixed format */
LB_DATA_FORMAT__ALPHA_EN, lb_params->alpha_en); /* Alpha enable */
}
+ if (dpp->base.caps->max_lb_partitions == 31)
+ max_partitions = 31;
+
REG_SET_2(LB_MEMORY_CTRL, 0,
MEMORY_CONFIG, mem_size_config,
- LB_MAX_PARTITIONS, 63);
+ LB_MAX_PARTITIONS, max_partitions);
}
static const uint16_t *dpp1_dscl_get_filter_coeffs_64p(int taps, struct fixed31_32 ratio)
- timing->v_border_bottom;
pipes[pipe_cnt].pipe.dest.htotal = timing->h_total;
pipes[pipe_cnt].pipe.dest.vtotal = v_total;
- pipes[pipe_cnt].pipe.dest.hactive = timing->h_addressable;
- pipes[pipe_cnt].pipe.dest.vactive = timing->v_addressable;
+ pipes[pipe_cnt].pipe.dest.hactive =
+ timing->h_addressable + timing->h_border_left + timing->h_border_right;
+ pipes[pipe_cnt].pipe.dest.vactive =
+ timing->v_addressable + timing->v_border_top + timing->v_border_bottom;
pipes[pipe_cnt].pipe.dest.interlaced = timing->flags.INTERLACE;
pipes[pipe_cnt].pipe.dest.pixel_rate_mhz = timing->pix_clk_100hz/10000.0;
if (timing->timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING)
return false;
}
+static enum dcn_zstate_support_state decide_zstate_support(struct dc *dc, struct dc_state *context)
+{
+ int plane_count;
+ int i;
+
+ plane_count = 0;
+ for (i = 0; i < dc->res_pool->pipe_count; i++) {
+ if (context->res_ctx.pipe_ctx[i].plane_state)
+ plane_count++;
+ }
+
+ /*
+ * Zstate is allowed in following scenarios:
+ * 1. Single eDP with PSR enabled
+ * 2. 0 planes (No memory requests)
+ * 3. Single eDP without PSR but > 5ms stutter period
+ */
+ if (plane_count == 0)
+ return DCN_ZSTATE_SUPPORT_ALLOW;
+ else if (context->stream_count == 1 && context->streams[0]->signal == SIGNAL_TYPE_EDP) {
+ struct dc_link *link = context->streams[0]->sink->link;
+
+ if ((link->link_index == 0 && link->psr_settings.psr_feature_enabled)
+ || context->bw_ctx.dml.vba.StutterPeriod > 5000.0)
+ return DCN_ZSTATE_SUPPORT_ALLOW;
+ else
+ return DCN_ZSTATE_SUPPORT_DISALLOW;
+ } else
+ return DCN_ZSTATE_SUPPORT_DISALLOW;
+}
+
void dcn20_calculate_dlg_params(
struct dc *dc, struct dc_state *context,
display_e2e_pipe_params_st *pipes,
int vlevel)
{
int i, pipe_idx;
- int plane_count;
/* Writeback MCIF_WB arbitration parameters */
dc->res_pool->funcs->set_mcif_arb_params(dc, context, pipes, pipe_cnt);
!= dm_dram_clock_change_unsupported;
context->bw_ctx.bw.dcn.clk.dppclk_khz = 0;
- context->bw_ctx.bw.dcn.clk.z9_support = (context->bw_ctx.dml.vba.StutterPeriod > 5000.0) ?
- DCN_Z9_SUPPORT_ALLOW : DCN_Z9_SUPPORT_DISALLOW;
-
- plane_count = 0;
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- if (context->res_ctx.pipe_ctx[i].plane_state)
- plane_count++;
- }
-
- if (plane_count == 0)
- context->bw_ctx.bw.dcn.clk.z9_support = DCN_Z9_SUPPORT_ALLOW;
+ context->bw_ctx.bw.dcn.clk.zstate_support = decide_zstate_support(dc, context);
context->bw_ctx.bw.dcn.clk.dtbclk_en = is_dtbclk_required(dc, context);
.max_page_table_levels = 4,
.pte_chunk_size_kbytes = 2,
.meta_chunk_size_kbytes = 2,
+ .min_meta_chunk_size_bytes = 256,
.writeback_chunk_size_kbytes = 2,
.line_buffer_size_bits = 789504,
.is_line_buffer_bpp_fixed = 0,
int min_taps_y, min_taps_c;
enum lb_memory_config lb_config;
- /* Some ASICs does not support FP16 scaling, so we reject modes require this*/
- if (scl_data->viewport.width != scl_data->h_active &&
- scl_data->viewport.height != scl_data->v_active &&
- dpp->caps->dscl_data_proc_format == DSCL_DATA_PRCESSING_FIXED_FORMAT &&
- scl_data->format == PIXEL_FORMAT_FP16)
- return false;
-
if (scl_data->viewport.width > scl_data->h_active &&
dpp->ctx->dc->debug.max_downscale_src_width != 0 &&
scl_data->viewport.width > dpp->ctx->dc->debug.max_downscale_src_width)
dpp->tf_shift = tf_shift;
dpp->tf_mask = tf_mask;
- dpp->lb_pixel_depth_supported =
- LB_PIXEL_DEPTH_18BPP |
- LB_PIXEL_DEPTH_24BPP |
- LB_PIXEL_DEPTH_30BPP |
- LB_PIXEL_DEPTH_36BPP;
-
- dpp->lb_bits_per_entry = LB_BITS_PER_ENTRY;
- dpp->lb_memory_size = LB_TOTAL_NUMBER_OF_ENTRIES; /*0x1404*/
-
return true;
}
SRI(COLOR_KEYER_BLUE, CNVC_CFG, id), \
SRI(CURSOR_CONTROL, CURSOR0_, id),\
SRI(OBUF_MEM_PWR_CTRL, DSCL, id),\
+ SRI(DSCL_MEM_PWR_STATUS, DSCL, id), \
SRI(DSCL_MEM_PWR_CTRL, DSCL, id)
#define DPP_REG_LIST_DCN30(id)\
SRI(CM_SHAPER_LUT_DATA, CM, id),\
SRI(CM_MEM_PWR_CTRL2, CM, id), \
SRI(CM_MEM_PWR_STATUS2, CM, id), \
- SRI(DSCL_MEM_PWR_STATUS, DSCL, id), \
- SRI(DSCL_MEM_PWR_CTRL, DSCL, id), \
SRI(CM_BLNDGAM_RAMA_START_SLOPE_CNTL_B, CM, id),\
SRI(CM_BLNDGAM_RAMA_START_SLOPE_CNTL_G, CM, id),\
SRI(CM_BLNDGAM_RAMA_START_SLOPE_CNTL_R, CM, id),\
dcn3_02_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz;
dcn3_02_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz;
dcn3_02_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz;
- dcn3_02_soc.clock_limits[i].dtbclk_mhz = dcn3_02_soc.clock_limits[0].dtbclk_mhz;
+ /* Populate from bw_params for DTBCLK, SOCCLK */
+ if (!bw_params->clk_table.entries[i].dtbclk_mhz && i > 0)
+ dcn3_02_soc.clock_limits[i].dtbclk_mhz = dcn3_02_soc.clock_limits[i-1].dtbclk_mhz;
+ else
+ dcn3_02_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz;
+ if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0)
+ dcn3_02_soc.clock_limits[i].socclk_mhz = dcn3_02_soc.clock_limits[i-1].socclk_mhz;
+ else
+ dcn3_02_soc.clock_limits[i].socclk_mhz = bw_params->clk_table.entries[i].socclk_mhz;
/* These clocks cannot come from bw_params, always fill from dcn3_02_soc[1] */
- /* FCLK, PHYCLK_D18, SOCCLK, DSCCLK */
+ /* FCLK, PHYCLK_D18, DSCCLK */
dcn3_02_soc.clock_limits[i].phyclk_d18_mhz = dcn3_02_soc.clock_limits[0].phyclk_d18_mhz;
- dcn3_02_soc.clock_limits[i].socclk_mhz = dcn3_02_soc.clock_limits[0].socclk_mhz;
dcn3_02_soc.clock_limits[i].dscclk_mhz = dcn3_02_soc.clock_limits[0].dscclk_mhz;
}
/* re-init DML with updated bb */
dcn3_03_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz;
dcn3_03_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz;
dcn3_03_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz;
- dcn3_03_soc.clock_limits[i].dtbclk_mhz = dcn3_03_soc.clock_limits[0].dtbclk_mhz;
+ /* Populate from bw_params for DTBCLK, SOCCLK */
+ if (!bw_params->clk_table.entries[i].dtbclk_mhz && i > 0)
+ dcn3_03_soc.clock_limits[i].dtbclk_mhz = dcn3_03_soc.clock_limits[i-1].dtbclk_mhz;
+ else
+ dcn3_03_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz;
+ if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0)
+ dcn3_03_soc.clock_limits[i].socclk_mhz = dcn3_03_soc.clock_limits[i-1].socclk_mhz;
+ else
+ dcn3_03_soc.clock_limits[i].socclk_mhz = bw_params->clk_table.entries[i].socclk_mhz;
/* These clocks cannot come from bw_params, always fill from dcn3_03_soc[1] */
- /* FCLK, PHYCLK_D18, SOCCLK, DSCCLK */
+ /* FCLK, PHYCLK_D18, DSCCLK */
dcn3_03_soc.clock_limits[i].phyclk_d18_mhz = dcn3_03_soc.clock_limits[0].phyclk_d18_mhz;
- dcn3_03_soc.clock_limits[i].socclk_mhz = dcn3_03_soc.clock_limits[0].socclk_mhz;
dcn3_03_soc.clock_limits[i].dscclk_mhz = dcn3_03_soc.clock_limits[0].dscclk_mhz;
}
/* re-init DML with updated bb */
#include "dce/dmub_outbox.h"
#include "dc_link_dp.h"
#include "inc/link_dpcd.h"
+#include "dcn10/dcn10_hw_sequencer.h"
#define DC_LOGGER_INIT(logger)
}
return false;
}
+
+static void apply_riommu_invalidation_wa(struct dc *dc)
+{
+ struct dce_hwseq *hws = dc->hwseq;
+
+ if (!hws->wa.early_riommu_invalidation)
+ return;
+
+ REG_UPDATE(DCHUBBUB_ARB_HOSTVM_CNTL, DISABLE_HOSTVM_FORCE_ALLOW_PSTATE, 0);
+}
+
+void dcn31_init_pipes(struct dc *dc, struct dc_state *context)
+{
+ dcn10_init_pipes(dc, context);
+ apply_riommu_invalidation_wa(dc);
+
+}
struct dc_state *context);
bool dcn31_is_abm_supported(struct dc *dc,
struct dc_state *context, struct dc_stream_state *stream);
+void dcn31_init_pipes(struct dc *dc, struct dc_state *context);
#endif /* __DC_HWSS_DCN31_H__ */
.set_flip_control_gsl = dcn20_set_flip_control_gsl,
.get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync,
.calc_vupdate_position = dcn10_calc_vupdate_position,
- .apply_idle_power_optimizations = dcn30_apply_idle_power_optimizations,
.set_backlight_level = dcn21_set_backlight_level,
.set_abm_immediate_disable = dcn21_set_abm_immediate_disable,
.set_pipe = dcn21_set_pipe,
};
static const struct hwseq_private_funcs dcn31_private_funcs = {
- .init_pipes = dcn10_init_pipes,
+ .init_pipes = dcn31_init_pipes,
.update_plane_addr = dcn20_update_plane_addr,
.plane_atomic_disconnect = dcn10_plane_atomic_disconnect,
.update_mpcc = dcn20_update_mpcc,
.sr_exit_z8_time_us = 402.0,
.sr_enter_plus_exit_z8_time_us = 520.0,
.writeback_latency_us = 12.0,
+ .dram_channel_width_bytes = 4,
.round_trip_ping_latency_dcfclk_cycles = 106,
.urgent_latency_pixel_data_only_us = 4.0,
.urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
#define HWSEQ_DCN31_REG_LIST()\
SR(DCHUBBUB_GLOBAL_TIMER_CNTL), \
+ SR(DCHUBBUB_ARB_HOSTVM_CNTL), \
SR(DIO_MEM_PWR_CTRL), \
SR(ODM_MEM_PWR_CTRL3), \
SR(DMU_MEM_PWR_CNTL), \
#define HWSEQ_DCN31_MASK_SH_LIST(mask_sh)\
HWSEQ_DCN_MASK_SH_LIST(mask_sh), \
HWS_SF(, DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_REFDIV, mask_sh), \
+ HWS_SF(, DCHUBBUB_ARB_HOSTVM_CNTL, DISABLE_HOSTVM_FORCE_ALLOW_PSTATE, mask_sh), \
HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
hws->regs = &hwseq_reg;
hws->shifts = &hwseq_shift;
hws->masks = &hwseq_mask;
+ hws->wa.early_riommu_invalidation = true;
}
return hws;
}
else
*DestinationLinesForPrefetch = dst_y_prefetch_equ;
+ // Limit to prevent overflow in DST_Y_PREFETCH register
+ *DestinationLinesForPrefetch = dml_min(*DestinationLinesForPrefetch, 63.75);
+
dml_print("DML: VStartup: %d\n", VStartup);
dml_print("DML: TCalc: %f\n", TCalc);
dml_print("DML: TWait: %f\n", TWait);
}
} while ((locals->PrefetchSupported[i][j] != true || locals->VRatioInPrefetchSupported[i][j] != true)
&& (mode_lib->vba.NextMaxVStartup != mode_lib->vba.MaxMaxVStartup[0][0]
- || mode_lib->vba.NextPrefetchMode < mode_lib->vba.MaxPrefetchMode));
+ || mode_lib->vba.NextPrefetchMode <= mode_lib->vba.MaxPrefetchMode));
if (locals->PrefetchSupported[i][j] == true && locals->VRatioInPrefetchSupported[i][j] == true) {
mode_lib->vba.BandwidthAvailableForImmediateFlip = locals->ReturnBWPerState[i][0];
/* DSCL processing pixel data in fixed or float format */
enum dscl_data_processing_format dscl_data_proc_format;
+ /* max LB partitions */
+ unsigned int max_lb_partitions;
+
/* Calculates the number of partitions in the line buffer.
* The implementation of this function is overloaded for
* different versions of DSCL LB.
bool DEGVIDCN10_254;
bool DEGVIDCN21;
bool disallow_self_refresh_during_multi_plane_transition;
+ bool early_riommu_invalidation;
};
struct hwseq_wa_state {
#define PPSMC_MSG_SetSystemVirtualSTBtoDramAddrLow 0x41
#define PPSMC_MSG_GfxDriverResetRecovery 0x42
-#define PPSMC_Message_Count 0x43
+#define PPSMC_MSG_BoardPowerCalibration 0x43
+#define PPSMC_Message_Count 0x44
//PPSMC Reset Types
#define PPSMC_RESET_TYPE_WARM_RESET 0x00
__SMU_DUMMY_MAP(DisableDeterminism), \
__SMU_DUMMY_MAP(SetUclkDpmMode), \
__SMU_DUMMY_MAP(LightSBR), \
- __SMU_DUMMY_MAP(GfxDriverResetRecovery),
+ __SMU_DUMMY_MAP(GfxDriverResetRecovery), \
+ __SMU_DUMMY_MAP(BoardPowerCalibration),
#undef __SMU_DUMMY_MAP
#define __SMU_DUMMY_MAP(type) SMU_MSG_##type
#define SMU11_DRIVER_IF_VERSION_Navy_Flounder 0xE
#define SMU11_DRIVER_IF_VERSION_VANGOGH 0x03
#define SMU11_DRIVER_IF_VERSION_Dimgrey_Cavefish 0xF
-#define SMU11_DRIVER_IF_VERSION_Beige_Goby 0x9
+#define SMU11_DRIVER_IF_VERSION_Beige_Goby 0xD
/* MP Apertures */
#define MP0_Public 0x03800000
MSG_MAP(DisableDeterminism, PPSMC_MSG_DisableDeterminism, 0),
MSG_MAP(SetUclkDpmMode, PPSMC_MSG_SetUclkDpmMode, 0),
MSG_MAP(GfxDriverResetRecovery, PPSMC_MSG_GfxDriverResetRecovery, 0),
+ MSG_MAP(BoardPowerCalibration, PPSMC_MSG_BoardPowerCalibration, 0),
};
static const struct cmn2asic_mapping aldebaran_clk_map[SMU_CLK_COUNT] = {
return ret;
}
+static bool aldebaran_is_primary(struct smu_context *smu)
+{
+ struct amdgpu_device *adev = smu->adev;
+
+ if (adev->smuio.funcs && adev->smuio.funcs->get_die_id)
+ return adev->smuio.funcs->get_die_id(adev) == 0;
+
+ return true;
+}
+
+static int aldebaran_run_board_btc(struct smu_context *smu)
+{
+ u32 smu_version;
+ int ret;
+
+ if (!aldebaran_is_primary(smu))
+ return 0;
+
+ ret = smu_cmn_get_smc_version(smu, NULL, &smu_version);
+ if (ret) {
+ dev_err(smu->adev->dev, "Failed to get smu version!\n");
+ return ret;
+ }
+ if (smu_version <= 0x00441d00)
+ return 0;
+
+ ret = smu_cmn_send_smc_msg(smu, SMU_MSG_BoardPowerCalibration, NULL);
+ if (ret)
+ dev_err(smu->adev->dev, "Board power calibration failed!\n");
+
+ return ret;
+}
+
static int aldebaran_run_btc(struct smu_context *smu)
{
int ret;
ret = smu_cmn_send_smc_msg(smu, SMU_MSG_RunDcBtc, NULL);
if (ret)
dev_err(smu->adev->dev, "RunDcBtc failed!\n");
+ else
+ ret = aldebaran_run_board_btc(smu);
return ret;
}
return (abs(frequency1 - frequency2) <= EPSILON);
}
-static bool aldebaran_is_primary(struct smu_context *smu)
-{
- struct amdgpu_device *adev = smu->adev;
-
- if (adev->smuio.funcs && adev->smuio.funcs->get_die_id)
- return adev->smuio.funcs->get_die_id(adev) == 0;
-
- return true;
-}
-
static int aldebaran_get_smu_metrics_data(struct smu_context *smu,
MetricsMember_t member,
uint32_t *value)
if (drm_dev_is_unplugged(dev))
return -ENODEV;
+ if (DRM_IOCTL_TYPE(cmd) != DRM_IOCTL_BASE)
+ return -ENOTTY;
+
is_driver_ioctl = nr >= DRM_COMMAND_BASE && nr < DRM_COMMAND_END;
if (is_driver_ioctl) {
init_vbt_missing_defaults(struct drm_i915_private *i915)
{
enum port port;
- int ports = PORT_A | PORT_B | PORT_C | PORT_D | PORT_E | PORT_F;
+ int ports = BIT(PORT_A) | BIT(PORT_B) | BIT(PORT_C) |
+ BIT(PORT_D) | BIT(PORT_E) | BIT(PORT_F);
if (!HAS_DDI(i915) && !IS_CHERRYVIEW(i915))
return;
intel_ddi_init(dev_priv, PORT_B);
intel_ddi_init(dev_priv, PORT_C);
vlv_dsi_init(dev_priv);
- } else if (DISPLAY_VER(dev_priv) >= 9) {
+ } else if (DISPLAY_VER(dev_priv) == 10) {
intel_ddi_init(dev_priv, PORT_A);
intel_ddi_init(dev_priv, PORT_B);
intel_ddi_init(dev_priv, PORT_C);
intel_ddi_init(dev_priv, PORT_D);
intel_ddi_init(dev_priv, PORT_E);
intel_ddi_init(dev_priv, PORT_F);
+ } else if (DISPLAY_VER(dev_priv) >= 9) {
+ intel_ddi_init(dev_priv, PORT_A);
+ intel_ddi_init(dev_priv, PORT_B);
+ intel_ddi_init(dev_priv, PORT_C);
+ intel_ddi_init(dev_priv, PORT_D);
+ intel_ddi_init(dev_priv, PORT_E);
} else if (HAS_DDI(dev_priv)) {
u32 found;
#include "i915_gem_clflush.h"
#include "i915_gem_context.h"
#include "i915_gem_ioctls.h"
-#include "i915_sw_fence_work.h"
#include "i915_trace.h"
#include "i915_user_extensions.h"
-#include "i915_memcpy.h"
struct eb_vma {
struct i915_vma *vma;
int err;
struct intel_engine_cs *engine = eb->engine;
+ /* If we need to copy for the cmdparser, we will stall anyway */
+ if (eb_use_cmdparser(eb))
+ return ERR_PTR(-EWOULDBLOCK);
+
if (!reloc_can_use_engine(engine)) {
engine = engine->gt->engine_class[COPY_ENGINE_CLASS][0];
if (!engine)
return vma;
}
-struct eb_parse_work {
- struct dma_fence_work base;
- struct intel_engine_cs *engine;
- struct i915_vma *batch;
- struct i915_vma *shadow;
- struct i915_vma *trampoline;
- unsigned long batch_offset;
- unsigned long batch_length;
- unsigned long *jump_whitelist;
- const void *batch_map;
- void *shadow_map;
-};
-
-static int __eb_parse(struct dma_fence_work *work)
-{
- struct eb_parse_work *pw = container_of(work, typeof(*pw), base);
- int ret;
- bool cookie;
-
- cookie = dma_fence_begin_signalling();
- ret = intel_engine_cmd_parser(pw->engine,
- pw->batch,
- pw->batch_offset,
- pw->batch_length,
- pw->shadow,
- pw->jump_whitelist,
- pw->shadow_map,
- pw->batch_map);
- dma_fence_end_signalling(cookie);
-
- return ret;
-}
-
-static void __eb_parse_release(struct dma_fence_work *work)
-{
- struct eb_parse_work *pw = container_of(work, typeof(*pw), base);
-
- if (!IS_ERR_OR_NULL(pw->jump_whitelist))
- kfree(pw->jump_whitelist);
-
- if (pw->batch_map)
- i915_gem_object_unpin_map(pw->batch->obj);
- else
- i915_gem_object_unpin_pages(pw->batch->obj);
-
- i915_gem_object_unpin_map(pw->shadow->obj);
-
- if (pw->trampoline)
- i915_active_release(&pw->trampoline->active);
- i915_active_release(&pw->shadow->active);
- i915_active_release(&pw->batch->active);
-}
-
-static const struct dma_fence_work_ops eb_parse_ops = {
- .name = "eb_parse",
- .work = __eb_parse,
- .release = __eb_parse_release,
-};
-
-static inline int
-__parser_mark_active(struct i915_vma *vma,
- struct intel_timeline *tl,
- struct dma_fence *fence)
-{
- struct intel_gt_buffer_pool_node *node = vma->private;
-
- return i915_active_ref(&node->active, tl->fence_context, fence);
-}
-
-static int
-parser_mark_active(struct eb_parse_work *pw, struct intel_timeline *tl)
-{
- int err;
-
- mutex_lock(&tl->mutex);
-
- err = __parser_mark_active(pw->shadow, tl, &pw->base.dma);
- if (err)
- goto unlock;
-
- if (pw->trampoline) {
- err = __parser_mark_active(pw->trampoline, tl, &pw->base.dma);
- if (err)
- goto unlock;
- }
-
-unlock:
- mutex_unlock(&tl->mutex);
- return err;
-}
-
-static int eb_parse_pipeline(struct i915_execbuffer *eb,
- struct i915_vma *shadow,
- struct i915_vma *trampoline)
-{
- struct eb_parse_work *pw;
- struct drm_i915_gem_object *batch = eb->batch->vma->obj;
- bool needs_clflush;
- int err;
-
- GEM_BUG_ON(overflows_type(eb->batch_start_offset, pw->batch_offset));
- GEM_BUG_ON(overflows_type(eb->batch_len, pw->batch_length));
-
- pw = kzalloc(sizeof(*pw), GFP_KERNEL);
- if (!pw)
- return -ENOMEM;
-
- err = i915_active_acquire(&eb->batch->vma->active);
- if (err)
- goto err_free;
-
- err = i915_active_acquire(&shadow->active);
- if (err)
- goto err_batch;
-
- if (trampoline) {
- err = i915_active_acquire(&trampoline->active);
- if (err)
- goto err_shadow;
- }
-
- pw->shadow_map = i915_gem_object_pin_map(shadow->obj, I915_MAP_WB);
- if (IS_ERR(pw->shadow_map)) {
- err = PTR_ERR(pw->shadow_map);
- goto err_trampoline;
- }
-
- needs_clflush =
- !(batch->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ);
-
- pw->batch_map = ERR_PTR(-ENODEV);
- if (needs_clflush && i915_has_memcpy_from_wc())
- pw->batch_map = i915_gem_object_pin_map(batch, I915_MAP_WC);
-
- if (IS_ERR(pw->batch_map)) {
- err = i915_gem_object_pin_pages(batch);
- if (err)
- goto err_unmap_shadow;
- pw->batch_map = NULL;
- }
-
- pw->jump_whitelist =
- intel_engine_cmd_parser_alloc_jump_whitelist(eb->batch_len,
- trampoline);
- if (IS_ERR(pw->jump_whitelist)) {
- err = PTR_ERR(pw->jump_whitelist);
- goto err_unmap_batch;
- }
-
- dma_fence_work_init(&pw->base, &eb_parse_ops);
-
- pw->engine = eb->engine;
- pw->batch = eb->batch->vma;
- pw->batch_offset = eb->batch_start_offset;
- pw->batch_length = eb->batch_len;
- pw->shadow = shadow;
- pw->trampoline = trampoline;
-
- /* Mark active refs early for this worker, in case we get interrupted */
- err = parser_mark_active(pw, eb->context->timeline);
- if (err)
- goto err_commit;
-
- err = dma_resv_reserve_shared(pw->batch->resv, 1);
- if (err)
- goto err_commit;
-
- err = dma_resv_reserve_shared(shadow->resv, 1);
- if (err)
- goto err_commit;
-
- /* Wait for all writes (and relocs) into the batch to complete */
- err = i915_sw_fence_await_reservation(&pw->base.chain,
- pw->batch->resv, NULL, false,
- 0, I915_FENCE_GFP);
- if (err < 0)
- goto err_commit;
-
- /* Keep the batch alive and unwritten as we parse */
- dma_resv_add_shared_fence(pw->batch->resv, &pw->base.dma);
-
- /* Force execution to wait for completion of the parser */
- dma_resv_add_excl_fence(shadow->resv, &pw->base.dma);
-
- dma_fence_work_commit_imm(&pw->base);
- return 0;
-
-err_commit:
- i915_sw_fence_set_error_once(&pw->base.chain, err);
- dma_fence_work_commit_imm(&pw->base);
- return err;
-
-err_unmap_batch:
- if (pw->batch_map)
- i915_gem_object_unpin_map(batch);
- else
- i915_gem_object_unpin_pages(batch);
-err_unmap_shadow:
- i915_gem_object_unpin_map(shadow->obj);
-err_trampoline:
- if (trampoline)
- i915_active_release(&trampoline->active);
-err_shadow:
- i915_active_release(&shadow->active);
-err_batch:
- i915_active_release(&eb->batch->vma->active);
-err_free:
- kfree(pw);
- return err;
-}
-
static struct i915_vma *eb_dispatch_secure(struct i915_execbuffer *eb, struct i915_vma *vma)
{
/*
goto err_trampoline;
}
- err = eb_parse_pipeline(eb, shadow, trampoline);
+ err = dma_resv_reserve_shared(shadow->resv, 1);
+ if (err)
+ goto err_trampoline;
+
+ err = intel_engine_cmd_parser(eb->engine,
+ eb->batch->vma,
+ eb->batch_start_offset,
+ eb->batch_len,
+ shadow, trampoline);
if (err)
goto err_unpin_batch;
intel_gt_pm_get(&eb.i915->gt);
for_each_uabi_engine(eb.engine, eb.i915) {
+ if (intel_engine_requires_cmd_parser(eb.engine) ||
+ intel_engine_using_cmd_parser(eb.engine))
+ continue;
+
reloc_cache_init(&eb.reloc_cache, eb.i915);
memset(map, POISON_INUSE, 4096);
if (drm_WARN_ON(&i915->drm, !engine))
return -EINVAL;
+ /*
+ * Due to d3_entered is used to indicate skipping PPGTT invalidation on
+ * vGPU reset, it's set on D0->D3 on PCI config write, and cleared after
+ * vGPU reset if in resuming.
+ * In S0ix exit, the device power state also transite from D3 to D0 as
+ * S3 resume, but no vGPU reset (triggered by QEMU devic model). After
+ * S0ix exit, all engines continue to work. However the d3_entered
+ * remains set which will break next vGPU reset logic (miss the expected
+ * PPGTT invalidation).
+ * Engines can only work in D0. Thus the 1st elsp write gives GVT a
+ * chance to clear d3_entered.
+ */
+ if (vgpu->d3_entered)
+ vgpu->d3_entered = false;
+
execlist = &vgpu->submission.execlist[engine->id];
execlist->elsp_dwords.data[3 - execlist->elsp_dwords.index] = data;
static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
struct drm_i915_gem_object *src_obj,
unsigned long offset, unsigned long length,
- void *dst, const void *src)
+ bool *needs_clflush_after)
{
- bool needs_clflush =
- !(src_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ);
-
- if (src) {
- GEM_BUG_ON(!needs_clflush);
- i915_unaligned_memcpy_from_wc(dst, src + offset, length);
- } else {
- struct scatterlist *sg;
+ unsigned int src_needs_clflush;
+ unsigned int dst_needs_clflush;
+ void *dst, *src;
+ int ret;
+
+ ret = i915_gem_object_prepare_write(dst_obj, &dst_needs_clflush);
+ if (ret)
+ return ERR_PTR(ret);
+
+ dst = i915_gem_object_pin_map(dst_obj, I915_MAP_WB);
+ i915_gem_object_finish_access(dst_obj);
+ if (IS_ERR(dst))
+ return dst;
+
+ ret = i915_gem_object_prepare_read(src_obj, &src_needs_clflush);
+ if (ret) {
+ i915_gem_object_unpin_map(dst_obj);
+ return ERR_PTR(ret);
+ }
+
+ src = ERR_PTR(-ENODEV);
+ if (src_needs_clflush && i915_has_memcpy_from_wc()) {
+ src = i915_gem_object_pin_map(src_obj, I915_MAP_WC);
+ if (!IS_ERR(src)) {
+ i915_unaligned_memcpy_from_wc(dst,
+ src + offset,
+ length);
+ i915_gem_object_unpin_map(src_obj);
+ }
+ }
+ if (IS_ERR(src)) {
+ unsigned long x, n, remain;
void *ptr;
- unsigned int x, sg_ofs;
- unsigned long remain;
/*
* We can avoid clflushing partial cachelines before the write
* validate up to the end of the batch.
*/
remain = length;
- if (!(dst_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
+ if (dst_needs_clflush & CLFLUSH_BEFORE)
remain = round_up(remain,
boot_cpu_data.x86_clflush_size);
ptr = dst;
x = offset_in_page(offset);
- sg = i915_gem_object_get_sg(src_obj, offset >> PAGE_SHIFT, &sg_ofs, false);
-
- while (remain) {
- unsigned long sg_max = sg->length >> PAGE_SHIFT;
-
- for (; remain && sg_ofs < sg_max; sg_ofs++) {
- unsigned long len = min(remain, PAGE_SIZE - x);
- void *map;
-
- map = kmap_atomic(nth_page(sg_page(sg), sg_ofs));
- if (needs_clflush)
- drm_clflush_virt_range(map + x, len);
- memcpy(ptr, map + x, len);
- kunmap_atomic(map);
-
- ptr += len;
- remain -= len;
- x = 0;
- }
-
- sg_ofs = 0;
- sg = sg_next(sg);
+ for (n = offset >> PAGE_SHIFT; remain; n++) {
+ int len = min(remain, PAGE_SIZE - x);
+
+ src = kmap_atomic(i915_gem_object_get_page(src_obj, n));
+ if (src_needs_clflush)
+ drm_clflush_virt_range(src + x, len);
+ memcpy(ptr, src + x, len);
+ kunmap_atomic(src);
+
+ ptr += len;
+ remain -= len;
+ x = 0;
}
}
+ i915_gem_object_finish_access(src_obj);
+
memset32(dst + length, 0, (dst_obj->base.size - length) / sizeof(u32));
/* dst_obj is returned with vmap pinned */
+ *needs_clflush_after = dst_needs_clflush & CLFLUSH_AFTER;
+
return dst;
}
if (target_cmd_index == offset)
return 0;
+ if (IS_ERR(jump_whitelist))
+ return PTR_ERR(jump_whitelist);
+
if (!test_bit(target_cmd_index, jump_whitelist)) {
DRM_DEBUG("CMD: BB_START to 0x%llx not a previously executed cmd\n",
jump_target);
return 0;
}
-/**
- * intel_engine_cmd_parser_alloc_jump_whitelist() - preallocate jump whitelist for intel_engine_cmd_parser()
- * @batch_length: length of the commands in batch_obj
- * @trampoline: Whether jump trampolines are used.
- *
- * Preallocates a jump whitelist for parsing the cmd buffer in intel_engine_cmd_parser().
- * This has to be preallocated, because the command parser runs in signaling context,
- * and may not allocate any memory.
- *
- * Return: NULL or pointer to a jump whitelist, or ERR_PTR() on failure. Use
- * IS_ERR() to check for errors. Must bre freed() with kfree().
- *
- * NULL is a valid value, meaning no allocation was required.
- */
-unsigned long *intel_engine_cmd_parser_alloc_jump_whitelist(u32 batch_length,
- bool trampoline)
+static unsigned long *alloc_whitelist(u32 batch_length)
{
unsigned long *jmp;
- if (trampoline)
- return NULL;
-
/*
* We expect batch_length to be less than 256KiB for known users,
* i.e. we need at most an 8KiB bitmap allocation which should be
* @batch_offset: byte offset in the batch at which execution starts
* @batch_length: length of the commands in batch_obj
* @shadow: validated copy of the batch buffer in question
- * @jump_whitelist: buffer preallocated with intel_engine_cmd_parser_alloc_jump_whitelist()
- * @shadow_map: mapping to @shadow vma
- * @batch_map: mapping to @batch vma
+ * @trampoline: true if we need to trampoline into privileged execution
*
* Parses the specified batch buffer looking for privilege violations as
* described in the overview.
* Return: non-zero if the parser finds violations or otherwise fails; -EACCES
* if the batch appears legal but should use hardware parsing
*/
+
int intel_engine_cmd_parser(struct intel_engine_cs *engine,
struct i915_vma *batch,
unsigned long batch_offset,
unsigned long batch_length,
struct i915_vma *shadow,
- unsigned long *jump_whitelist,
- void *shadow_map,
- const void *batch_map)
+ bool trampoline)
{
u32 *cmd, *batch_end, offset = 0;
struct drm_i915_cmd_descriptor default_desc = noop_desc;
const struct drm_i915_cmd_descriptor *desc = &default_desc;
+ bool needs_clflush_after = false;
+ unsigned long *jump_whitelist;
u64 batch_addr, shadow_addr;
int ret = 0;
- bool trampoline = !jump_whitelist;
GEM_BUG_ON(!IS_ALIGNED(batch_offset, sizeof(*cmd)));
GEM_BUG_ON(!IS_ALIGNED(batch_length, sizeof(*cmd)));
batch->size));
GEM_BUG_ON(!batch_length);
- cmd = copy_batch(shadow->obj, batch->obj, batch_offset, batch_length,
- shadow_map, batch_map);
+ cmd = copy_batch(shadow->obj, batch->obj,
+ batch_offset, batch_length,
+ &needs_clflush_after);
+ if (IS_ERR(cmd)) {
+ DRM_DEBUG("CMD: Failed to copy batch\n");
+ return PTR_ERR(cmd);
+ }
+
+ jump_whitelist = NULL;
+ if (!trampoline)
+ /* Defer failure until attempted use */
+ jump_whitelist = alloc_whitelist(batch_length);
shadow_addr = gen8_canonical_addr(shadow->node.start);
batch_addr = gen8_canonical_addr(batch->node.start + batch_offset);
i915_gem_object_flush_map(shadow->obj);
+ if (!IS_ERR_OR_NULL(jump_whitelist))
+ kfree(jump_whitelist);
+ i915_gem_object_unpin_map(shadow->obj);
return ret;
}
int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv);
int intel_engine_init_cmd_parser(struct intel_engine_cs *engine);
void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine);
-unsigned long *intel_engine_cmd_parser_alloc_jump_whitelist(u32 batch_length,
- bool trampoline);
-
int intel_engine_cmd_parser(struct intel_engine_cs *engine,
struct i915_vma *batch,
unsigned long batch_offset,
unsigned long batch_length,
struct i915_vma *shadow,
- unsigned long *jump_whitelist,
- void *shadow_map,
- const void *batch_map);
+ bool trampoline);
#define I915_CMD_PARSER_TRAMPOLINE_SIZE 8
/* intel_device_info.c */
do {
fence = *child++;
- if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
- i915_sw_fence_set_error_once(&rq->submit, fence->error);
+ if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
continue;
- }
if (fence->context == rq->fence.context)
continue;
do {
fence = *child++;
- if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
- i915_sw_fence_set_error_once(&rq->submit, fence->error);
+ if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
continue;
- }
/*
* Requests on the same timeline are explicitly ordered, along
info->pipe_mask &= ~BIT(PIPE_C);
info->cpu_transcoder_mask &= ~BIT(TRANSCODER_C);
}
- } else if (HAS_DISPLAY(dev_priv) && GRAPHICS_VER(dev_priv) >= 9) {
+ } else if (HAS_DISPLAY(dev_priv) && DISPLAY_VER(dev_priv) >= 9) {
u32 dfsm = intel_de_read(dev_priv, SKL_DFSM);
if (dfsm & SKL_DFSM_PIPE_A_DISABLE) {
info->pipe_mask &= ~BIT(PIPE_C);
info->cpu_transcoder_mask &= ~BIT(TRANSCODER_C);
}
- if (GRAPHICS_VER(dev_priv) >= 12 &&
+
+ if (DISPLAY_VER(dev_priv) >= 12 &&
(dfsm & TGL_DFSM_PIPE_D_DISABLE)) {
info->pipe_mask &= ~BIT(PIPE_D);
info->cpu_transcoder_mask &= ~BIT(TRANSCODER_D);
if (dfsm & SKL_DFSM_DISPLAY_PM_DISABLE)
info->display.has_fbc = 0;
- if (GRAPHICS_VER(dev_priv) >= 11 && (dfsm & ICL_DFSM_DMC_DISABLE))
+ if (DISPLAY_VER(dev_priv) >= 11 && (dfsm & ICL_DFSM_DMC_DISABLE))
info->display.has_dmc = 0;
- if (GRAPHICS_VER(dev_priv) >= 10 &&
+ if (DISPLAY_VER(dev_priv) >= 10 &&
(dfsm & CNL_DFSM_DISPLAY_DSC_DISABLE))
info->display.has_dsc = 0;
}
static const struct dpu_mdp_cfg sm8250_mdp[] = {
{
.name = "top_0", .id = MDP_TOP,
- .base = 0x0, .len = 0x45C,
+ .base = 0x0, .len = 0x494,
.features = 0,
.highest_bank_bit = 0x3, /* TODO: 2 for LP_DDR4 */
.clk_ctrls[DPU_CLK_CTRL_VIG0] = {
dp_write_link(catalog, REG_DP_HSYNC_VSYNC_WIDTH_POLARITY,
dp_catalog->width_blanking);
dp_write_link(catalog, REG_DP_ACTIVE_HOR_VER, dp_catalog->dp_active);
+ dp_write_p0(catalog, MMSS_DP_INTF_CONFIG, 0);
return 0;
}
* running. Add the global reset just before disabling the
* link clocks and core clocks.
*/
- ret = dp_ctrl_off(&ctrl->dp_ctrl);
+ ret = dp_ctrl_off_link_stream(&ctrl->dp_ctrl);
if (ret) {
DRM_ERROR("failed to disable DP controller\n");
return ret;
goto end;
}
+ dp->aux->drm_dev = drm;
rc = dp_aux_register(dp->aux);
if (rc) {
DRM_ERROR("DRM DP AUX register failed\n");
else
dp->dp_display.is_connected = false;
+ dp_display_handle_plugged_change(g_dp_display,
+ dp->dp_display.is_connected);
+
+
mutex_unlock(&dp->event_mutex);
return 0;
.tlb_add_page = msm_iommu_tlb_add_page,
};
+static int msm_fault_handler(struct iommu_domain *domain, struct device *dev,
+ unsigned long iova, int flags, void *arg);
+
struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent)
{
struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(parent->dev);
if (!ttbr1_cfg)
return ERR_PTR(-ENODEV);
+ /*
+ * Defer setting the fault handler until we have a valid adreno_smmu
+ * to avoid accidentially installing a GPU specific fault handler for
+ * the display's iommu
+ */
+ iommu_set_fault_handler(iommu->domain, msm_fault_handler, iommu);
+
pagetable = kzalloc(sizeof(*pagetable), GFP_KERNEL);
if (!pagetable)
return ERR_PTR(-ENOMEM);
iommu->domain = domain;
msm_mmu_init(&iommu->base, dev, &funcs, MSM_MMU_IOMMU);
- iommu_set_fault_handler(domain, msm_fault_handler, iommu);
atomic_set(&iommu->pagetables, 0);
*/
if (bo->base.dev)
drm_gem_object_release(&bo->base);
+ else
+ dma_resv_fini(&bo->base._resv);
kfree(nvbo);
}
if (IS_ERR(nvbo))
return PTR_ERR(nvbo);
+ nvbo->bo.base.size = size;
+ dma_resv_init(&nvbo->bo.base._resv);
+ drm_vma_node_reset(&nvbo->bo.base.vma_node);
+
ret = nouveau_bo_init(nvbo, size, align, domain, sg, robj);
if (ret)
return ret;
drm_panel_remove(&ts->base);
mipi_dsi_device_unregister(ts->dsi);
- kfree(ts->dsi);
return 0;
}
static const struct panel_desc yes_optoelectronics_ytc700tlag_05_201c = {
.modes = &yes_optoelectronics_ytc700tlag_05_201c_mode,
.num_modes = 1,
- .bpc = 6,
+ .bpc = 8,
.size = {
.width = 154,
.height = 90,
return;
}
+ if (!mem)
+ return;
+
man = ttm_manager_type(bdev, mem->mem_type);
list_move_tail(&bo->lru, &man->lru[bo->priority]);
void ttm_mem_io_free(struct ttm_device *bdev,
struct ttm_resource *mem)
{
+ if (!mem)
+ return;
+
if (!mem->bus.offset && !mem->bus.addr)
return;
struct ttm_global ttm_glob;
EXPORT_SYMBOL(ttm_glob);
+struct dentry *ttm_debugfs_root;
+
static void ttm_global_release(void)
{
struct ttm_global *glob = &ttm_glob;
goto out;
ttm_pool_mgr_fini();
+ debugfs_remove(ttm_debugfs_root);
__free_page(glob->dummy_read_page);
memset(glob, 0, sizeof(*glob));
si_meminfo(&si);
+ ttm_debugfs_root = debugfs_create_dir("ttm", NULL);
+ if (IS_ERR(ttm_debugfs_root)) {
+ ret = PTR_ERR(ttm_debugfs_root);
+ ttm_debugfs_root = NULL;
+ goto out;
+ }
+
/* Limit the number of pages in the pool to about 50% of the total
* system memory.
*/
debugfs_create_atomic_t("buffer_objects", 0444, ttm_debugfs_root,
&glob->bo_count);
out:
+ if (ret && ttm_debugfs_root)
+ debugfs_remove(ttm_debugfs_root);
+ if (ret)
+ --ttm_glob_use_count;
mutex_unlock(&ttm_global_mutex);
return ret;
}
return tmp;
}
-struct dentry *ttm_debugfs_root;
-
-static int __init ttm_init(void)
-{
- ttm_debugfs_root = debugfs_create_dir("ttm", NULL);
- return 0;
-}
-
-static void __exit ttm_exit(void)
-{
- debugfs_remove(ttm_debugfs_root);
-}
-
-module_init(ttm_init);
-module_exit(ttm_exit);
-
MODULE_AUTHOR("Thomas Hellstrom, Jerome Glisse");
MODULE_DESCRIPTION("TTM memory manager subsystem (for DRM device)");
MODULE_LICENSE("GPL and additional rights");
vc4_hdmi_cec_update_clk_div(vc4_hdmi);
if (vc4_hdmi->variant->external_irq_controller) {
- ret = devm_request_threaded_irq(&pdev->dev,
- platform_get_irq_byname(pdev, "cec-rx"),
- vc4_cec_irq_handler_rx_bare,
- vc4_cec_irq_handler_rx_thread, 0,
- "vc4 hdmi cec rx", vc4_hdmi);
+ ret = request_threaded_irq(platform_get_irq_byname(pdev, "cec-rx"),
+ vc4_cec_irq_handler_rx_bare,
+ vc4_cec_irq_handler_rx_thread, 0,
+ "vc4 hdmi cec rx", vc4_hdmi);
if (ret)
goto err_delete_cec_adap;
- ret = devm_request_threaded_irq(&pdev->dev,
- platform_get_irq_byname(pdev, "cec-tx"),
- vc4_cec_irq_handler_tx_bare,
- vc4_cec_irq_handler_tx_thread, 0,
- "vc4 hdmi cec tx", vc4_hdmi);
+ ret = request_threaded_irq(platform_get_irq_byname(pdev, "cec-tx"),
+ vc4_cec_irq_handler_tx_bare,
+ vc4_cec_irq_handler_tx_thread, 0,
+ "vc4 hdmi cec tx", vc4_hdmi);
if (ret)
- goto err_delete_cec_adap;
+ goto err_remove_cec_rx_handler;
} else {
HDMI_WRITE(HDMI_CEC_CPU_MASK_SET, 0xffffffff);
- ret = devm_request_threaded_irq(&pdev->dev, platform_get_irq(pdev, 0),
- vc4_cec_irq_handler,
- vc4_cec_irq_handler_thread, 0,
- "vc4 hdmi cec", vc4_hdmi);
+ ret = request_threaded_irq(platform_get_irq(pdev, 0),
+ vc4_cec_irq_handler,
+ vc4_cec_irq_handler_thread, 0,
+ "vc4 hdmi cec", vc4_hdmi);
if (ret)
goto err_delete_cec_adap;
}
ret = cec_register_adapter(vc4_hdmi->cec_adap, &pdev->dev);
if (ret < 0)
- goto err_delete_cec_adap;
+ goto err_remove_handlers;
return 0;
+err_remove_handlers:
+ if (vc4_hdmi->variant->external_irq_controller)
+ free_irq(platform_get_irq_byname(pdev, "cec-tx"), vc4_hdmi);
+ else
+ free_irq(platform_get_irq(pdev, 0), vc4_hdmi);
+
+err_remove_cec_rx_handler:
+ if (vc4_hdmi->variant->external_irq_controller)
+ free_irq(platform_get_irq_byname(pdev, "cec-rx"), vc4_hdmi);
+
err_delete_cec_adap:
cec_delete_adapter(vc4_hdmi->cec_adap);
static void vc4_hdmi_cec_exit(struct vc4_hdmi *vc4_hdmi)
{
+ struct platform_device *pdev = vc4_hdmi->pdev;
+
+ if (vc4_hdmi->variant->external_irq_controller) {
+ free_irq(platform_get_irq_byname(pdev, "cec-rx"), vc4_hdmi);
+ free_irq(platform_get_irq_byname(pdev, "cec-tx"), vc4_hdmi);
+ } else {
+ free_irq(platform_get_irq(pdev, 0), vc4_hdmi);
+ }
+
cec_unregister_adapter(vc4_hdmi->cec_adap);
}
#else
depends on HID_LOGITECH
select POWER_SUPPLY
help
- Support for Logitech devices relyingon the HID++ Logitech specification
+ Support for Logitech devices relying on the HID++ Logitech specification
Say Y if you want support for Logitech devices relying on the HID++
specification. Such devices are the various Logitech Touchpads (T650,
cmd_base.cmd_v2.sensor_id = sensor_idx;
cmd_base.cmd_v2.length = 16;
- writeq(0x0, privdata->mmio + AMD_C2P_MSG2);
+ writeq(0x0, privdata->mmio + AMD_C2P_MSG1);
writel(cmd_base.ul, privdata->mmio + AMD_C2P_MSG0);
}
APPLE_RDESC_JIS },
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ANSI),
.driver_data = APPLE_HAS_FN },
+ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ANSI),
+ .driver_data = APPLE_HAS_FN },
{ HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ISO),
.driver_data = APPLE_HAS_FN },
{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ISO),
{
struct asus_kbd_leds *led = container_of(led_cdev, struct asus_kbd_leds,
cdev);
- if (led->brightness == brightness)
- return;
-
led->brightness = brightness;
schedule_work(&led->work);
}
int ret;
ret = ft260_get_system_config(hdev, &cfg);
- if (ret)
+ if (ret < 0)
return ret;
ft260_dbg("interface: 0x%02x\n", interface);
switch (cfg.chip_mode) {
case FT260_MODE_ALL:
case FT260_MODE_BOTH:
- if (interface == 1) {
+ if (interface == 1)
hid_info(hdev, "uart interface is not supported\n");
- return 0;
- }
- ret = 1;
+ else
+ ret = 1;
break;
case FT260_MODE_UART:
- if (interface == 0) {
- hid_info(hdev, "uart is unsupported on interface 0\n");
- ret = 0;
- }
+ hid_info(hdev, "uart interface is not supported\n");
break;
case FT260_MODE_I2C:
- if (interface == 1) {
- hid_info(hdev, "i2c is unsupported on interface 1\n");
- ret = 0;
- }
+ ret = 1;
break;
}
return ret;
if (ret < 0)
return ret;
- return scnprintf(buf, PAGE_SIZE, "%hi\n", *field);
+ return scnprintf(buf, PAGE_SIZE, "%d\n", *field);
}
static int ft260_word_show(struct hid_device *hdev, int id, u8 *cfg, int len,
if (ret < 0)
return ret;
- return scnprintf(buf, PAGE_SIZE, "%hi\n", le16_to_cpu(*field));
+ return scnprintf(buf, PAGE_SIZE, "%d\n", le16_to_cpu(*field));
}
#define FT260_ATTR_SHOW(name, reptype, id, type, func) \
static void ft260_remove(struct hid_device *hdev)
{
- int ret;
struct ft260_device *dev = hid_get_drvdata(hdev);
- ret = ft260_is_interface_enabled(hdev);
- if (ret <= 0)
+ if (!dev)
return;
sysfs_remove_group(&hdev->dev.kobj, &ft260_attr_group);
}
}
+static void hid_ishtp_cl_resume_handler(struct work_struct *work)
+{
+ struct ishtp_cl_data *client_data = container_of(work, struct ishtp_cl_data, resume_work);
+ struct ishtp_cl *hid_ishtp_cl = client_data->hid_ishtp_cl;
+
+ if (ishtp_wait_resume(ishtp_get_ishtp_device(hid_ishtp_cl))) {
+ client_data->suspended = false;
+ wake_up_interruptible(&client_data->ishtp_resume_wait);
+ }
+}
+
ishtp_print_log ishtp_hid_print_trace;
/**
init_waitqueue_head(&client_data->ishtp_resume_wait);
INIT_WORK(&client_data->work, hid_ishtp_cl_reset_handler);
+ INIT_WORK(&client_data->resume_work, hid_ishtp_cl_resume_handler);
+
ishtp_hid_print_trace = ishtp_trace_callback(cl_device);
hid_ishtp_trace(client_data, "%s hid_ishtp_cl %p\n", __func__,
hid_ishtp_cl);
- client_data->suspended = false;
+ schedule_work(&client_data->resume_work);
return 0;
}
int multi_packet_cnt;
struct work_struct work;
+ struct work_struct resume_work;
struct ishtp_cl_device *cl_device;
};
if (!device)
return 0;
- /*
- * When ISH needs hard reset, it is done asynchrnously, hence bus
- * resume will be called before full ISH resume
- */
- if (device->ishtp_dev->resume_flag)
- return 0;
-
driver = to_ishtp_cl_driver(dev->driver);
if (driver && driver->driver.pm) {
if (driver->driver.pm->resume)
}
EXPORT_SYMBOL(ishtp_device);
+/**
+ * ishtp_wait_resume() - Wait for IPC resume
+ *
+ * Wait for IPC resume
+ *
+ * Return: resume complete or not
+ */
+bool ishtp_wait_resume(struct ishtp_device *dev)
+{
+ /* 50ms to get resume response */
+ #define WAIT_FOR_RESUME_ACK_MS 50
+
+ /* Waiting to get resume response */
+ if (dev->resume_flag)
+ wait_event_interruptible_timeout(dev->resume_wait,
+ !dev->resume_flag,
+ msecs_to_jiffies(WAIT_FOR_RESUME_ACK_MS));
+
+ return (!dev->resume_flag);
+}
+EXPORT_SYMBOL_GPL(ishtp_wait_resume);
+
/**
* ishtp_get_pci_device() - Return PCI device dev pointer
* This interface is used to return PCI device pointer
help
Say Y here if you want to support HID devices (from the USB
specification standpoint) that aren't strictly user interface
- devices, like monitor controls and Uninterruptable Power Supplies.
+ devices, like monitor controls and Uninterruptible Power Supplies.
This module supports these devices separately using a separate
event interface on /dev/usb/hiddevX (char 180:96 to 180:111).
int slot;
slot = input_mt_get_slot_by_key(input, hid_data->id);
+ if (slot < 0)
+ return;
+
input_mt_slot(input, slot);
input_mt_report_slot_state(input, MT_TOOL_FINGER, prox);
}
wacom_wac->shared->touch->product == 0xF6) {
input_dev->evbit[0] |= BIT_MASK(EV_SW);
__set_bit(SW_MUTE_DEVICE, input_dev->swbit);
- wacom_wac->shared->has_mute_touch_switch = true;
+ wacom_wac->has_mute_touch_switch = true;
}
fallthrough;
status = readb(i2c->base + MPC_I2C_SR);
if (status & CSR_MIF) {
- /* Read again to allow register to stabilise */
- status = readb(i2c->base + MPC_I2C_SR);
+ /* Wait up to 100us for transfer to properly complete */
+ readb_poll_timeout(i2c->base + MPC_I2C_SR, status, !(status & CSR_MCF), 0, 100);
writeb(0, i2c->base + MPC_I2C_SR);
mpc_i2c_do_intr(i2c, status);
return IRQ_HANDLED;
if (!chip_ctx)
return -ENOMEM;
chip_ctx->chip_num = bp->chip_num;
+ chip_ctx->hw_stats_size = bp->hw_ring_stats_size;
rdev->chip_ctx = chip_ctx;
/* rest members to follow eventually */
dma_addr_t dma_map,
u32 *fw_stats_ctx_id)
{
+ struct bnxt_qplib_chip_ctx *chip_ctx = rdev->chip_ctx;
struct hwrm_stat_ctx_alloc_output resp = {0};
struct hwrm_stat_ctx_alloc_input req = {0};
struct bnxt_en_dev *en_dev = rdev->en_dev;
bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_ALLOC, -1, -1);
req.update_period_ms = cpu_to_le32(1000);
req.stats_dma_addr = cpu_to_le64(dma_map);
- req.stats_dma_length = cpu_to_le16(sizeof(struct ctx_hw_stats_ext));
+ req.stats_dma_length = cpu_to_le16(chip_ctx->hw_stats_size);
req.stat_ctx_flags = STAT_CTX_ALLOC_REQ_STAT_CTX_FLAGS_ROCE;
bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
static void bnxt_qplib_free_stats_ctx(struct pci_dev *pdev,
struct bnxt_qplib_stats *stats);
static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev,
+ struct bnxt_qplib_chip_ctx *cctx,
struct bnxt_qplib_stats *stats);
/* PBL */
goto fail;
stats_alloc:
/* Stats */
- rc = bnxt_qplib_alloc_stats_ctx(res->pdev, &ctx->stats);
+ rc = bnxt_qplib_alloc_stats_ctx(res->pdev, res->cctx, &ctx->stats);
if (rc)
goto fail;
}
static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev,
+ struct bnxt_qplib_chip_ctx *cctx,
struct bnxt_qplib_stats *stats)
{
memset(stats, 0, sizeof(*stats));
stats->fw_id = -1;
- /* 128 byte aligned context memory is required only for 57500.
- * However making this unconditional, it does not harm previous
- * generation.
- */
- stats->size = ALIGN(sizeof(struct ctx_hw_stats), 128);
+ stats->size = cctx->hw_stats_size;
stats->dma = dma_alloc_coherent(&pdev->dev, stats->size,
&stats->dma_map, GFP_KERNEL);
if (!stats->dma) {
u16 chip_num;
u8 chip_rev;
u8 chip_metal;
+ u16 hw_stats_size;
struct bnxt_qplib_drv_modes modes;
};
* parses fpm commit info and copy base value
* of hmc objects in hmc_info
*/
-static enum irdma_status_code
+static void
irdma_sc_parse_fpm_commit_buf(struct irdma_sc_dev *dev, __le64 *buf,
struct irdma_hmc_obj_info *info, u32 *sd)
{
else
*sd = (u32)(size >> 21);
- return 0;
}
/**
* @dev: sc device struct
* @count: allocate count
*/
-enum irdma_status_code irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev, u32 count)
+void irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev, u32 count)
{
writel(count, dev->hw_regs[IRDMA_AEQALLOC]);
-
- return 0;
}
/**
ret_code = irdma_sc_commit_fpm_val(dev->cqp, 0, hmc_info->hmc_fn_id,
&commit_fpm_mem, true, wait_type);
if (!ret_code)
- ret_code = irdma_sc_parse_fpm_commit_buf(dev, dev->fpm_commit_buf,
- hmc_info->hmc_obj,
- &hmc_info->sd_table.sd_cnt);
+ irdma_sc_parse_fpm_commit_buf(dev, dev->fpm_commit_buf,
+ hmc_info->hmc_obj,
+ &hmc_info->sd_table.sd_cnt);
print_hex_dump_debug("HMC: COMMIT FPM BUFFER", DUMP_PREFIX_OFFSET, 16,
8, commit_fpm_mem.va, IRDMA_COMMIT_FPM_BUF_SIZE,
false);
* irdma_set_hw_rsrc - set hw memory resources.
* @rf: RDMA PCI function
*/
-static u32 irdma_set_hw_rsrc(struct irdma_pci_f *rf)
+static void irdma_set_hw_rsrc(struct irdma_pci_f *rf)
{
rf->allocated_qps = (void *)(rf->mem_rsrc +
(sizeof(struct irdma_arp_entry) * rf->arp_table_size));
spin_lock_init(&rf->arp_lock);
spin_lock_init(&rf->qptable_lock);
spin_lock_init(&rf->qh_list_lock);
-
- return 0;
}
/**
rf->arp_table = (struct irdma_arp_entry *)rf->mem_rsrc;
- ret = irdma_set_hw_rsrc(rf);
- if (ret)
- goto set_hw_rsrc_fail;
+ irdma_set_hw_rsrc(rf);
set_bit(0, rf->allocated_mrs);
set_bit(0, rf->allocated_qps);
return 0;
-set_hw_rsrc_fail:
- kfree(rf->mem_rsrc);
- rf->mem_rsrc = NULL;
mem_rsrc_kzalloc_fail:
kfree(rf->allocated_ws_nodes);
rf->allocated_ws_nodes = NULL;
pr_debug("INIT: Gen2 PF[%d] device remove success\n", PCI_FUNC(pf->pdev->devfn));
}
-static void irdma_fill_device_info(struct irdma_device *iwdev, struct ice_pf *pf)
+static void irdma_fill_device_info(struct irdma_device *iwdev, struct ice_pf *pf,
+ struct ice_vsi *vsi)
{
struct irdma_pci_f *rf = iwdev->rf;
- struct ice_vsi *vsi = ice_get_main_vsi(pf);
rf->cdev = pf;
rf->gen_ops.register_qset = irdma_lan_register_qset;
struct iidc_auxiliary_dev,
adev);
struct ice_pf *pf = iidc_adev->pf;
+ struct ice_vsi *vsi = ice_get_main_vsi(pf);
struct iidc_qos_params qos_info = {};
struct irdma_device *iwdev;
struct irdma_pci_f *rf;
struct irdma_l2params l2params = {};
int err;
+ if (!vsi)
+ return -EIO;
iwdev = ib_alloc_device(irdma_device, ibdev);
if (!iwdev)
return -ENOMEM;
return -ENOMEM;
}
- irdma_fill_device_info(iwdev, pf);
+ irdma_fill_device_info(iwdev, pf, vsi);
rf = iwdev->rf;
if (irdma_ctrl_init_hw(rf)) {
struct irdma_aeq_init_info *info);
enum irdma_status_code irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq,
struct irdma_aeqe_info *info);
-enum irdma_status_code irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev,
- u32 count);
+void irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev, u32 count);
void irdma_sc_pd_init(struct irdma_sc_dev *dev, struct irdma_sc_pd *pd, u32 pd_id,
int abi_ver);
enum irdma_status_code irdma_uk_post_receive(struct irdma_qp_uk *qp,
struct irdma_post_rq_info *info)
{
- u32 total_size = 0, wqe_idx, i, byte_off;
+ u32 wqe_idx, i, byte_off;
u32 addl_frag_cnt;
__le64 *wqe;
u64 hdr;
if (qp->max_rq_frag_cnt < info->num_sges)
return IRDMA_ERR_INVALID_FRAG_COUNT;
- for (i = 0; i < info->num_sges; i++)
- total_size += info->sg_list[i].len;
-
wqe = irdma_qp_get_next_recv_wqe(qp, &wqe_idx);
if (!wqe)
return IRDMA_ERR_QP_TOOMANY_WRS_POSTED;
* @iwqp: qp ptr
* @init_info: initialize info to return
*/
-static int irdma_setup_virt_qp(struct irdma_device *iwdev,
+static void irdma_setup_virt_qp(struct irdma_device *iwdev,
struct irdma_qp *iwqp,
struct irdma_qp_init_info *init_info)
{
init_info->sq_pa = qpmr->sq_pbl.addr;
init_info->rq_pa = qpmr->rq_pbl.addr;
}
-
- return 0;
}
/**
}
}
init_info.qp_uk_init_info.abi_ver = iwpd->sc_pd.abi_ver;
- err_code = irdma_setup_virt_qp(iwdev, iwqp, &init_info);
+ irdma_setup_virt_qp(iwdev, iwqp, &init_info);
} else {
init_info.qp_uk_init_info.abi_ver = IRDMA_ABI_VER;
err_code = irdma_setup_kmode_qp(iwdev, iwqp, &init_info, init_attr);
MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD));
MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries));
MLX5_SET(cqc, cqc, uar_page, index);
- MLX5_SET(cqc, cqc, c_eqn, eqn);
+ MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
MLX5_SET64(cqc, cqc, dbr_addr, cq->db.dma);
if (cq->create_flags & IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN)
MLX5_SET(cqc, cqc, oi, 1);
rcu_read_unlock();
}
-static bool is_apu_thread_cq(struct mlx5_ib_dev *dev, const void *in)
+static bool is_apu_cq(struct mlx5_ib_dev *dev, const void *in)
{
if (!MLX5_CAP_GEN(dev->mdev, apu) ||
- !MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context),
- apu_thread_cq))
+ !MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), apu_cq))
return false;
return true;
err = mlx5_core_create_dct(dev, &obj->core_dct, cmd_in,
cmd_in_len, cmd_out, cmd_out_len);
} else if (opcode == MLX5_CMD_OP_CREATE_CQ &&
- !is_apu_thread_cq(dev, cmd_in)) {
+ !is_apu_cq(dev, cmd_in)) {
obj->flags |= DEVX_OBJ_FLAGS_CQ;
obj->core_cq.comp = devx_cq_comp;
err = mlx5_core_create_cq(dev->mdev, &obj->core_cq,
#include "srq.h"
static int
-mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev,
+ struct mlx5_eswitch_rep *rep,
+ int vport_index)
{
struct mlx5_ib_dev *ibdev;
- int vport_index;
ibdev = mlx5_eswitch_uplink_get_proto_dev(dev->priv.eswitch, REP_IB);
- vport_index = rep->vport_index;
+ if (!ibdev)
+ return -EINVAL;
ibdev->port[vport_index].rep = rep;
rep->rep_data[REP_IB].priv = ibdev;
return 0;
}
+static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev);
+
static int
mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
{
u32 num_ports = mlx5_eswitch_get_total_vports(dev);
const struct mlx5_ib_profile *profile;
+ struct mlx5_core_dev *peer_dev;
struct mlx5_ib_dev *ibdev;
+ u32 peer_num_ports;
int vport_index;
int ret;
+ vport_index = rep->vport_index;
+
+ if (mlx5_lag_is_shared_fdb(dev)) {
+ peer_dev = mlx5_lag_get_peer_mdev(dev);
+ peer_num_ports = mlx5_eswitch_get_total_vports(peer_dev);
+ if (mlx5_lag_is_master(dev)) {
+ /* Only 1 ib port is the representor for both uplinks */
+ num_ports += peer_num_ports - 1;
+ } else {
+ if (rep->vport == MLX5_VPORT_UPLINK)
+ return 0;
+ vport_index += peer_num_ports;
+ dev = peer_dev;
+ }
+ }
+
if (rep->vport == MLX5_VPORT_UPLINK)
profile = &raw_eth_profile;
else
- return mlx5_ib_set_vport_rep(dev, rep);
+ return mlx5_ib_set_vport_rep(dev, rep, vport_index);
ibdev = ib_alloc_device(mlx5_ib_dev, ib_dev);
if (!ibdev)
goto fail_add;
rep->rep_data[REP_IB].priv = ibdev;
+ if (mlx5_lag_is_shared_fdb(dev))
+ mlx5_ib_register_peer_vport_reps(dev);
return 0;
static void
mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
{
+ struct mlx5_core_dev *mdev = mlx5_eswitch_get_core_dev(rep->esw);
struct mlx5_ib_dev *dev = mlx5_ib_rep_to_dev(rep);
+ int vport_index = rep->vport_index;
struct mlx5_ib_port *port;
- port = &dev->port[rep->vport_index];
+ if (WARN_ON(!mdev))
+ return;
+
+ if (mlx5_lag_is_shared_fdb(mdev) &&
+ !mlx5_lag_is_master(mdev)) {
+ struct mlx5_core_dev *peer_mdev;
+
+ if (rep->vport == MLX5_VPORT_UPLINK)
+ return;
+ peer_mdev = mlx5_lag_get_peer_mdev(mdev);
+ vport_index += mlx5_eswitch_get_total_vports(peer_mdev);
+ }
+
+ if (!dev)
+ return;
+
+ port = &dev->port[vport_index];
write_lock(&port->roce.netdev_lock);
port->roce.netdev = NULL;
write_unlock(&port->roce.netdev_lock);
rep->rep_data[REP_IB].priv = NULL;
port->rep = NULL;
- if (rep->vport == MLX5_VPORT_UPLINK)
+ if (rep->vport == MLX5_VPORT_UPLINK) {
+ struct mlx5_core_dev *peer_mdev;
+ struct mlx5_eswitch *esw;
+
+ if (mlx5_lag_is_shared_fdb(mdev)) {
+ peer_mdev = mlx5_lag_get_peer_mdev(mdev);
+ esw = peer_mdev->priv.eswitch;
+ mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
+ }
__mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
+ }
}
static const struct mlx5_eswitch_rep_ops rep_ops = {
.get_proto_dev = mlx5_ib_rep_to_dev,
};
+static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_core_dev *peer_mdev = mlx5_lag_get_peer_mdev(mdev);
+ struct mlx5_eswitch *esw;
+
+ if (!peer_mdev)
+ return;
+
+ esw = peer_mdev->priv.eswitch;
+ mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
+}
+
struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
u16 vport_num)
{
rep = dev->port[port - 1].rep;
- return mlx5_eswitch_add_send_to_vport_rule(esw, rep, sq->base.mqp.qpn);
+ return mlx5_eswitch_add_send_to_vport_rule(esw, esw, rep, sq->base.mqp.qpn);
}
static int mlx5r_rep_probe(struct auxiliary_device *adev,
static struct mlx5_roce *mlx5_get_rep_roce(struct mlx5_ib_dev *dev,
struct net_device *ndev,
+ struct net_device *upper,
u32 *port_num)
{
struct net_device *rep_ndev;
if (!port->rep)
continue;
+ if (upper == ndev && port->rep->vport == MLX5_VPORT_UPLINK) {
+ *port_num = i + 1;
+ return &port->roce;
+ }
+
+ if (upper && port->rep->vport == MLX5_VPORT_UPLINK)
+ continue;
+
read_lock(&port->roce.netdev_lock);
rep_ndev = mlx5_ib_get_rep_netdev(port->rep->esw,
port->rep->vport);
}
if (ibdev->is_rep)
- roce = mlx5_get_rep_roce(ibdev, ndev, &port_num);
+ roce = mlx5_get_rep_roce(ibdev, ndev, upper, &port_num);
if (!roce)
return NOTIFY_DONE;
- if ((upper == ndev || (!upper && ndev == roce->netdev))
- && ibdev->ib_active) {
+ if ((upper == ndev ||
+ ((!upper || ibdev->is_rep) && ndev == roce->netdev)) &&
+ ibdev->ib_active) {
struct ib_event ibev = { };
enum ib_port_state port_state;
struct mlx5_flow_table *ft;
int err;
- if (!ns || !mlx5_lag_is_roce(mdev))
+ if (!ns || !mlx5_lag_is_active(mdev))
return 0;
err = mlx5_cmd_create_vport_lag(mdev);
{
int err;
- err = mlx5_nic_vport_enable_roce(dev->mdev);
- if (err)
- return err;
+ if (!dev->is_rep && dev->profile != &raw_eth_profile) {
+ err = mlx5_nic_vport_enable_roce(dev->mdev);
+ if (err)
+ return err;
+ }
err = mlx5_eth_lag_init(dev);
if (err)
return 0;
err_disable_roce:
- mlx5_nic_vport_disable_roce(dev->mdev);
+ if (!dev->is_rep && dev->profile != &raw_eth_profile)
+ mlx5_nic_vport_disable_roce(dev->mdev);
return err;
}
static void mlx5_disable_eth(struct mlx5_ib_dev *dev)
{
mlx5_eth_lag_cleanup(dev);
- mlx5_nic_vport_disable_roce(dev->mdev);
+ if (!dev->is_rep && dev->profile != &raw_eth_profile)
+ mlx5_nic_vport_disable_roce(dev->mdev);
}
static int mlx5_ib_rn_get_params(struct ib_device *device, u32 port_num,
/* Register only for native ports */
err = mlx5_add_netdev_notifier(dev, port_num);
- if (err || dev->is_rep || !mlx5_is_roce_init_enabled(mdev))
- /*
- * We don't enable ETH interface for
- * 1. IB representors
- * 2. User disabled ROCE through devlink interface
- */
+ if (err)
return err;
err = mlx5_enable_eth(dev);
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
if (ll == IB_LINK_LAYER_ETHERNET) {
- if (!dev->is_rep)
- mlx5_disable_eth(dev);
+ mlx5_disable_eth(dev);
port_num = mlx5_core_native_port_num(dev->mdev) - 1;
mlx5_remove_netdev_notifier(dev, port_num);
{
const char *name;
- if (!mlx5_lag_is_roce(dev->mdev))
+ if (!mlx5_lag_is_active(dev->mdev))
name = "mlx5_%d";
else
name = "mlx5_bond_%d";
static int fill_switchdev_info(struct mlx5_ib_dev *dev, u32 port_num,
struct mlx5_ib_uapi_query_port *info)
{
- struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_eswitch_rep *rep;
+ struct mlx5_core_dev *mdev;
int err;
rep = dev->port[port_num - 1].rep;
if (!rep)
return -EOPNOTSUPP;
+ mdev = mlx5_eswitch_get_core_dev(rep->esw);
+ if (!mdev)
+ return -EINVAL;
+
info->vport = rep->vport;
info->flags |= MLX5_IB_UAPI_QUERY_PORT_VPORT;
if (err)
return err;
- if (mlx5_eswitch_vport_match_metadata_enabled(mdev->priv.eswitch)) {
+ if (mlx5_eswitch_vport_match_metadata_enabled(rep->esw)) {
info->reg_c0.value = mlx5_eswitch_get_vport_metadata_for_match(
- mdev->priv.eswitch, rep->vport);
+ rep->esw, rep->vport);
info->reg_c0.mask = mlx5_eswitch_get_vport_metadata_mask();
info->flags |= MLX5_IB_UAPI_QUERY_PORT_VPORT_REG_C0;
}
int num_buf;
void *vaddr;
int err;
+ int i;
umem = ib_umem_get(pd->ibpd.device, start, length, access);
if (IS_ERR(umem)) {
- pr_warn("err %d from rxe_umem_get\n",
- (int)PTR_ERR(umem));
+ pr_warn("%s: Unable to pin memory region err = %d\n",
+ __func__, (int)PTR_ERR(umem));
err = PTR_ERR(umem);
- goto err1;
+ goto err_out;
}
mr->umem = umem;
err = rxe_mr_alloc(mr, num_buf);
if (err) {
- pr_warn("err %d from rxe_mr_alloc\n", err);
- ib_umem_release(umem);
- goto err1;
+ pr_warn("%s: Unable to allocate memory for map\n",
+ __func__);
+ goto err_release_umem;
}
mr->page_shift = PAGE_SHIFT;
vaddr = page_address(sg_page_iter_page(&sg_iter));
if (!vaddr) {
- pr_warn("null vaddr\n");
- ib_umem_release(umem);
+ pr_warn("%s: Unable to get virtual address\n",
+ __func__);
err = -ENOMEM;
- goto err1;
+ goto err_cleanup_map;
}
buf->addr = (uintptr_t)vaddr;
return 0;
-err1:
+err_cleanup_map:
+ for (i = 0; i < mr->num_map; i++)
+ kfree(mr->map[i]);
+ kfree(mr->map);
+err_release_umem:
+ ib_umem_release(umem);
+err_out:
return err;
}
struct media_request *req)
{
struct vb2_buffer *vb;
+ enum vb2_buffer_state orig_state;
int ret;
if (q->error) {
* Add to the queued buffers list, a buffer will stay on it until
* dequeued in dqbuf.
*/
+ orig_state = vb->state;
list_add_tail(&vb->queued_entry, &q->queued_list);
q->queued_count++;
q->waiting_for_buffers = false;
if (q->streaming && !q->start_streaming_called &&
q->queued_count >= q->min_buffers_needed) {
ret = vb2_start_streaming(q);
- if (ret)
+ if (ret) {
+ /*
+ * Since vb2_core_qbuf will return with an error,
+ * we should return it to state DEQUEUED since
+ * the error indicates that the buffer wasn't queued.
+ */
+ list_del(&vb->queued_entry);
+ q->queued_count--;
+ vb->state = orig_state;
return ret;
+ }
}
dprintk(q, 2, "qbuf of buffer %d succeeded\n", vb->index);
int ret;
for_each_acpi_dev_match(adev, cfg->hid, NULL, -1) {
- if (!adev->status.enabled) {
- acpi_dev_put(adev);
+ if (!adev->status.enabled)
continue;
- }
if (bridge->n_sensors >= CIO2_NUM_PORTS) {
acpi_dev_put(adev);
}
sensor = &bridge->sensors[bridge->n_sensors];
- sensor->adev = adev;
strscpy(sensor->name, cfg->hid, sizeof(sensor->name));
ret = cio2_bridge_read_acpi_buffer(adev, "SSDB",
goto err_free_swnodes;
}
+ sensor->adev = acpi_dev_get(adev);
adev->fwnode.secondary = fwnode;
dev_info(&cio2->dev, "Found supported sensor %s\n",
com.cmd.hdr.Opcode = CMD_CONFIGURE_FREE_BUFFER;
com.cmd.hdr.Length = 6;
- memcpy(&com.cmd.ConfigureBuffers.config, config, 6);
+ memcpy(&com.cmd.ConfigureFreeBuffers.config, config, 6);
com.in_len = 6;
com.out_len = 0;
struct FW_CONFIGURE_FREE_BUFFERS {
struct FW_HEADER hdr;
- u8 UVI1_BufferLength;
- u8 UVI2_BufferLength;
- u8 TVO_BufferLength;
- u8 AUD1_BufferLength;
- u8 AUD2_BufferLength;
- u8 TVA_BufferLength;
+ struct {
+ u8 UVI1_BufferLength;
+ u8 UVI2_BufferLength;
+ u8 TVO_BufferLength;
+ u8 AUD1_BufferLength;
+ u8 AUD2_BufferLength;
+ u8 TVA_BufferLength;
+ } __packed config;
} __attribute__ ((__packed__));
struct FW_CONFIGURE_UART {
select VIDEOBUF2_DMA_CONTIG
select REGMAP_MMIO
select V4L2_FWNODE
+ select VIDEO_ATMEL_ISC_BASE
help
This module makes the ATMEL Image Sensor Controller available
as a v4l2 device.
select VIDEOBUF2_DMA_CONTIG
select REGMAP_MMIO
select V4L2_FWNODE
+ select VIDEO_ATMEL_ISC_BASE
help
This module makes the ATMEL eXtended Image Sensor Controller
available as a v4l2 device.
+config VIDEO_ATMEL_ISC_BASE
+ tristate
+ default n
+ help
+ ATMEL ISC and XISC common code base.
+
config VIDEO_ATMEL_ISI
tristate "ATMEL Image Sensor Interface (ISI) support"
depends on VIDEO_V4L2 && OF
# SPDX-License-Identifier: GPL-2.0-only
-atmel-isc-objs = atmel-sama5d2-isc.o atmel-isc-base.o
-atmel-xisc-objs = atmel-sama7g5-isc.o atmel-isc-base.o
+atmel-isc-objs = atmel-sama5d2-isc.o
+atmel-xisc-objs = atmel-sama7g5-isc.o
obj-$(CONFIG_VIDEO_ATMEL_ISI) += atmel-isi.o
+obj-$(CONFIG_VIDEO_ATMEL_ISC_BASE) += atmel-isc-base.o
obj-$(CONFIG_VIDEO_ATMEL_ISC) += atmel-isc.o
obj-$(CONFIG_VIDEO_ATMEL_XISC) += atmel-xisc.o
return 0;
}
+EXPORT_SYMBOL_GPL(isc_clk_init);
void isc_clk_cleanup(struct isc_device *isc)
{
clk_unregister(isc_clk->clk);
}
}
+EXPORT_SYMBOL_GPL(isc_clk_cleanup);
static int isc_queue_setup(struct vb2_queue *vq,
unsigned int *nbuffers, unsigned int *nplanes,
return ret;
}
+EXPORT_SYMBOL_GPL(isc_interrupt);
static void isc_hist_count(struct isc_device *isc, u32 *min, u32 *max)
{
.unbind = isc_async_unbind,
.complete = isc_async_complete,
};
+EXPORT_SYMBOL_GPL(isc_async_ops);
void isc_subdev_cleanup(struct isc_device *isc)
{
INIT_LIST_HEAD(&isc->subdev_entities);
}
+EXPORT_SYMBOL_GPL(isc_subdev_cleanup);
int isc_pipeline_init(struct isc_device *isc)
{
return 0;
}
+EXPORT_SYMBOL_GPL(isc_pipeline_init);
/* regmap configuration */
#define ATMEL_ISC_REG_MAX 0xd5c
.val_bits = 32,
.max_register = ATMEL_ISC_REG_MAX,
};
+EXPORT_SYMBOL_GPL(isc_regmap_config);
+MODULE_AUTHOR("Songjun Wu");
+MODULE_AUTHOR("Eugen Hristev");
+MODULE_DESCRIPTION("Atmel ISC common code base");
+MODULE_LICENSE("GPL v2");
} else {
/* read */
requesttype = (USB_TYPE_VENDOR | USB_DIR_IN);
- pipe = usb_rcvctrlpipe(d->udev, 0);
+
+ /*
+ * Zero-length transfers must use usb_sndctrlpipe() and
+ * rtl28xxu_identify_state() uses a zero-length i2c read
+ * command to determine the chip type.
+ */
+ if (req->size)
+ pipe = usb_rcvctrlpipe(d->udev, 0);
+ else
+ pipe = usb_sndctrlpipe(d->udev, 0);
}
ret = usb_control_msg(d->udev, pipe, 0, requesttype, req->value,
static int rtl28xxu_identify_state(struct dvb_usb_device *d, const char **name)
{
struct rtl28xxu_dev *dev = d_to_priv(d);
- u8 buf[1];
int ret;
- struct rtl28xxu_req req_demod_i2c = {0x0020, CMD_I2C_DA_RD, 1, buf};
+ struct rtl28xxu_req req_demod_i2c = {0x0020, CMD_I2C_DA_RD, 0, NULL};
dev_dbg(&d->intf->dev, "\n");
}
/*
- * If the 'label' property is not present for the AT24 EEPROM,
- * then nvmem_config.id is initialised to NVMEM_DEVID_AUTO,
- * and this will append the 'devid' to the name of the NVMEM
- * device. This is purely legacy and the AT24 driver has always
- * defaulted to this. However, if the 'label' property is
- * present then this means that the name is specified by the
- * firmware and this name should be used verbatim and so it is
- * not necessary to append the 'devid'.
+ * We initialize nvmem_config.id to NVMEM_DEVID_AUTO even if the
+ * label property is set as some platform can have multiple eeproms
+ * with same label and we can not register each of those with same
+ * label. Failing to register those eeproms trigger cascade failure
+ * on such platform.
*/
+ nvmem_config.id = NVMEM_DEVID_AUTO;
+
if (device_property_present(dev, "label")) {
- nvmem_config.id = NVMEM_DEVID_NONE;
err = device_property_read_string(dev, "label",
&nvmem_config.name);
if (err)
return err;
} else {
- nvmem_config.id = NVMEM_DEVID_AUTO;
nvmem_config.name = dev_name(dev);
}
config MHI_NET
tristate "MHI network driver"
depends on MHI_BUS
- select WWAN
help
This is the network driver for MHI bus. It can be used with
- QCOM based WWAN modems (like SDX55). Say Y or M.
+ QCOM based WWAN modems for IP or QMAP/rmnet protocol (like SDX55).
+ Say Y or M.
endif # NET_CORE
a VM with direct attached VF by failing over to the paravirtual
datapath when the VF is unplugged.
+config NETDEV_LEGACY_INIT
+ bool
+ depends on ISA
+ help
+ Drivers that call netdev_boot_setup_check() should select this
+ symbol, everything else no longer needs it.
+
endif # NETDEVICES
obj-$(CONFIG_MACVTAP) += macvtap.o
obj-$(CONFIG_MII) += mii.o
obj-$(CONFIG_MDIO) += mdio.o
-obj-$(CONFIG_NET) += Space.o loopback.o
+obj-$(CONFIG_NET) += loopback.o
+obj-$(CONFIG_NETDEV_LEGACY_INIT) += Space.o
obj-$(CONFIG_NETCONSOLE) += netconsole.o
obj-y += phy/
obj-y += mdio/
obj-$(CONFIG_NLMON) += nlmon.o
obj-$(CONFIG_NET_VRF) += vrf.o
obj-$(CONFIG_VSOCKMON) += vsockmon.o
-obj-$(CONFIG_MHI_NET) += mhi/
+obj-$(CONFIG_MHI_NET) += mhi_net.o
#
# Networking Drivers
#include <linux/netlink.h>
#include <net/Space.h>
+/*
+ * This structure holds boot-time configured netdevice settings. They
+ * are then used in the device probing.
+ */
+struct netdev_boot_setup {
+ char name[IFNAMSIZ];
+ struct ifmap map;
+};
+#define NETDEV_BOOT_SETUP_MAX 8
+
+
+/******************************************************************************
+ *
+ * Device Boot-time Settings Routines
+ *
+ ******************************************************************************/
+
+/* Boot time configuration table */
+static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
+
+/**
+ * netdev_boot_setup_add - add new setup entry
+ * @name: name of the device
+ * @map: configured settings for the device
+ *
+ * Adds new setup entry to the dev_boot_setup list. The function
+ * returns 0 on error and 1 on success. This is a generic routine to
+ * all netdevices.
+ */
+static int netdev_boot_setup_add(char *name, struct ifmap *map)
+{
+ struct netdev_boot_setup *s;
+ int i;
+
+ s = dev_boot_setup;
+ for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
+ if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
+ memset(s[i].name, 0, sizeof(s[i].name));
+ strlcpy(s[i].name, name, IFNAMSIZ);
+ memcpy(&s[i].map, map, sizeof(s[i].map));
+ break;
+ }
+ }
+
+ return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
+}
+
+/**
+ * netdev_boot_setup_check - check boot time settings
+ * @dev: the netdevice
+ *
+ * Check boot time settings for the device.
+ * The found settings are set for the device to be used
+ * later in the device probing.
+ * Returns 0 if no settings found, 1 if they are.
+ */
+int netdev_boot_setup_check(struct net_device *dev)
+{
+ struct netdev_boot_setup *s = dev_boot_setup;
+ int i;
+
+ for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
+ if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
+ !strcmp(dev->name, s[i].name)) {
+ dev->irq = s[i].map.irq;
+ dev->base_addr = s[i].map.base_addr;
+ dev->mem_start = s[i].map.mem_start;
+ dev->mem_end = s[i].map.mem_end;
+ return 1;
+ }
+ }
+ return 0;
+}
+EXPORT_SYMBOL(netdev_boot_setup_check);
+
+/**
+ * netdev_boot_base - get address from boot time settings
+ * @prefix: prefix for network device
+ * @unit: id for network device
+ *
+ * Check boot time settings for the base address of device.
+ * The found settings are set for the device to be used
+ * later in the device probing.
+ * Returns 0 if no settings found.
+ */
+static unsigned long netdev_boot_base(const char *prefix, int unit)
+{
+ const struct netdev_boot_setup *s = dev_boot_setup;
+ char name[IFNAMSIZ];
+ int i;
+
+ sprintf(name, "%s%d", prefix, unit);
+
+ /*
+ * If device already registered then return base of 1
+ * to indicate not to probe for this interface
+ */
+ if (__dev_get_by_name(&init_net, name))
+ return 1;
+
+ for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
+ if (!strcmp(name, s[i].name))
+ return s[i].map.base_addr;
+ return 0;
+}
+
+/*
+ * Saves at boot time configured settings for any netdevice.
+ */
+static int __init netdev_boot_setup(char *str)
+{
+ int ints[5];
+ struct ifmap map;
+
+ str = get_options(str, ARRAY_SIZE(ints), ints);
+ if (!str || !*str)
+ return 0;
+
+ /* Save settings */
+ memset(&map, 0, sizeof(map));
+ if (ints[0] > 0)
+ map.irq = ints[1];
+ if (ints[0] > 1)
+ map.base_addr = ints[2];
+ if (ints[0] > 2)
+ map.mem_start = ints[3];
+ if (ints[0] > 3)
+ map.mem_end = ints[4];
+
+ /* Add new entry to the list */
+ return netdev_boot_setup_add(str, &map);
+}
+
+__setup("netdev=", netdev_boot_setup);
+
+static int __init ether_boot_setup(char *str)
+{
+ return netdev_boot_setup(str);
+}
+__setup("ether=", ether_boot_setup);
+
+
/* A unified ethernet device probe. This is the easiest way to have every
* ethernet adaptor have the name "eth[0123...]".
*/
#ifdef CONFIG_SMC9194
{smc_init, 0},
#endif
-#ifdef CONFIG_CS89x0
-#ifndef CONFIG_CS89x0_PLATFORM
+#ifdef CONFIG_CS89x0_ISA
{cs89x0_probe, 0},
#endif
-#endif
-#if defined(CONFIG_MVME16x_NET) || defined(CONFIG_BVME6000_NET) /* Intel */
- {i82596_probe, 0}, /* I82596 */
-#endif
#ifdef CONFIG_NI65
{ni65_probe, 0},
#endif
{NULL, 0},
};
-static struct devprobe2 m68k_probes[] __initdata = {
-#ifdef CONFIG_ATARILANCE /* Lance-based Atari ethernet boards */
- {atarilance_probe, 0},
-#endif
-#ifdef CONFIG_SUN3LANCE /* sun3 onboard Lance chip */
- {sun3lance_probe, 0},
-#endif
-#ifdef CONFIG_SUN3_82586 /* sun3 onboard Intel 82586 chip */
- {sun3_82586_probe, 0},
-#endif
-#ifdef CONFIG_APNE /* A1200 PCMCIA NE2000 */
- {apne_probe, 0},
-#endif
-#ifdef CONFIG_MVME147_NET /* MVME147 internal Ethernet */
- {mvme147lance_probe, 0},
-#endif
- {NULL, 0},
-};
-
/* Unified ethernet device probe, segmented per architecture and
* per bus interface. This drives the legacy devices only for now.
*/
if (base_addr == 1)
return;
- (void)(probe_list2(unit, m68k_probes, base_addr == 0) &&
- probe_list2(unit, isa_probes, base_addr == 0));
+ probe_list2(unit, isa_probes, base_addr == 0);
}
/* Statically configured drivers -- order matters here. */
{
int num;
-#ifdef CONFIG_SBNI
- for (num = 0; num < 8; ++num)
- sbni_probe(num);
-#endif
for (num = 0; num < 8; ++num)
ethif_probe2(num);
cops_probe(1);
cops_probe(2);
#endif
-#ifdef CONFIG_LTPC
- ltpc_probe();
-#endif
return 0;
}
config COPS
tristate "COPS LocalTalk PC support"
- depends on DEV_APPLETALK && (ISA || EISA)
+ depends on DEV_APPLETALK && ISA
+ depends on NETDEVICES
+ select NETDEV_LEGACY_INIT
help
This allows you to use COPS AppleTalk cards to connect to LocalTalk
networks. You also need version 1.3.3 or later of the netatalk
.ndo_set_rx_mode = set_multicast_list,
};
-struct net_device * __init ltpc_probe(void)
+static struct net_device * __init ltpc_probe(void)
{
struct net_device *dev;
int err = -ENOMEM;
}
__setup("ltpc=", ltpc_setup);
-#endif /* MODULE */
+#endif
static struct net_device *dev_ltpc;
-#ifdef MODULE
-
MODULE_LICENSE("GPL");
module_param(debug, int, 0);
module_param_hw(io, int, ioport, 0);
return PTR_ERR_OR_ZERO(dev_ltpc);
}
module_init(ltpc_module_init);
-#endif
static void __exit ltpc_cleanup(void)
{
static void ad_mux_machine(struct port *port, bool *update_slave_arr);
static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port);
static void ad_tx_machine(struct port *port);
-static void ad_periodic_machine(struct port *port);
+static void ad_periodic_machine(struct port *port, struct bond_params bond_params);
static void ad_port_selection_logic(struct port *port, bool *update_slave_arr);
static void ad_agg_selection_logic(struct aggregator *aggregator,
bool *update_slave_arr);
/**
* ad_periodic_machine - handle a port's periodic state machine
* @port: the port we're looking at
+ * @bond_params: bond parameters we will use
*
* Turn ntt flag on priodically to perform periodic transmission of lacpdu's.
*/
-static void ad_periodic_machine(struct port *port)
+static void ad_periodic_machine(struct port *port, struct bond_params bond_params)
{
periodic_states_t last_state;
/* check if port was reinitialized */
if (((port->sm_vars & AD_PORT_BEGIN) || !(port->sm_vars & AD_PORT_LACP_ENABLED) || !port->is_enabled) ||
- (!(port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & LACP_STATE_LACP_ACTIVITY))
- ) {
+ (!(port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & LACP_STATE_LACP_ACTIVITY)) ||
+ !bond_params.lacp_active) {
port->sm_periodic_state = AD_NO_PERIODIC;
}
/* check if state machine should change state */
}
ad_rx_machine(NULL, port);
- ad_periodic_machine(port);
+ ad_periodic_machine(port, bond->params);
ad_port_selection_logic(port, &update_slave_arr);
ad_mux_machine(port, &update_slave_arr);
ad_tx_machine(port);
#include <linux/if_bonding.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
-#include <net/ipx.h>
#include <net/arp.h>
#include <net/ipv6.h>
#include <asm/byteorder.h>
if (!is_multicast_ether_addr(eth_data->h_dest)) {
switch (skb->protocol) {
case htons(ETH_P_IP):
- case htons(ETH_P_IPX):
- /* In case of IPX, it will falback to L2 hash */
case htons(ETH_P_IPV6):
hash_index = bond_xmit_hash(bond, skb);
if (bond->params.tlb_dynamic_lb) {
hash_size = sizeof(ip6hdr->daddr);
break;
}
- case ETH_P_IPX: {
- const struct ipxhdr *ipxhdr;
-
- if (pskb_network_may_pull(skb, sizeof(*ipxhdr))) {
- do_tx_balance = false;
- break;
- }
- ipxhdr = (struct ipxhdr *)skb_network_header(skb);
-
- if (ipxhdr->ipx_checksum != IPX_NO_CHECKSUM) {
- /* something is wrong with this packet */
- do_tx_balance = false;
- break;
- }
-
- if (ipxhdr->ipx_type != IPX_TYPE_NCP) {
- /* The only protocol worth balancing in
- * this family since it has an "ARP" like
- * mechanism
- */
- do_tx_balance = false;
- break;
- }
-
- eth_data = eth_hdr(skb);
- hash_start = (char *)eth_data->h_dest;
- hash_size = ETH_ALEN;
- break;
- }
case ETH_P_ARP:
do_tx_balance = false;
if (bond_info->rlb_enabled)
}
}
+static bool bond_xdp_check(struct bonding *bond)
+{
+ switch (BOND_MODE(bond)) {
+ case BOND_MODE_ROUNDROBIN:
+ case BOND_MODE_ACTIVEBACKUP:
+ case BOND_MODE_8023AD:
+ case BOND_MODE_XOR:
+ return true;
+ default:
+ return false;
+ }
+}
+
/*---------------------------------- VLAN -----------------------------------*/
/* In the following 2 functions, bond_vlan_rx_add_vid and bond_vlan_rx_kill_vid,
bond_update_slave_arr(bond, NULL);
+ if (!slave_dev->netdev_ops->ndo_bpf ||
+ !slave_dev->netdev_ops->ndo_xdp_xmit) {
+ if (bond->xdp_prog) {
+ NL_SET_ERR_MSG(extack, "Slave does not support XDP");
+ slave_err(bond_dev, slave_dev, "Slave does not support XDP\n");
+ res = -EOPNOTSUPP;
+ goto err_sysfs_del;
+ }
+ } else {
+ struct netdev_bpf xdp = {
+ .command = XDP_SETUP_PROG,
+ .flags = 0,
+ .prog = bond->xdp_prog,
+ .extack = extack,
+ };
+
+ if (dev_xdp_prog_count(slave_dev) > 0) {
+ NL_SET_ERR_MSG(extack,
+ "Slave has XDP program loaded, please unload before enslaving");
+ slave_err(bond_dev, slave_dev,
+ "Slave has XDP program loaded, please unload before enslaving\n");
+ res = -EOPNOTSUPP;
+ goto err_sysfs_del;
+ }
+
+ res = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp);
+ if (res < 0) {
+ /* ndo_bpf() sets extack error message */
+ slave_dbg(bond_dev, slave_dev, "Error %d calling ndo_bpf\n", res);
+ goto err_sysfs_del;
+ }
+ if (bond->xdp_prog)
+ bpf_prog_inc(bond->xdp_prog);
+ }
+
slave_info(bond_dev, slave_dev, "Enslaving as %s interface with %s link\n",
bond_is_active_slave(new_slave) ? "an active" : "a backup",
new_slave->link != BOND_LINK_DOWN ? "an up" : "a down");
/* recompute stats just before removing the slave */
bond_get_stats(bond->dev, &bond->bond_stats);
- bond_upper_dev_unlink(bond, slave);
+ if (bond->xdp_prog) {
+ struct netdev_bpf xdp = {
+ .command = XDP_SETUP_PROG,
+ .flags = 0,
+ .prog = NULL,
+ .extack = NULL,
+ };
+ if (slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp))
+ slave_warn(bond_dev, slave_dev, "failed to unload XDP program\n");
+ }
+
/* unregister rx_handler early so bond_handle_frame wouldn't be called
* for this slave anymore.
*/
if (BOND_MODE(bond) == BOND_MODE_8023AD)
bond_3ad_unbind_slave(slave);
+ bond_upper_dev_unlink(bond, slave);
+
if (bond_mode_can_use_xmit_hash(bond))
bond_update_slave_arr(bond, slave);
/*---------------------------- Hashing Policies -----------------------------*/
+/* Helper to access data in a packet, with or without a backing skb.
+ * If skb is given the data is linearized if necessary via pskb_may_pull.
+ */
+static inline const void *bond_pull_data(struct sk_buff *skb,
+ const void *data, int hlen, int n)
+{
+ if (likely(n <= hlen))
+ return data;
+ else if (skb && likely(pskb_may_pull(skb, n)))
+ return skb->head;
+
+ return NULL;
+}
+
/* L2 hash helper */
-static inline u32 bond_eth_hash(struct sk_buff *skb)
+static inline u32 bond_eth_hash(struct sk_buff *skb, const void *data, int mhoff, int hlen)
{
- struct ethhdr *ep, hdr_tmp;
+ struct ethhdr *ep;
- ep = skb_header_pointer(skb, 0, sizeof(hdr_tmp), &hdr_tmp);
- if (ep)
- return ep->h_dest[5] ^ ep->h_source[5] ^ ep->h_proto;
- return 0;
+ data = bond_pull_data(skb, data, hlen, mhoff + sizeof(struct ethhdr));
+ if (!data)
+ return 0;
+
+ ep = (struct ethhdr *)(data + mhoff);
+ return ep->h_dest[5] ^ ep->h_source[5] ^ be16_to_cpu(ep->h_proto);
}
-static bool bond_flow_ip(struct sk_buff *skb, struct flow_keys *fk,
- int *noff, int *proto, bool l34)
+static bool bond_flow_ip(struct sk_buff *skb, struct flow_keys *fk, const void *data,
+ int hlen, __be16 l2_proto, int *nhoff, int *ip_proto, bool l34)
{
const struct ipv6hdr *iph6;
const struct iphdr *iph;
- if (skb->protocol == htons(ETH_P_IP)) {
- if (unlikely(!pskb_may_pull(skb, *noff + sizeof(*iph))))
+ if (l2_proto == htons(ETH_P_IP)) {
+ data = bond_pull_data(skb, data, hlen, *nhoff + sizeof(*iph));
+ if (!data)
return false;
- iph = (const struct iphdr *)(skb->data + *noff);
+
+ iph = (const struct iphdr *)(data + *nhoff);
iph_to_flow_copy_v4addrs(fk, iph);
- *noff += iph->ihl << 2;
+ *nhoff += iph->ihl << 2;
if (!ip_is_fragment(iph))
- *proto = iph->protocol;
- } else if (skb->protocol == htons(ETH_P_IPV6)) {
- if (unlikely(!pskb_may_pull(skb, *noff + sizeof(*iph6))))
+ *ip_proto = iph->protocol;
+ } else if (l2_proto == htons(ETH_P_IPV6)) {
+ data = bond_pull_data(skb, data, hlen, *nhoff + sizeof(*iph6));
+ if (!data)
return false;
- iph6 = (const struct ipv6hdr *)(skb->data + *noff);
+
+ iph6 = (const struct ipv6hdr *)(data + *nhoff);
iph_to_flow_copy_v6addrs(fk, iph6);
- *noff += sizeof(*iph6);
- *proto = iph6->nexthdr;
+ *nhoff += sizeof(*iph6);
+ *ip_proto = iph6->nexthdr;
} else {
return false;
}
- if (l34 && *proto >= 0)
- fk->ports.ports = skb_flow_get_ports(skb, *noff, *proto);
+ if (l34 && *ip_proto >= 0)
+ fk->ports.ports = __skb_flow_get_ports(skb, *nhoff, *ip_proto, data, hlen);
return true;
}
-static u32 bond_vlan_srcmac_hash(struct sk_buff *skb)
+static u32 bond_vlan_srcmac_hash(struct sk_buff *skb, const void *data, int mhoff, int hlen)
{
- struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
+ struct ethhdr *mac_hdr;
u32 srcmac_vendor = 0, srcmac_dev = 0;
u16 vlan;
int i;
+ data = bond_pull_data(skb, data, hlen, mhoff + sizeof(struct ethhdr));
+ if (!data)
+ return 0;
+ mac_hdr = (struct ethhdr *)(data + mhoff);
+
for (i = 0; i < 3; i++)
srcmac_vendor = (srcmac_vendor << 8) | mac_hdr->h_source[i];
}
/* Extract the appropriate headers based on bond's xmit policy */
-static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
- struct flow_keys *fk)
+static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, const void *data,
+ __be16 l2_proto, int nhoff, int hlen, struct flow_keys *fk)
{
bool l34 = bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34;
- int noff, proto = -1;
+ int ip_proto = -1;
switch (bond->params.xmit_policy) {
case BOND_XMIT_POLICY_ENCAP23:
case BOND_XMIT_POLICY_ENCAP34:
memset(fk, 0, sizeof(*fk));
return __skb_flow_dissect(NULL, skb, &flow_keys_bonding,
- fk, NULL, 0, 0, 0, 0);
+ fk, data, l2_proto, nhoff, hlen, 0);
default:
break;
}
fk->ports.ports = 0;
memset(&fk->icmp, 0, sizeof(fk->icmp));
- noff = skb_network_offset(skb);
- if (!bond_flow_ip(skb, fk, &noff, &proto, l34))
+ if (!bond_flow_ip(skb, fk, data, hlen, l2_proto, &nhoff, &ip_proto, l34))
return false;
/* ICMP error packets contains at least 8 bytes of the header
* to correlate ICMP error packets within the same flow which
* generated the error.
*/
- if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6) {
- skb_flow_get_icmp_tci(skb, &fk->icmp, skb->data,
- skb_transport_offset(skb),
- skb_headlen(skb));
- if (proto == IPPROTO_ICMP) {
+ if (ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6) {
+ skb_flow_get_icmp_tci(skb, &fk->icmp, data, nhoff, hlen);
+ if (ip_proto == IPPROTO_ICMP) {
if (!icmp_is_err(fk->icmp.type))
return true;
- noff += sizeof(struct icmphdr);
- } else if (proto == IPPROTO_ICMPV6) {
+ nhoff += sizeof(struct icmphdr);
+ } else if (ip_proto == IPPROTO_ICMPV6) {
if (!icmpv6_is_err(fk->icmp.type))
return true;
- noff += sizeof(struct icmp6hdr);
+ nhoff += sizeof(struct icmp6hdr);
}
- return bond_flow_ip(skb, fk, &noff, &proto, l34);
+ return bond_flow_ip(skb, fk, data, hlen, l2_proto, &nhoff, &ip_proto, l34);
}
return true;
return hash >> 1;
}
-/**
- * bond_xmit_hash - generate a hash value based on the xmit policy
- * @bond: bonding device
- * @skb: buffer to use for headers
- *
- * This function will extract the necessary headers from the skb buffer and use
- * them to generate a hash based on the xmit_policy set in the bonding device
+/* Generate hash based on xmit policy. If @skb is given it is used to linearize
+ * the data as required, but this function can be used without it if the data is
+ * known to be linear (e.g. with xdp_buff).
*/
-u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
+static u32 __bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, const void *data,
+ __be16 l2_proto, int mhoff, int nhoff, int hlen)
{
struct flow_keys flow;
u32 hash;
- if (bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP34 &&
- skb->l4_hash)
- return skb->hash;
-
if (bond->params.xmit_policy == BOND_XMIT_POLICY_VLAN_SRCMAC)
- return bond_vlan_srcmac_hash(skb);
+ return bond_vlan_srcmac_hash(skb, data, mhoff, hlen);
if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 ||
- !bond_flow_dissect(bond, skb, &flow))
- return bond_eth_hash(skb);
+ !bond_flow_dissect(bond, skb, data, l2_proto, nhoff, hlen, &flow))
+ return bond_eth_hash(skb, data, mhoff, hlen);
if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 ||
bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23) {
- hash = bond_eth_hash(skb);
+ hash = bond_eth_hash(skb, data, mhoff, hlen);
} else {
if (flow.icmp.id)
memcpy(&hash, &flow.icmp, sizeof(hash));
return bond_ip_hash(hash, &flow);
}
+/**
+ * bond_xmit_hash - generate a hash value based on the xmit policy
+ * @bond: bonding device
+ * @skb: buffer to use for headers
+ *
+ * This function will extract the necessary headers from the skb buffer and use
+ * them to generate a hash based on the xmit_policy set in the bonding device
+ */
+u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
+{
+ if (bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP34 &&
+ skb->l4_hash)
+ return skb->hash;
+
+ return __bond_xmit_hash(bond, skb, skb->head, skb->protocol,
+ skb->mac_header, skb->network_header,
+ skb_headlen(skb));
+}
+
+/**
+ * bond_xmit_hash_xdp - generate a hash value based on the xmit policy
+ * @bond: bonding device
+ * @xdp: buffer to use for headers
+ *
+ * The XDP variant of bond_xmit_hash.
+ */
+static u32 bond_xmit_hash_xdp(struct bonding *bond, struct xdp_buff *xdp)
+{
+ struct ethhdr *eth;
+
+ if (xdp->data + sizeof(struct ethhdr) > xdp->data_end)
+ return 0;
+
+ eth = (struct ethhdr *)xdp->data;
+
+ return __bond_xmit_hash(bond, NULL, xdp->data, eth->h_proto, 0,
+ sizeof(struct ethhdr), xdp->data_end - xdp->data);
+}
+
/*-------------------------- Device entry points ----------------------------*/
void bond_work_init_all(struct bonding *bond)
return NULL;
}
+static struct slave *bond_xdp_xmit_roundrobin_slave_get(struct bonding *bond,
+ struct xdp_buff *xdp)
+{
+ struct slave *slave;
+ int slave_cnt;
+ u32 slave_id;
+ const struct ethhdr *eth;
+ void *data = xdp->data;
+
+ if (data + sizeof(struct ethhdr) > xdp->data_end)
+ goto non_igmp;
+
+ eth = (struct ethhdr *)data;
+ data += sizeof(struct ethhdr);
+
+ /* See comment on IGMP in bond_xmit_roundrobin_slave_get() */
+ if (eth->h_proto == htons(ETH_P_IP)) {
+ const struct iphdr *iph;
+
+ if (data + sizeof(struct iphdr) > xdp->data_end)
+ goto non_igmp;
+
+ iph = (struct iphdr *)data;
+
+ if (iph->protocol == IPPROTO_IGMP) {
+ slave = rcu_dereference(bond->curr_active_slave);
+ if (slave)
+ return slave;
+ return bond_get_slave_by_id(bond, 0);
+ }
+ }
+
+non_igmp:
+ slave_cnt = READ_ONCE(bond->slave_cnt);
+ if (likely(slave_cnt)) {
+ slave_id = bond_rr_gen_slave_id(bond) % slave_cnt;
+ return bond_get_slave_by_id(bond, slave_id);
+ }
+ return NULL;
+}
+
static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb,
struct net_device *bond_dev)
{
return bond_tx_drop(bond_dev, skb);
}
-static struct slave *bond_xmit_activebackup_slave_get(struct bonding *bond,
- struct sk_buff *skb)
+static struct slave *bond_xmit_activebackup_slave_get(struct bonding *bond)
{
return rcu_dereference(bond->curr_active_slave);
}
struct bonding *bond = netdev_priv(bond_dev);
struct slave *slave;
- slave = bond_xmit_activebackup_slave_get(bond, skb);
+ slave = bond_xmit_activebackup_slave_get(bond);
if (slave)
return bond_dev_queue_xmit(bond, skb, slave->dev);
return slave;
}
+static struct slave *bond_xdp_xmit_3ad_xor_slave_get(struct bonding *bond,
+ struct xdp_buff *xdp)
+{
+ struct bond_up_slave *slaves;
+ unsigned int count;
+ u32 hash;
+
+ hash = bond_xmit_hash_xdp(bond, xdp);
+ slaves = rcu_dereference(bond->usable_slaves);
+ count = slaves ? READ_ONCE(slaves->count) : 0;
+ if (unlikely(!count))
+ return NULL;
+
+ return slaves->arr[hash % count];
+}
+
/* Use this Xmit function for 3AD as well as XOR modes. The current
* usable slave array is formed in the control path. The xmit function
* just calculates hash and sends the packet out.
slave = bond_xmit_roundrobin_slave_get(bond, skb);
break;
case BOND_MODE_ACTIVEBACKUP:
- slave = bond_xmit_activebackup_slave_get(bond, skb);
+ slave = bond_xmit_activebackup_slave_get(bond);
break;
case BOND_MODE_8023AD:
case BOND_MODE_XOR:
return ret;
}
+static struct net_device *
+bond_xdp_get_xmit_slave(struct net_device *bond_dev, struct xdp_buff *xdp)
+{
+ struct bonding *bond = netdev_priv(bond_dev);
+ struct slave *slave;
+
+ /* Caller needs to hold rcu_read_lock() */
+
+ switch (BOND_MODE(bond)) {
+ case BOND_MODE_ROUNDROBIN:
+ slave = bond_xdp_xmit_roundrobin_slave_get(bond, xdp);
+ break;
+
+ case BOND_MODE_ACTIVEBACKUP:
+ slave = bond_xmit_activebackup_slave_get(bond);
+ break;
+
+ case BOND_MODE_8023AD:
+ case BOND_MODE_XOR:
+ slave = bond_xdp_xmit_3ad_xor_slave_get(bond, xdp);
+ break;
+
+ default:
+ /* Should never happen. Mode guarded by bond_xdp_check() */
+ netdev_err(bond_dev, "Unknown bonding mode %d for xdp xmit\n", BOND_MODE(bond));
+ WARN_ON_ONCE(1);
+ return NULL;
+ }
+
+ if (slave)
+ return slave->dev;
+
+ return NULL;
+}
+
+static int bond_xdp_xmit(struct net_device *bond_dev,
+ int n, struct xdp_frame **frames, u32 flags)
+{
+ int nxmit, err = -ENXIO;
+
+ rcu_read_lock();
+
+ for (nxmit = 0; nxmit < n; nxmit++) {
+ struct xdp_frame *frame = frames[nxmit];
+ struct xdp_frame *frames1[] = {frame};
+ struct net_device *slave_dev;
+ struct xdp_buff xdp;
+
+ xdp_convert_frame_to_buff(frame, &xdp);
+
+ slave_dev = bond_xdp_get_xmit_slave(bond_dev, &xdp);
+ if (!slave_dev) {
+ err = -ENXIO;
+ break;
+ }
+
+ err = slave_dev->netdev_ops->ndo_xdp_xmit(slave_dev, 1, frames1, flags);
+ if (err < 1)
+ break;
+ }
+
+ rcu_read_unlock();
+
+ /* If error happened on the first frame then we can pass the error up, otherwise
+ * report the number of frames that were xmitted.
+ */
+ if (err < 0)
+ return (nxmit == 0 ? err : nxmit);
+
+ return nxmit;
+}
+
+static int bond_xdp_set(struct net_device *dev, struct bpf_prog *prog,
+ struct netlink_ext_ack *extack)
+{
+ struct bonding *bond = netdev_priv(dev);
+ struct list_head *iter;
+ struct slave *slave, *rollback_slave;
+ struct bpf_prog *old_prog;
+ struct netdev_bpf xdp = {
+ .command = XDP_SETUP_PROG,
+ .flags = 0,
+ .prog = prog,
+ .extack = extack,
+ };
+ int err;
+
+ ASSERT_RTNL();
+
+ if (!bond_xdp_check(bond))
+ return -EOPNOTSUPP;
+
+ old_prog = bond->xdp_prog;
+ bond->xdp_prog = prog;
+
+ bond_for_each_slave(bond, slave, iter) {
+ struct net_device *slave_dev = slave->dev;
+
+ if (!slave_dev->netdev_ops->ndo_bpf ||
+ !slave_dev->netdev_ops->ndo_xdp_xmit) {
+ NL_SET_ERR_MSG(extack, "Slave device does not support XDP");
+ slave_err(dev, slave_dev, "Slave does not support XDP\n");
+ err = -EOPNOTSUPP;
+ goto err;
+ }
+
+ if (dev_xdp_prog_count(slave_dev) > 0) {
+ NL_SET_ERR_MSG(extack,
+ "Slave has XDP program loaded, please unload before enslaving");
+ slave_err(dev, slave_dev,
+ "Slave has XDP program loaded, please unload before enslaving\n");
+ err = -EOPNOTSUPP;
+ goto err;
+ }
+
+ err = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp);
+ if (err < 0) {
+ /* ndo_bpf() sets extack error message */
+ slave_err(dev, slave_dev, "Error %d calling ndo_bpf\n", err);
+ goto err;
+ }
+ if (prog)
+ bpf_prog_inc(prog);
+ }
+
+ if (old_prog)
+ bpf_prog_put(old_prog);
+
+ if (prog)
+ static_branch_inc(&bpf_master_redirect_enabled_key);
+ else
+ static_branch_dec(&bpf_master_redirect_enabled_key);
+
+ return 0;
+
+err:
+ /* unwind the program changes */
+ bond->xdp_prog = old_prog;
+ xdp.prog = old_prog;
+ xdp.extack = NULL; /* do not overwrite original error */
+
+ bond_for_each_slave(bond, rollback_slave, iter) {
+ struct net_device *slave_dev = rollback_slave->dev;
+ int err_unwind;
+
+ if (slave == rollback_slave)
+ break;
+
+ err_unwind = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp);
+ if (err_unwind < 0)
+ slave_err(dev, slave_dev,
+ "Error %d when unwinding XDP program change\n", err_unwind);
+ else if (xdp.prog)
+ bpf_prog_inc(xdp.prog);
+ }
+ return err;
+}
+
+static int bond_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+ switch (xdp->command) {
+ case XDP_SETUP_PROG:
+ return bond_xdp_set(dev, xdp->prog, xdp->extack);
+ default:
+ return -EINVAL;
+ }
+}
+
static u32 bond_mode_bcast_speed(struct slave *slave, u32 speed)
{
if (speed == 0 || speed == SPEED_UNKNOWN)
.ndo_features_check = passthru_features_check,
.ndo_get_xmit_slave = bond_xmit_get_slave,
.ndo_sk_get_lower_dev = bond_sk_get_lower_dev,
+ .ndo_bpf = bond_xdp,
+ .ndo_xdp_xmit = bond_xdp_xmit,
+ .ndo_xdp_get_xmit_slave = bond_xdp_get_xmit_slave,
};
static const struct device_type bond_type = {
params->downdelay = downdelay;
params->peer_notif_delay = 0;
params->use_carrier = use_carrier;
+ params->lacp_active = 1;
params->lacp_fast = lacp_fast;
params->primary[0] = 0;
params->primary_reselect = primary_reselect_value;
[IFLA_BOND_MIN_LINKS] = { .type = NLA_U32 },
[IFLA_BOND_LP_INTERVAL] = { .type = NLA_U32 },
[IFLA_BOND_PACKETS_PER_SLAVE] = { .type = NLA_U32 },
+ [IFLA_BOND_AD_LACP_ACTIVE] = { .type = NLA_U8 },
[IFLA_BOND_AD_LACP_RATE] = { .type = NLA_U8 },
[IFLA_BOND_AD_SELECT] = { .type = NLA_U8 },
[IFLA_BOND_AD_INFO] = { .type = NLA_NESTED },
if (err)
return err;
}
+
+ if (data[IFLA_BOND_AD_LACP_ACTIVE]) {
+ int lacp_active = nla_get_u8(data[IFLA_BOND_AD_LACP_ACTIVE]);
+
+ bond_opt_initval(&newval, lacp_active);
+ err = __bond_opt_set(bond, BOND_OPT_LACP_ACTIVE, &newval);
+ if (err)
+ return err;
+ }
+
if (data[IFLA_BOND_AD_LACP_RATE]) {
int lacp_rate =
nla_get_u8(data[IFLA_BOND_AD_LACP_RATE]);
nla_total_size(sizeof(u32)) + /* IFLA_BOND_MIN_LINKS */
nla_total_size(sizeof(u32)) + /* IFLA_BOND_LP_INTERVAL */
nla_total_size(sizeof(u32)) + /* IFLA_BOND_PACKETS_PER_SLAVE */
+ nla_total_size(sizeof(u8)) + /* IFLA_BOND_AD_LACP_ACTIVE */
nla_total_size(sizeof(u8)) + /* IFLA_BOND_AD_LACP_RATE */
nla_total_size(sizeof(u8)) + /* IFLA_BOND_AD_SELECT */
nla_total_size(sizeof(struct nlattr)) + /* IFLA_BOND_AD_INFO */
packets_per_slave))
goto nla_put_failure;
+ if (nla_put_u8(skb, IFLA_BOND_AD_LACP_ACTIVE,
+ bond->params.lacp_active))
+ goto nla_put_failure;
+
if (nla_put_u8(skb, IFLA_BOND_AD_LACP_RATE,
bond->params.lacp_fast))
goto nla_put_failure;
const struct bond_opt_value *newval);
static int bond_option_pps_set(struct bonding *bond,
const struct bond_opt_value *newval);
+static int bond_option_lacp_active_set(struct bonding *bond,
+ const struct bond_opt_value *newval);
static int bond_option_lacp_rate_set(struct bonding *bond,
const struct bond_opt_value *newval);
static int bond_option_ad_select_set(struct bonding *bond,
{ NULL, -1, 0}
};
+static const struct bond_opt_value bond_lacp_active[] = {
+ { "off", 0, 0},
+ { "on", 1, BOND_VALFLAG_DEFAULT},
+ { NULL, -1, 0}
+};
+
static const struct bond_opt_value bond_lacp_rate_tbl[] = {
{ "slow", AD_LACP_SLOW, 0},
{ "fast", AD_LACP_FAST, 0},
.values = bond_intmax_tbl,
.set = bond_option_updelay_set
},
+ [BOND_OPT_LACP_ACTIVE] = {
+ .id = BOND_OPT_LACP_ACTIVE,
+ .name = "lacp_active",
+ .desc = "Send LACPDU frames with configured lacp rate or acts as speak when spoken to",
+ .flags = BOND_OPTFLAG_IFDOWN,
+ .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_8023AD)),
+ .values = bond_lacp_active,
+ .set = bond_option_lacp_active_set
+ },
[BOND_OPT_LACP_RATE] = {
.id = BOND_OPT_LACP_RATE,
.name = "lacp_rate",
return 0;
}
+static int bond_option_lacp_active_set(struct bonding *bond,
+ const struct bond_opt_value *newval)
+{
+ netdev_dbg(bond->dev, "Setting LACP active to %s (%llu)\n",
+ newval->string, newval->value);
+ bond->params.lacp_active = newval->value;
+
+ return 0;
+}
+
static int bond_option_lacp_rate_set(struct bonding *bond,
const struct bond_opt_value *newval)
{
struct ad_info ad_info;
seq_puts(seq, "\n802.3ad info\n");
+ seq_printf(seq, "LACP active: %s\n",
+ (bond->params.lacp_active) ? "on" : "off");
seq_printf(seq, "LACP rate: %s\n",
(bond->params.lacp_fast) ? "fast" : "slow");
seq_printf(seq, "Min links: %d\n", bond->params.min_links);
static DEVICE_ATTR(peer_notif_delay, 0644,
bonding_show_peer_notif_delay, bonding_sysfs_store_option);
-/* Show the LACP interval. */
-static ssize_t bonding_show_lacp(struct device *d,
- struct device_attribute *attr,
- char *buf)
+/* Show the LACP activity and interval. */
+static ssize_t bonding_show_lacp_active(struct device *d,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct bonding *bond = to_bond(d);
+ const struct bond_opt_value *val;
+
+ val = bond_opt_get_val(BOND_OPT_LACP_ACTIVE, bond->params.lacp_active);
+
+ return sprintf(buf, "%s %d\n", val->string, bond->params.lacp_active);
+}
+static DEVICE_ATTR(lacp_active, 0644,
+ bonding_show_lacp_active, bonding_sysfs_store_option);
+
+static ssize_t bonding_show_lacp_rate(struct device *d,
+ struct device_attribute *attr,
+ char *buf)
{
struct bonding *bond = to_bond(d);
const struct bond_opt_value *val;
return sprintf(buf, "%s %d\n", val->string, bond->params.lacp_fast);
}
static DEVICE_ATTR(lacp_rate, 0644,
- bonding_show_lacp, bonding_sysfs_store_option);
+ bonding_show_lacp_rate, bonding_sysfs_store_option);
static ssize_t bonding_show_min_links(struct device *d,
struct device_attribute *attr,
&dev_attr_downdelay.attr,
&dev_attr_updelay.attr,
&dev_attr_peer_notif_delay.attr,
+ &dev_attr_lacp_active.attr,
&dev_attr_lacp_rate.attr,
&dev_attr_ad_select.attr,
&dev_attr_xmit_hash_policy.attr,
static int flexcan_clks_enable(const struct flexcan_priv *priv)
{
- int err;
+ int err = 0;
if (priv->clk_ipg) {
err = clk_prepare_enable(priv->clk_ipg);
return ret;
}
-static u8 hi3110_cmd(struct spi_device *spi, u8 command)
+static int hi3110_cmd(struct spi_device *spi, u8 command)
{
struct hi3110_priv *priv = spi_get_drvdata(spi);
err, priv->regs_status.intf);
mcp251xfd_dump(priv);
mcp251xfd_chip_interrupts_disable(priv);
+ mcp251xfd_timestamp_stop(priv);
return handled;
}
unsigned int free_slots; /* remember number of available slots */
struct ems_cpc_msg active_params; /* active controller parameters */
+ void *rxbuf[MAX_RX_URBS];
+ dma_addr_t rxbuf_dma[MAX_RX_URBS];
};
static void ems_usb_read_interrupt_callback(struct urb *urb)
for (i = 0; i < MAX_RX_URBS; i++) {
struct urb *urb = NULL;
u8 *buf = NULL;
+ dma_addr_t buf_dma;
/* create a URB, and a buffer for it */
urb = usb_alloc_urb(0, GFP_KERNEL);
}
buf = usb_alloc_coherent(dev->udev, RX_BUFFER_SIZE, GFP_KERNEL,
- &urb->transfer_dma);
+ &buf_dma);
if (!buf) {
netdev_err(netdev, "No memory left for USB buffer\n");
usb_free_urb(urb);
break;
}
+ urb->transfer_dma = buf_dma;
+
usb_fill_bulk_urb(urb, dev->udev, usb_rcvbulkpipe(dev->udev, 2),
buf, RX_BUFFER_SIZE,
ems_usb_read_bulk_callback, dev);
break;
}
+ dev->rxbuf[i] = buf;
+ dev->rxbuf_dma[i] = buf_dma;
+
/* Drop reference, USB core will take care of freeing it */
usb_free_urb(urb);
}
usb_kill_anchored_urbs(&dev->rx_submitted);
+ for (i = 0; i < MAX_RX_URBS; ++i)
+ usb_free_coherent(dev->udev, RX_BUFFER_SIZE,
+ dev->rxbuf[i], dev->rxbuf_dma[i]);
+
usb_kill_anchored_urbs(&dev->tx_submitted);
atomic_set(&dev->active_tx_urbs, 0);
int net_count;
u32 version;
int rxinitdone;
+ void *rxbuf[MAX_RX_URBS];
+ dma_addr_t rxbuf_dma[MAX_RX_URBS];
};
struct esd_usb2_net_priv {
for (i = 0; i < MAX_RX_URBS; i++) {
struct urb *urb = NULL;
u8 *buf = NULL;
+ dma_addr_t buf_dma;
/* create a URB, and a buffer for it */
urb = usb_alloc_urb(0, GFP_KERNEL);
}
buf = usb_alloc_coherent(dev->udev, RX_BUFFER_SIZE, GFP_KERNEL,
- &urb->transfer_dma);
+ &buf_dma);
if (!buf) {
dev_warn(dev->udev->dev.parent,
"No memory left for USB buffer\n");
goto freeurb;
}
+ urb->transfer_dma = buf_dma;
+
usb_fill_bulk_urb(urb, dev->udev,
usb_rcvbulkpipe(dev->udev, 1),
buf, RX_BUFFER_SIZE,
usb_unanchor_urb(urb);
usb_free_coherent(dev->udev, RX_BUFFER_SIZE, buf,
urb->transfer_dma);
+ goto freeurb;
}
+ dev->rxbuf[i] = buf;
+ dev->rxbuf_dma[i] = buf_dma;
+
freeurb:
/* Drop reference, USB core will take care of freeing it */
usb_free_urb(urb);
int i, j;
usb_kill_anchored_urbs(&dev->rx_submitted);
+
+ for (i = 0; i < MAX_RX_URBS; ++i)
+ usb_free_coherent(dev->udev, RX_BUFFER_SIZE,
+ dev->rxbuf[i], dev->rxbuf_dma[i]);
+
for (i = 0; i < dev->net_count; i++) {
priv = dev->nets[i];
if (priv) {
break;
}
+ urb->transfer_dma = buf_dma;
+
usb_fill_bulk_urb(urb, priv->udev,
usb_rcvbulkpipe(priv->udev, MCBA_USB_EP_IN),
buf, MCBA_USB_RX_BUFF_SIZE,
#define PCAN_USB_BERR_MASK (PCAN_USB_ERR_RXERR | PCAN_USB_ERR_TXERR)
/* identify bus event packets with rx/tx error counters */
-#define PCAN_USB_ERR_CNT 0x80
+#define PCAN_USB_ERR_CNT_DEC 0x00 /* counters are decreasing */
+#define PCAN_USB_ERR_CNT_INC 0x80 /* counters are increasing */
/* private to PCAN-USB adapter */
struct pcan_usb {
/* acccording to the content of the packet */
switch (ir) {
- case PCAN_USB_ERR_CNT:
+ case PCAN_USB_ERR_CNT_DEC:
+ case PCAN_USB_ERR_CNT_INC:
/* save rx/tx error counters from in the device context */
- pdev->bec.rxerr = mc->ptr[0];
- pdev->bec.txerr = mc->ptr[1];
+ pdev->bec.rxerr = mc->ptr[1];
+ pdev->bec.txerr = mc->ptr[2];
break;
default:
u8 *cmd_msg_buffer;
struct mutex usb_8dev_cmd_lock;
-
+ void *rxbuf[MAX_RX_URBS];
+ dma_addr_t rxbuf_dma[MAX_RX_URBS];
};
/* tx frame */
for (i = 0; i < MAX_RX_URBS; i++) {
struct urb *urb = NULL;
u8 *buf;
+ dma_addr_t buf_dma;
/* create a URB, and a buffer for it */
urb = usb_alloc_urb(0, GFP_KERNEL);
}
buf = usb_alloc_coherent(priv->udev, RX_BUFFER_SIZE, GFP_KERNEL,
- &urb->transfer_dma);
+ &buf_dma);
if (!buf) {
netdev_err(netdev, "No memory left for USB buffer\n");
usb_free_urb(urb);
break;
}
+ urb->transfer_dma = buf_dma;
+
usb_fill_bulk_urb(urb, priv->udev,
usb_rcvbulkpipe(priv->udev,
USB_8DEV_ENDP_DATA_RX),
break;
}
+ priv->rxbuf[i] = buf;
+ priv->rxbuf_dma[i] = buf_dma;
+
/* Drop reference, USB core will take care of freeing it */
usb_free_urb(urb);
}
usb_kill_anchored_urbs(&priv->rx_submitted);
+ for (i = 0; i < MAX_RX_URBS; ++i)
+ usb_free_coherent(priv->udev, RX_BUFFER_SIZE,
+ priv->rxbuf[i], priv->rxbuf_dma[i]);
+
usb_kill_anchored_urbs(&priv->tx_submitted);
atomic_set(&priv->active_tx_urbs, 0);
}
EXPORT_SYMBOL(b53_br_flags);
-int b53_set_mrouter(struct dsa_switch *ds, int port, bool mrouter,
- struct netlink_ext_ack *extack)
-{
- b53_port_set_mcast_flood(ds->priv, port, mrouter);
-
- return 0;
-}
-EXPORT_SYMBOL(b53_set_mrouter);
-
static bool b53_possible_cpu_port(struct dsa_switch *ds, int port)
{
/* Broadcom switches will accept enabling Broadcom tags on the
.port_bridge_leave = b53_br_leave,
.port_pre_bridge_flags = b53_br_flags_pre,
.port_bridge_flags = b53_br_flags,
- .port_set_mrouter = b53_set_mrouter,
.port_stp_state_set = b53_br_set_stp_state,
.port_fast_age = b53_br_fast_age,
.port_vlan_filtering = b53_vlan_filtering,
int b53_br_flags(struct dsa_switch *ds, int port,
struct switchdev_brport_flags flags,
struct netlink_ext_ack *extack);
-int b53_set_mrouter(struct dsa_switch *ds, int port, bool mrouter,
- struct netlink_ext_ack *extack);
int b53_setup_devlink_resources(struct dsa_switch *ds);
void b53_port_event(struct dsa_switch *ds, int port);
void b53_phylink_validate(struct dsa_switch *ds, int port,
.port_pre_bridge_flags = b53_br_flags_pre,
.port_bridge_flags = b53_br_flags,
.port_stp_state_set = b53_br_set_stp_state,
- .port_set_mrouter = b53_set_mrouter,
.port_fast_age = b53_br_fast_age,
.port_vlan_filtering = b53_vlan_filtering,
.port_vlan_add = b53_vlan_add,
int i;
reg[1] |= vid & CVID_MASK;
- if (vid > 1)
- reg[1] |= ATA2_IVL;
+ reg[1] |= ATA2_IVL;
+ reg[1] |= ATA2_FID(FID_BRIDGED);
reg[2] |= (aging & AGE_TIMER_MASK) << AGE_TIMER;
reg[2] |= (port_mask & PORT_MAP_MASK) << PORT_MAP;
/* STATIC_ENT indicate that entry is static wouldn't
mt7530_write(priv, MT7530_PCR_P(port),
PCR_MATRIX(dsa_user_ports(priv->ds)));
+ /* Set to fallback mode for independent VLAN learning */
+ mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
+ MT7530_PORT_FALLBACK_MODE);
+
return 0;
}
break;
}
- mt7530_rmw(priv, MT7530_SSP_P(port), FID_PST_MASK, stp_state);
+ mt7530_rmw(priv, MT7530_SSP_P(port), FID_PST_MASK(FID_BRIDGED),
+ FID_PST(FID_BRIDGED, stp_state));
}
static int
return 0;
}
-static int
-mt7530_port_set_mrouter(struct dsa_switch *ds, int port, bool mrouter,
- struct netlink_ext_ack *extack)
-{
- struct mt7530_priv *priv = ds->priv;
-
- mt7530_rmw(priv, MT7530_MFC, UNM_FFP(BIT(port)),
- mrouter ? UNM_FFP(BIT(port)) : 0);
-
- return 0;
-}
-
static int
mt7530_port_bridge_join(struct dsa_switch *ds, int port,
struct net_device *bridge)
PCR_MATRIX_MASK, PCR_MATRIX(port_bitmap));
priv->ports[port].pm |= PCR_MATRIX(port_bitmap);
+ /* Set to fallback mode for independent VLAN learning */
+ mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
+ MT7530_PORT_FALLBACK_MODE);
+
mutex_unlock(&priv->reg_mutex);
return 0;
bool all_user_ports_removed = true;
int i;
- /* When a port is removed from the bridge, the port would be set up
- * back to the default as is at initial boot which is a VLAN-unaware
- * port.
+ /* This is called after .port_bridge_leave when leaving a VLAN-aware
+ * bridge. Don't set standalone ports to fallback mode.
*/
- mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
- MT7530_PORT_MATRIX_MODE);
- mt7530_rmw(priv, MT7530_PVC_P(port), VLAN_ATTR_MASK | PVC_EG_TAG_MASK,
+ if (dsa_to_port(ds, port)->bridge_dev)
+ mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
+ MT7530_PORT_FALLBACK_MODE);
+
+ mt7530_rmw(priv, MT7530_PVC_P(port),
+ VLAN_ATTR_MASK | PVC_EG_TAG_MASK | ACC_FRM_MASK,
VLAN_ATTR(MT7530_VLAN_TRANSPARENT) |
- PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT));
+ PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT) |
+ MT7530_VLAN_ACC_ALL);
+
+ /* Set PVID to 0 */
+ mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK,
+ G0_PORT_VID_DEF);
for (i = 0; i < MT7530_NUM_PORTS; i++) {
if (dsa_is_user_port(ds, i) &&
struct mt7530_priv *priv = ds->priv;
/* Trapped into security mode allows packet forwarding through VLAN
- * table lookup. CPU port is set to fallback mode to let untagged
- * frames pass through.
+ * table lookup.
*/
- if (dsa_is_cpu_port(ds, port))
- mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
- MT7530_PORT_FALLBACK_MODE);
- else
+ if (dsa_is_user_port(ds, port)) {
mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
MT7530_PORT_SECURITY_MODE);
+ mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK,
+ G0_PORT_VID(priv->ports[port].pvid));
+
+ /* Only accept tagged frames if PVID is not set */
+ if (!priv->ports[port].pvid)
+ mt7530_rmw(priv, MT7530_PVC_P(port), ACC_FRM_MASK,
+ MT7530_VLAN_ACC_TAGGED);
+ }
/* Set the port as a user port which is to be able to recognize VID
* from incoming packets before fetching entry within the VLAN table.
PCR_MATRIX(BIT(MT7530_CPU_PORT)));
priv->ports[port].pm = PCR_MATRIX(BIT(MT7530_CPU_PORT));
+ /* When a port is removed from the bridge, the port would be set up
+ * back to the default as is at initial boot which is a VLAN-unaware
+ * port.
+ */
+ mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
+ MT7530_PORT_MATRIX_MODE);
+
mutex_unlock(&priv->reg_mutex);
}
/* Validate the entry with independent learning, create egress tag per
* VLAN and joining the port as one of the port members.
*/
- val = IVL_MAC | VTAG_EN | PORT_MEM(new_members) | VLAN_VALID;
+ val = IVL_MAC | VTAG_EN | PORT_MEM(new_members) | FID(FID_BRIDGED) |
+ VLAN_VALID;
mt7530_write(priv, MT7530_VAWD1, val);
/* Decide whether adding tag or not for those outgoing packets from the
mt7530_hw_vlan_update(priv, vlan->vid, &new_entry, mt7530_hw_vlan_add);
if (pvid) {
- mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK,
- G0_PORT_VID(vlan->vid));
priv->ports[port].pvid = vlan->vid;
+
+ /* Accept all frames if PVID is set */
+ mt7530_rmw(priv, MT7530_PVC_P(port), ACC_FRM_MASK,
+ MT7530_VLAN_ACC_ALL);
+
+ /* Only configure PVID if VLAN filtering is enabled */
+ if (dsa_port_is_vlan_filtering(dsa_to_port(ds, port)))
+ mt7530_rmw(priv, MT7530_PPBV1_P(port),
+ G0_PORT_VID_MASK,
+ G0_PORT_VID(vlan->vid));
+ } else if (vlan->vid && priv->ports[port].pvid == vlan->vid) {
+ /* This VLAN is overwritten without PVID, so unset it */
+ priv->ports[port].pvid = G0_PORT_VID_DEF;
+
+ /* Only accept tagged frames if the port is VLAN-aware */
+ if (dsa_port_is_vlan_filtering(dsa_to_port(ds, port)))
+ mt7530_rmw(priv, MT7530_PVC_P(port), ACC_FRM_MASK,
+ MT7530_VLAN_ACC_TAGGED);
+
+ mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK,
+ G0_PORT_VID_DEF);
}
mutex_unlock(&priv->reg_mutex);
{
struct mt7530_hw_vlan_entry target_entry;
struct mt7530_priv *priv = ds->priv;
- u16 pvid;
mutex_lock(&priv->reg_mutex);
- pvid = priv->ports[port].pvid;
mt7530_hw_vlan_entry_init(&target_entry, port, 0);
mt7530_hw_vlan_update(priv, vlan->vid, &target_entry,
mt7530_hw_vlan_del);
/* PVID is being restored to the default whenever the PVID port
* is being removed from the VLAN.
*/
- if (pvid == vlan->vid)
- pvid = G0_PORT_VID_DEF;
+ if (priv->ports[port].pvid == vlan->vid) {
+ priv->ports[port].pvid = G0_PORT_VID_DEF;
+
+ /* Only accept tagged frames if the port is VLAN-aware */
+ if (dsa_port_is_vlan_filtering(dsa_to_port(ds, port)))
+ mt7530_rmw(priv, MT7530_PVC_P(port), ACC_FRM_MASK,
+ MT7530_VLAN_ACC_TAGGED);
+
+ mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK,
+ G0_PORT_VID_DEF);
+ }
- mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK, pvid);
- priv->ports[port].pvid = pvid;
mutex_unlock(&priv->reg_mutex);
mtk_get_tag_protocol(struct dsa_switch *ds, int port,
enum dsa_tag_protocol mp)
{
- struct mt7530_priv *priv = ds->priv;
-
- if (port != MT7530_CPU_PORT) {
- dev_warn(priv->dev,
- "port not matched with tagging CPU port\n");
- return DSA_TAG_PROTO_NONE;
- } else {
- return DSA_TAG_PROTO_MTK;
- }
+ return DSA_TAG_PROTO_MTK;
}
#ifdef CONFIG_GPIOLIB
* as two netdev instances.
*/
dn = dsa_to_port(ds, MT7530_CPU_PORT)->master->dev.of_node->parent;
+ ds->assisted_learning_on_cpu_port = true;
ds->mtu_enforcement_ingress = true;
if (priv->id == ID_MT7530) {
mt7530_rmw(priv, MT7530_PCR_P(i), PCR_MATRIX_MASK,
PCR_MATRIX_CLR);
+ /* Disable learning by default on all ports */
+ mt7530_set(priv, MT7530_PSC_P(i), SA_DIS);
+
if (dsa_is_cpu_port(ds, i)) {
ret = mt753x_cpu_port_enable(ds, i);
if (ret)
} else {
mt7530_port_disable(ds, i);
- /* Disable learning by default on all user ports */
- mt7530_set(priv, MT7530_PSC_P(i), SA_DIS);
+ /* Set default PVID to 0 on all user ports */
+ mt7530_rmw(priv, MT7530_PPBV1_P(i), G0_PORT_VID_MASK,
+ G0_PORT_VID_DEF);
}
/* Enable consistent egress tag */
mt7530_rmw(priv, MT7530_PVC_P(i), PVC_EG_TAG_MASK,
mt7530_rmw(priv, MT7530_PCR_P(i), PCR_MATRIX_MASK,
PCR_MATRIX_CLR);
+ /* Disable learning by default on all ports */
+ mt7530_set(priv, MT7530_PSC_P(i), SA_DIS);
+
mt7530_set(priv, MT7531_DBG_CNT(i), MT7531_DIS_CLR);
if (dsa_is_cpu_port(ds, i)) {
} else {
mt7530_port_disable(ds, i);
- /* Disable learning by default on all user ports */
- mt7530_set(priv, MT7530_PSC_P(i), SA_DIS);
+ /* Set default PVID to 0 on all user ports */
+ mt7530_rmw(priv, MT7530_PPBV1_P(i), G0_PORT_VID_MASK,
+ G0_PORT_VID_DEF);
}
/* Enable consistent egress tag */
PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT));
}
+ ds->assisted_learning_on_cpu_port = true;
ds->mtu_enforcement_ingress = true;
/* Flush the FDB table */
.port_stp_state_set = mt7530_stp_state_set,
.port_pre_bridge_flags = mt7530_port_pre_bridge_flags,
.port_bridge_flags = mt7530_port_bridge_flags,
- .port_set_mrouter = mt7530_port_set_mrouter,
.port_bridge_join = mt7530_port_bridge_join,
.port_bridge_leave = mt7530_port_bridge_leave,
.port_fdb_add = mt7530_port_fdb_add,
#define STATIC_ENT 3
#define MT7530_ATA2 0x78
#define ATA2_IVL BIT(15)
+#define ATA2_FID(x) (((x) & 0x7) << 12)
/* Register for address table write data */
#define MT7530_ATWD 0x7c
#define VTAG_EN BIT(28)
/* VLAN Member Control */
#define PORT_MEM(x) (((x) & 0xff) << 16)
+/* Filter ID */
+#define FID(x) (((x) & 0x7) << 1)
/* VLAN Entry Valid */
#define VLAN_VALID BIT(0)
#define PORT_MEM_SHFT 16
#define PORT_MEM_MASK 0xff
+enum mt7530_fid {
+ FID_STANDALONE = 0,
+ FID_BRIDGED = 1,
+};
+
#define MT7530_VAWD2 0x98
/* Egress Tag Control */
#define ETAG_CTRL_P(p, x) (((x) & 0x3) << ((p) << 1))
/* Register for port STP state control */
#define MT7530_SSP_P(x) (0x2000 + ((x) * 0x100))
-#define FID_PST(x) ((x) & 0x3)
-#define FID_PST_MASK FID_PST(0x3)
+#define FID_PST(fid, state) (((state) & 0x3) << ((fid) * 2))
+#define FID_PST_MASK(fid) FID_PST(fid, 0x3)
enum mt7530_stp_state {
MT7530_STP_DISABLED = 0,
#define PVC_EG_TAG_MASK PVC_EG_TAG(7)
#define VLAN_ATTR(x) (((x) & 0x3) << 6)
#define VLAN_ATTR_MASK VLAN_ATTR(3)
+#define ACC_FRM_MASK GENMASK(1, 0)
enum mt7530_vlan_port_eg_tag {
MT7530_VLAN_EG_DISABLED = 0,
MT7530_VLAN_TRANSPARENT = 3,
};
+enum mt7530_vlan_port_acc_frm {
+ MT7530_VLAN_ACC_ALL = 0,
+ MT7530_VLAN_ACC_TAGGED = 1,
+ MT7530_VLAN_ACC_UNTAGGED = 2,
+};
+
#define STAG_VPID (((x) & 0xffff) << 16)
/* Register for port port-and-protocol based vlan 1 control */
#define MT7530_PPBV1_P(x) (0x2014 + ((x) * 0x100))
#define G0_PORT_VID(x) (((x) & 0xfff) << 0)
#define G0_PORT_VID_MASK G0_PORT_VID(0xfff)
-#define G0_PORT_VID_DEF G0_PORT_VID(1)
+#define G0_PORT_VID_DEF G0_PORT_VID(0)
/* Register for port MAC control register */
#define MT7530_PMCR_P(x) (0x3000 + ((x) * 0x100))
int i, err;
if (!vid)
- return -EOPNOTSUPP;
+ return 0;
err = mv88e6xxx_vtu_get(chip, vid, &vlan);
if (err)
struct netlink_ext_ack *extack)
{
struct mv88e6xxx_chip *chip = ds->priv;
- bool do_fast_age = false;
int err = -EOPNOTSUPP;
mv88e6xxx_reg_lock(chip);
err = mv88e6xxx_port_set_assoc_vector(chip, port, pav);
if (err)
goto out;
-
- if (!learning)
- do_fast_age = true;
}
if (flags.mask & BR_FLOOD) {
out:
mv88e6xxx_reg_unlock(chip);
- if (do_fast_age)
- mv88e6xxx_port_fast_age(ds, port);
-
- return err;
-}
-
-static int mv88e6xxx_port_set_mrouter(struct dsa_switch *ds, int port,
- bool mrouter,
- struct netlink_ext_ack *extack)
-{
- struct mv88e6xxx_chip *chip = ds->priv;
- int err;
-
- if (!chip->info->ops->port_set_mcast_flood)
- return -EOPNOTSUPP;
-
- mv88e6xxx_reg_lock(chip);
- err = chip->info->ops->port_set_mcast_flood(chip, port, mrouter);
- mv88e6xxx_reg_unlock(chip);
-
return err;
}
.port_bridge_leave = mv88e6xxx_port_bridge_leave,
.port_pre_bridge_flags = mv88e6xxx_port_pre_bridge_flags,
.port_bridge_flags = mv88e6xxx_port_bridge_flags,
- .port_set_mrouter = mv88e6xxx_port_set_mrouter,
.port_stp_state_set = mv88e6xxx_port_stp_state_set,
.port_fast_age = mv88e6xxx_port_fast_age,
.port_vlan_filtering = mv88e6xxx_port_vlan_filtering,
return 0;
}
- ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_REG, reg, val);
+ /* In case of this switch we work with 32bit registers on top of 16bit
+ * bus. Some registers (for example access to forwarding database) have
+ * trigger bit on the first 16bit half of request, the result and
+ * configuration of request in the second half.
+ * To make it work properly, we should do the second part of transfer
+ * before the first one is done.
+ */
+ ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_REG, reg + 2,
+ val >> 16);
if (ret < 0)
goto error;
- ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_REG, reg + 2,
- val >> 16);
+ ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_REG, reg, val);
if (ret < 0)
goto error;
return 0;
+
error:
dev_err_ratelimited(&sbus->dev, "Bus error. Failed to write register.\n");
return ret;
phy_interface_t phy_mode[SJA1105_MAX_NUM_PORTS];
bool fixed_link[SJA1105_MAX_NUM_PORTS];
bool vlan_aware;
- unsigned long learn_ena;
unsigned long ucast_egress_floods;
unsigned long bcast_egress_floods;
const struct sja1105_info *info;
hostcmd = SJA1105_HOSTCMD_INVALIDATE;
}
sja1105_packing(p, &hostcmd, 25, 23, size, op);
+}
+
+static void
+sja1105pqrs_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
+ enum packing_op op)
+{
+ int entry_size = SJA1105PQRS_SIZE_L2_LOOKUP_ENTRY;
+
+ sja1105pqrs_common_l2_lookup_cmd_packing(buf, cmd, op, entry_size);
/* Hack - The hardware takes the 'index' field within
* struct sja1105_l2_lookup_entry as the index on which this command
* such that our API doesn't need to ask for a full-blown entry
* structure when e.g. a delete is requested.
*/
- sja1105_packing(buf, &cmd->index, 15, 6,
- SJA1105PQRS_SIZE_L2_LOOKUP_ENTRY, op);
-}
-
-static void
-sja1105pqrs_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
- enum packing_op op)
-{
- int size = SJA1105PQRS_SIZE_L2_LOOKUP_ENTRY;
-
- return sja1105pqrs_common_l2_lookup_cmd_packing(buf, cmd, op, size);
+ sja1105_packing(buf, &cmd->index, 15, 6, entry_size, op);
}
static void
sja1110_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
enum packing_op op)
{
- int size = SJA1110_SIZE_L2_LOOKUP_ENTRY;
+ int entry_size = SJA1110_SIZE_L2_LOOKUP_ENTRY;
+
+ sja1105pqrs_common_l2_lookup_cmd_packing(buf, cmd, op, entry_size);
- return sja1105pqrs_common_l2_lookup_cmd_packing(buf, cmd, op, size);
+ sja1105_packing(buf, &cmd->index, 10, 1, entry_size, op);
}
/* The switch is so retarded that it makes our command/entry abstraction
struct sja1105_mac_config_entry *mac;
struct dsa_switch *ds = priv->ds;
struct sja1105_table *table;
- int i;
+ struct dsa_port *dp;
table = &priv->static_config.tables[BLK_IDX_MAC_CONFIG];
mac = table->entries;
- for (i = 0; i < ds->num_ports; i++) {
- mac[i] = default_mac;
+ list_for_each_entry(dp, &ds->dst->ports, list) {
+ if (dp->ds != ds)
+ continue;
+
+ mac[dp->index] = default_mac;
/* Let sja1105_bridge_stp_state_set() keep address learning
- * enabled for the CPU port.
+ * enabled for the DSA ports. CPU ports use software-assisted
+ * learning to ensure that only FDB entries belonging to the
+ * bridge are learned, and that they are learned towards all
+ * CPU ports in a cross-chip topology if multiple CPU ports
+ * exist.
*/
- if (dsa_is_cpu_port(ds, i))
- priv->learn_ena |= BIT(i);
+ if (dsa_port_is_dsa(dp))
+ dp->learning = true;
}
return 0;
pvid.vlan_bc |= BIT(port);
pvid.tag_port &= ~BIT(port);
- if (dsa_is_cpu_port(ds, port)) {
+ if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) {
priv->tag_8021q_pvid[port] = SJA1105_DEFAULT_VLAN;
priv->bridge_pvid[port] = SJA1105_DEFAULT_VLAN;
}
{
struct sja1105_l2_forwarding_entry *l2fwd;
struct dsa_switch *ds = priv->ds;
+ struct dsa_switch_tree *dst;
struct sja1105_table *table;
- int i, j;
+ struct dsa_link *dl;
+ int port, tc;
+ int from, to;
table = &priv->static_config.tables[BLK_IDX_L2_FORWARDING];
l2fwd = table->entries;
- /* First 5 entries define the forwarding rules */
- for (i = 0; i < ds->num_ports; i++) {
- unsigned int upstream = dsa_upstream_port(priv->ds, i);
+ /* First 5 entries in the L2 Forwarding Table define the forwarding
+ * rules and the VLAN PCP to ingress queue mapping.
+ * Set up the ingress queue mapping first.
+ */
+ for (port = 0; port < ds->num_ports; port++) {
+ if (dsa_is_unused_port(ds, port))
+ continue;
+
+ for (tc = 0; tc < SJA1105_NUM_TC; tc++)
+ l2fwd[port].vlan_pmap[tc] = tc;
+ }
- if (dsa_is_unused_port(ds, i))
+ /* Then manage the forwarding domain for user ports. These can forward
+ * only to the always-on domain (CPU port and DSA links)
+ */
+ for (from = 0; from < ds->num_ports; from++) {
+ if (!dsa_is_user_port(ds, from))
continue;
- for (j = 0; j < SJA1105_NUM_TC; j++)
- l2fwd[i].vlan_pmap[j] = j;
+ for (to = 0; to < ds->num_ports; to++) {
+ if (!dsa_is_cpu_port(ds, to) &&
+ !dsa_is_dsa_port(ds, to))
+ continue;
- /* All ports start up with egress flooding enabled,
- * including the CPU port.
- */
- priv->ucast_egress_floods |= BIT(i);
- priv->bcast_egress_floods |= BIT(i);
+ l2fwd[from].bc_domain |= BIT(to);
+ l2fwd[from].fl_domain |= BIT(to);
- if (i == upstream)
+ sja1105_port_allow_traffic(l2fwd, from, to, true);
+ }
+ }
+
+ /* Then manage the forwarding domain for DSA links and CPU ports (the
+ * always-on domain). These can send packets to any enabled port except
+ * themselves.
+ */
+ for (from = 0; from < ds->num_ports; from++) {
+ if (!dsa_is_cpu_port(ds, from) && !dsa_is_dsa_port(ds, from))
continue;
- sja1105_port_allow_traffic(l2fwd, i, upstream, true);
- sja1105_port_allow_traffic(l2fwd, upstream, i, true);
+ for (to = 0; to < ds->num_ports; to++) {
+ if (dsa_is_unused_port(ds, to))
+ continue;
- l2fwd[i].bc_domain = BIT(upstream);
- l2fwd[i].fl_domain = BIT(upstream);
+ if (from == to)
+ continue;
+
+ l2fwd[from].bc_domain |= BIT(to);
+ l2fwd[from].fl_domain |= BIT(to);
+
+ sja1105_port_allow_traffic(l2fwd, from, to, true);
+ }
+ }
+
+ /* In odd topologies ("H" connections where there is a DSA link to
+ * another switch which also has its own CPU port), TX packets can loop
+ * back into the system (they are flooded from CPU port 1 to the DSA
+ * link, and from there to CPU port 2). Prevent this from happening by
+ * cutting RX from DSA links towards our CPU port, if the remote switch
+ * has its own CPU port and therefore doesn't need ours for network
+ * stack termination.
+ */
+ dst = ds->dst;
+
+ list_for_each_entry(dl, &dst->rtable, list) {
+ if (dl->dp->ds != ds || dl->link_dp->cpu_dp == dl->dp->cpu_dp)
+ continue;
+
+ from = dl->dp->index;
+ to = dsa_upstream_port(ds, from);
+
+ dev_warn(ds->dev,
+ "H topology detected, cutting RX from DSA link %d to CPU port %d to prevent TX packet loops\n",
+ from, to);
+
+ sja1105_port_allow_traffic(l2fwd, from, to, false);
- l2fwd[upstream].bc_domain |= BIT(i);
- l2fwd[upstream].fl_domain |= BIT(i);
+ l2fwd[from].bc_domain &= ~BIT(to);
+ l2fwd[from].fl_domain &= ~BIT(to);
+ }
+
+ /* Finally, manage the egress flooding domain. All ports start up with
+ * flooding enabled, including the CPU port and DSA links.
+ */
+ for (port = 0; port < ds->num_ports; port++) {
+ if (dsa_is_unused_port(ds, port))
+ continue;
+
+ priv->ucast_egress_floods |= BIT(port);
+ priv->bcast_egress_floods |= BIT(port);
}
/* Next 8 entries define VLAN PCP mapping from ingress to egress.
* Create a one-to-one mapping.
*/
- for (i = 0; i < SJA1105_NUM_TC; i++) {
- for (j = 0; j < ds->num_ports; j++) {
- if (dsa_is_unused_port(ds, j))
+ for (tc = 0; tc < SJA1105_NUM_TC; tc++) {
+ for (port = 0; port < ds->num_ports; port++) {
+ if (dsa_is_unused_port(ds, port))
continue;
- l2fwd[ds->num_ports + i].vlan_pmap[j] = i;
+ l2fwd[ds->num_ports + tc].vlan_pmap[port] = tc;
}
- l2fwd[ds->num_ports + i].type_egrpcp2outputq = true;
+ l2fwd[ds->num_ports + tc].type_egrpcp2outputq = true;
}
return 0;
general_params->tdmaconfigidx = tdmaconfigidx;
}
+static int sja1105_init_topology(struct sja1105_private *priv,
+ struct sja1105_general_params_entry *general_params)
+{
+ struct dsa_switch *ds = priv->ds;
+ int port;
+
+ /* The host port is the destination for traffic matching mac_fltres1
+ * and mac_fltres0 on all ports except itself. Default to an invalid
+ * value.
+ */
+ general_params->host_port = ds->num_ports;
+
+ /* Link-local traffic received on casc_port will be forwarded
+ * to host_port without embedding the source port and device ID
+ * info in the destination MAC address, and no RX timestamps will be
+ * taken either (presumably because it is a cascaded port and a
+ * downstream SJA switch already did that).
+ * To disable the feature, we need to do different things depending on
+ * switch generation. On SJA1105 we need to set an invalid port, while
+ * on SJA1110 which support multiple cascaded ports, this field is a
+ * bitmask so it must be left zero.
+ */
+ if (!priv->info->multiple_cascade_ports)
+ general_params->casc_port = ds->num_ports;
+
+ for (port = 0; port < ds->num_ports; port++) {
+ bool is_upstream = dsa_is_upstream_port(ds, port);
+ bool is_dsa_link = dsa_is_dsa_port(ds, port);
+
+ /* Upstream ports can be dedicated CPU ports or
+ * upstream-facing DSA links
+ */
+ if (is_upstream) {
+ if (general_params->host_port == ds->num_ports) {
+ general_params->host_port = port;
+ } else {
+ dev_err(ds->dev,
+ "Port %llu is already a host port, configuring %d as one too is not supported\n",
+ general_params->host_port, port);
+ return -EINVAL;
+ }
+ }
+
+ /* Cascade ports are downstream-facing DSA links */
+ if (is_dsa_link && !is_upstream) {
+ if (priv->info->multiple_cascade_ports) {
+ general_params->casc_port |= BIT(port);
+ } else if (general_params->casc_port == ds->num_ports) {
+ general_params->casc_port = port;
+ } else {
+ dev_err(ds->dev,
+ "Port %llu is already a cascade port, configuring %d as one too is not supported\n",
+ general_params->casc_port, port);
+ return -EINVAL;
+ }
+ }
+ }
+
+ if (general_params->host_port == ds->num_ports) {
+ dev_err(ds->dev, "No host port configured\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int sja1105_init_general_params(struct sja1105_private *priv)
{
struct sja1105_general_params_entry default_general_params = {
.mac_flt0 = SJA1105_LINKLOCAL_FILTER_B_MASK,
.incl_srcpt0 = false,
.send_meta0 = false,
- /* The destination for traffic matching mac_fltres1 and
- * mac_fltres0 on all ports except host_port. Such traffic
- * receieved on host_port itself would be dropped, except
- * by installing a temporary 'management route'
- */
- .host_port = priv->ds->num_ports,
/* Default to an invalid value */
.mirr_port = priv->ds->num_ports,
/* No TTEthernet */
.header_type = ETH_P_SJA1110,
};
struct sja1105_general_params_entry *general_params;
- struct dsa_switch *ds = priv->ds;
struct sja1105_table *table;
- int port;
+ int rc;
- for (port = 0; port < ds->num_ports; port++) {
- if (dsa_is_cpu_port(ds, port)) {
- default_general_params.host_port = port;
- break;
- }
- }
+ rc = sja1105_init_topology(priv, &default_general_params);
+ if (rc)
+ return rc;
table = &priv->static_config.tables[BLK_IDX_GENERAL_PARAMS];
sja1110_select_tdmaconfigidx(priv);
- /* Link-local traffic received on casc_port will be forwarded
- * to host_port without embedding the source port and device ID
- * info in the destination MAC address, and no RX timestamps will be
- * taken either (presumably because it is a cascaded port and a
- * downstream SJA switch already did that).
- * To disable the feature, we need to do different things depending on
- * switch generation. On SJA1105 we need to set an invalid port, while
- * on SJA1110 which support multiple cascaded ports, this field is a
- * bitmask so it must be left zero.
- */
- if (!priv->info->multiple_cascade_ports)
- general_params->casc_port = ds->num_ports;
-
return 0;
}
for (port = 0; port < ds->num_ports; port++) {
int mtu = VLAN_ETH_FRAME_LEN + ETH_FCS_LEN;
- if (dsa_is_cpu_port(priv->ds, port))
+ if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
mtu += VLAN_HLEN;
policing[port].smax = 65535; /* Burst size in bytes */
int sja1105et_fdb_add(struct dsa_switch *ds, int port,
const unsigned char *addr, u16 vid)
{
- struct sja1105_l2_lookup_entry l2_lookup = {0};
+ struct sja1105_l2_lookup_entry l2_lookup = {0}, tmp;
struct sja1105_private *priv = ds->priv;
struct device *dev = ds->dev;
int last_unused = -1;
+ int start, end, i;
int bin, way, rc;
bin = sja1105et_fdb_hash(priv, addr, vid);
* mask? If yes, we need to do nothing. If not, we need
* to rewrite the entry by adding this port to it.
*/
- if (l2_lookup.destports & BIT(port))
+ if ((l2_lookup.destports & BIT(port)) && l2_lookup.lockeds)
return 0;
l2_lookup.destports |= BIT(port);
} else {
index, NULL, false);
}
}
+ l2_lookup.lockeds = true;
l2_lookup.index = sja1105et_fdb_index(bin, way);
rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP,
if (rc < 0)
return rc;
+ /* Invalidate a dynamically learned entry if that exists */
+ start = sja1105et_fdb_index(bin, 0);
+ end = sja1105et_fdb_index(bin, way);
+
+ for (i = start; i < end; i++) {
+ rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP,
+ i, &tmp);
+ if (rc == -ENOENT)
+ continue;
+ if (rc)
+ return rc;
+
+ if (tmp.macaddr != ether_addr_to_u64(addr) || tmp.vlanid != vid)
+ continue;
+
+ rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP,
+ i, NULL, false);
+ if (rc)
+ return rc;
+
+ break;
+ }
+
return sja1105_static_fdb_change(priv, port, &l2_lookup, true);
}
int sja1105pqrs_fdb_add(struct dsa_switch *ds, int port,
const unsigned char *addr, u16 vid)
{
- struct sja1105_l2_lookup_entry l2_lookup = {0};
+ struct sja1105_l2_lookup_entry l2_lookup = {0}, tmp;
struct sja1105_private *priv = ds->priv;
int rc, i;
/* Search for an existing entry in the FDB table */
l2_lookup.macaddr = ether_addr_to_u64(addr);
l2_lookup.vlanid = vid;
- l2_lookup.iotag = SJA1105_S_TAG;
l2_lookup.mask_macaddr = GENMASK_ULL(ETH_ALEN * 8 - 1, 0);
- if (priv->vlan_aware) {
- l2_lookup.mask_vlanid = VLAN_VID_MASK;
- l2_lookup.mask_iotag = BIT(0);
- } else {
- l2_lookup.mask_vlanid = 0;
- l2_lookup.mask_iotag = 0;
- }
+ l2_lookup.mask_vlanid = VLAN_VID_MASK;
l2_lookup.destports = BIT(port);
+ tmp = l2_lookup;
+
rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP,
- SJA1105_SEARCH, &l2_lookup);
- if (rc == 0) {
- /* Found and this port is already in the entry's
+ SJA1105_SEARCH, &tmp);
+ if (rc == 0 && tmp.index != SJA1105_MAX_L2_LOOKUP_COUNT - 1) {
+ /* Found a static entry and this port is already in the entry's
* port mask => job done
*/
- if (l2_lookup.destports & BIT(port))
+ if ((tmp.destports & BIT(port)) && tmp.lockeds)
return 0;
+
+ l2_lookup = tmp;
+
/* l2_lookup.index is populated by the switch in case it
* found something.
*/
dev_err(ds->dev, "FDB is full, cannot add entry.\n");
return -EINVAL;
}
- l2_lookup.lockeds = true;
l2_lookup.index = i;
skip_finding_an_index:
+ l2_lookup.lockeds = true;
+
rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP,
l2_lookup.index, &l2_lookup,
true);
if (rc < 0)
return rc;
+ /* The switch learns dynamic entries and looks up the FDB left to
+ * right. It is possible that our addition was concurrent with the
+ * dynamic learning of the same address, so now that the static entry
+ * has been installed, we are certain that address learning for this
+ * particular address has been turned off, so the dynamic entry either
+ * is in the FDB at an index smaller than the static one, or isn't (it
+ * can also be at a larger index, but in that case it is inactive
+ * because the static FDB entry will match first, and the dynamic one
+ * will eventually age out). Search for a dynamically learned address
+ * prior to our static one and invalidate it.
+ */
+ tmp = l2_lookup;
+
+ rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP,
+ SJA1105_SEARCH, &tmp);
+ if (rc < 0) {
+ dev_err(ds->dev,
+ "port %d failed to read back entry for %pM vid %d: %pe\n",
+ port, addr, vid, ERR_PTR(rc));
+ return rc;
+ }
+
+ if (tmp.index < l2_lookup.index) {
+ rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP,
+ tmp.index, NULL, false);
+ if (rc < 0)
+ return rc;
+ }
+
return sja1105_static_fdb_change(priv, port, &l2_lookup, true);
}
l2_lookup.macaddr = ether_addr_to_u64(addr);
l2_lookup.vlanid = vid;
- l2_lookup.iotag = SJA1105_S_TAG;
l2_lookup.mask_macaddr = GENMASK_ULL(ETH_ALEN * 8 - 1, 0);
- if (priv->vlan_aware) {
- l2_lookup.mask_vlanid = VLAN_VID_MASK;
- l2_lookup.mask_iotag = BIT(0);
- } else {
- l2_lookup.mask_vlanid = 0;
- l2_lookup.mask_iotag = 0;
- }
+ l2_lookup.mask_vlanid = VLAN_VID_MASK;
l2_lookup.destports = BIT(port);
rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP,
return 0;
}
+static void sja1105_fast_age(struct dsa_switch *ds, int port)
+{
+ struct sja1105_private *priv = ds->priv;
+ int i;
+
+ for (i = 0; i < SJA1105_MAX_L2_LOOKUP_COUNT; i++) {
+ struct sja1105_l2_lookup_entry l2_lookup = {0};
+ u8 macaddr[ETH_ALEN];
+ int rc;
+
+ rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP,
+ i, &l2_lookup);
+ /* No fdb entry at i, not an issue */
+ if (rc == -ENOENT)
+ continue;
+ if (rc) {
+ dev_err(ds->dev, "Failed to read FDB: %pe\n",
+ ERR_PTR(rc));
+ return;
+ }
+
+ if (!(l2_lookup.destports & BIT(port)))
+ continue;
+
+ /* Don't delete static FDB entries */
+ if (l2_lookup.lockeds)
+ continue;
+
+ u64_to_ether_addr(l2_lookup.macaddr, macaddr);
+
+ rc = sja1105_fdb_del(ds, port, macaddr, l2_lookup.vlanid);
+ if (rc) {
+ dev_err(ds->dev,
+ "Failed to delete FDB entry %pM vid %lld: %pe\n",
+ macaddr, l2_lookup.vlanid, ERR_PTR(rc));
+ return;
+ }
+ }
+}
+
static int sja1105_mdb_add(struct dsa_switch *ds, int port,
const struct switchdev_obj_port_mdb *mdb)
{
static void sja1105_bridge_stp_state_set(struct dsa_switch *ds, int port,
u8 state)
{
+ struct dsa_port *dp = dsa_to_port(ds, port);
struct sja1105_private *priv = ds->priv;
struct sja1105_mac_config_entry *mac;
case BR_STATE_LEARNING:
mac[port].ingress = true;
mac[port].egress = false;
- mac[port].dyn_learn = !!(priv->learn_ena & BIT(port));
+ mac[port].dyn_learn = dp->learning;
break;
case BR_STATE_FORWARDING:
mac[port].ingress = true;
mac[port].egress = true;
- mac[port].dyn_learn = !!(priv->learn_ena & BIT(port));
+ mac[port].dyn_learn = dp->learning;
break;
default:
dev_err(ds->dev, "invalid STP state: %d\n", state);
return -EBUSY;
}
- /* Always install bridge VLANs as egress-tagged on the CPU port. */
- if (dsa_is_cpu_port(ds, port))
+ /* Always install bridge VLANs as egress-tagged on CPU and DSA ports */
+ if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
flags = 0;
rc = sja1105_vlan_add(priv, port, vlan->vid, flags);
ds->num_tx_queues = SJA1105_NUM_TC;
ds->mtu_enforcement_ingress = true;
+ ds->assisted_learning_on_cpu_port = true;
rc = sja1105_devlink_setup(ds);
if (rc < 0)
new_mtu += VLAN_ETH_HLEN + ETH_FCS_LEN;
- if (dsa_is_cpu_port(ds, port))
+ if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
new_mtu += VLAN_HLEN;
policing = priv->static_config.tables[BLK_IDX_L2_POLICING].entries;
bool enabled)
{
struct sja1105_mac_config_entry *mac;
- int rc;
mac = priv->static_config.tables[BLK_IDX_MAC_CONFIG].entries;
mac[port].dyn_learn = enabled;
- rc = sja1105_dynamic_config_write(priv, BLK_IDX_MAC_CONFIG, port,
- &mac[port], true);
- if (rc)
- return rc;
-
- if (enabled)
- priv->learn_ena |= BIT(port);
- else
- priv->learn_ena &= ~BIT(port);
-
- return 0;
+ return sja1105_dynamic_config_write(priv, BLK_IDX_MAC_CONFIG, port,
+ &mac[port], true);
}
static int sja1105_port_ucast_bcast_flood(struct sja1105_private *priv, int to,
.port_fdb_dump = sja1105_fdb_dump,
.port_fdb_add = sja1105_fdb_add,
.port_fdb_del = sja1105_fdb_del,
+ .port_fast_age = sja1105_fast_age,
.port_bridge_join = sja1105_bridge_join,
.port_bridge_leave = sja1105_bridge_leave,
.port_pre_bridge_flags = sja1105_port_pre_bridge_flags,
return -ENOMEM;
SET_NETDEV_DEV(dev, pdev);
- netdev_boot_setup_check(dev);
if (!request_region(ioaddr, EL3_IO_EXTENT, "3c509-isa")) {
free_netdev(dev);
return -ENOMEM;
}
SET_NETDEV_DEV(dev, &pdev->dev);
- netdev_boot_setup_check(dev);
el3_dev_fill(dev, phys_addr, ioaddr, irq, if_port, EL3_PNP);
pnp_set_drvdata(pdev, dev);
{
struct el3_private *lp = netdev_priv(dev);
int err;
- const char *if_names[] = {"10baseT", "AUI", "undefined", "BNC"};
+ static const char * const if_names[] = {
+ "10baseT", "AUI", "undefined", "BNC"
+ };
spin_lock_init(&lp->lock);
}
SET_NETDEV_DEV(dev, device);
- netdev_boot_setup_check(dev);
el3_dev_fill(dev, phys_addr, ioaddr, irq, if_port, EL3_EISA);
eisa_set_drvdata (edev, dev);
/* we will need locking (and refcounting) if we ever use it for more */
static LIST_HEAD(root_corkscrew_dev);
-int init_module(void)
+static int corkscrew_init_module(void)
{
int found = 0;
if (debug >= 0)
found++;
return found ? 0 : -ENODEV;
}
+module_init(corkscrew_init_module);
#else
struct net_device *tc515_probe(int unit)
config 3C515
tristate "3c515 ISA \"Fast EtherLink\""
depends on ISA && ISA_DMA_API && !PPC32
+ select NETDEV_LEGACY_INIT
help
If you have a 3Com ISA EtherLink XL "Corkscrew" 3c515 Fast Ethernet
network card, say Y here.
config NE2000
tristate "NE2000/NE1000 support"
depends on (ISA || (Q40 && m) || MACH_TX49XX || ATARI_ETHERNEC)
+ select NETDEV_LEGACY_INIT if ISA
select CRC32
help
If you have a network (Ethernet) card of this type, say Y here.
config ULTRA
tristate "SMC Ultra support"
depends on ISA
+ select NETDEV_LEGACY_INIT
select CRC32
help
If you have a network (Ethernet) card of this type, say Y here.
config WD80x3
tristate "WD80*3 support"
depends on ISA
+ select NETDEV_LEGACY_INIT
select CRC32
help
If you have a network (Ethernet) card of this type, say Y here.
#define NESM_STOP_PG 0x80 /* Last page +1 of RX ring */
-struct net_device * __init apne_probe(int unit);
static int apne_probe1(struct net_device *dev, int ioaddr);
static void apne_reset_8390(struct net_device *dev);
module_param_named(msg_enable, apne_msg_enable, uint, 0444);
MODULE_PARM_DESC(msg_enable, "Debug message level (see linux/netdevice.h for bitmap)");
-struct net_device * __init apne_probe(int unit)
+static struct net_device * __init apne_probe(void)
{
struct net_device *dev;
struct ei_device *ei_local;
dev = alloc_ei_netdev();
if (!dev)
return ERR_PTR(-ENOMEM);
- if (unit >= 0) {
- sprintf(dev->name, "eth%d", unit);
- netdev_boot_setup_check(dev);
- }
ei_local = netdev_priv(dev);
ei_local->msg_enable = apne_msg_enable;
return IRQ_HANDLED;
}
-#ifdef MODULE
static struct net_device *apne_dev;
static int __init apne_module_init(void)
{
- apne_dev = apne_probe(-1);
+ apne_dev = apne_probe();
return PTR_ERR_OR_ZERO(apne_dev);
}
}
module_init(apne_module_init);
module_exit(apne_module_exit);
-#endif
static int init_pcmcia(void)
{
return (struct ax_device *)(ei_local + 1);
}
+void ax_NS8390_reinit(struct net_device *dev)
+{
+ ax_NS8390_init(dev, 1);
+}
+
+EXPORT_SYMBOL_GPL(ax_NS8390_reinit);
+
/*
* ax_initial_check
*
}
#ifdef MODULE
-int __init init_module(void)
+static int __init ne_init(void)
{
int retval;
ne_add_devices();
ne_loop_rm_unreg(0);
return retval;
}
+module_init(ne_init);
#else /* MODULE */
static int __init ne_init(void)
{
}
module_init(ne_init);
+#ifdef CONFIG_NETDEV_LEGACY_INIT
struct net_device * __init ne_probe(int unit)
{
int this_dev;
return ERR_PTR(-ENODEV);
}
+#endif
#endif /* MODULE */
static void __exit ne_exit(void)
/* We know skbuffs are padded to at least word alignment. */
insw(ioaddr + IOPD, buf, (count+1)>>1);
}
-
static void ultra_pio_output(struct net_device *dev, int count,
const unsigned char *buf, const int start_page)
{
/* This is set up so that only a single autoprobe takes place per call.
ISA device autoprobes on a running machine are not recommended. */
-int __init
-init_module(void)
+static int __init ultra_init_module(void)
{
struct net_device *dev;
int this_dev, found = 0;
return 0;
return -ENXIO;
}
+module_init(ultra_init_module);
static void cleanup_card(struct net_device *dev)
{
iounmap(ei_status.mem);
}
-void __exit
-cleanup_module(void)
+static void __exit ultra_cleanup_module(void)
{
int this_dev;
}
}
}
+module_exit(ultra_cleanup_module);
#endif /* MODULE */
/* This is set up so that only a single autoprobe takes place per call.
ISA device autoprobes on a running machine are not recommended. */
-int __init init_module(void)
+static int __init wd_init_module(void)
{
struct net_device *dev;
int this_dev, found = 0;
return 0;
return -ENXIO;
}
+module_init(wd_init_module);
static void cleanup_card(struct net_device *dev)
{
iounmap(ei_status.mem);
}
-void __exit
-cleanup_module(void)
+static void __exit wd_cleanup_module(void)
{
int this_dev;
}
}
}
+module_exit(wd_cleanup_module);
#endif /* MODULE */
#define XS100_8390_DATA_WRITE32_BASE 0x0C80
#define XS100_8390_DATA_AREA_SIZE 0x80
-#define __NS8390_init ax_NS8390_init
-
/* force unsigned long back to 'void __iomem *' */
#define ax_convert_addr(_a) ((void __force __iomem *)(_a))
/* Ensure we have our RCR base value */
#define AX88796_PLATFORM
-static unsigned char version[] =
- "ax88796.c: Copyright 2005,2007 Simtec Electronics\n";
-
-#include "lib8390.c"
+#include "8390.h"
/* from ne.c */
#define NE_CMD EI_SHIFT(0x00)
if (jiffies - dma_start > 2 * HZ / 100) { /* 20ms */
netdev_warn(dev, "timeout waiting for Tx RDC.\n");
ei_local->reset_8390(dev);
- ax_NS8390_init(dev, 1);
+ ax_NS8390_reinit(dev);
break;
}
}
config LANCE
tristate "AMD LANCE and PCnet (AT1500 and NE2100) support"
depends on ISA && ISA_DMA_API && !ARM && !PPC32
+ select NETDEV_LEGACY_INIT
help
If you have a network (Ethernet) card of this type, say Y here.
Some LinkSys cards are of this type.
config NI65
tristate "NI6510 support"
depends on ISA && ISA_DMA_API && !ARM && !PPC32
+ select NETDEV_LEGACY_INIT
help
If you have a network (Ethernet) card of this type, say Y here.
}
-struct net_device * __init atarilance_probe(int unit)
+struct net_device * __init atarilance_probe(void)
{
int i;
static int found;
dev = alloc_etherdev(sizeof(struct lance_private));
if (!dev)
return ERR_PTR(-ENOMEM);
- if (unit >= 0) {
- sprintf(dev->name, "eth%d", unit);
- netdev_boot_setup_check(dev);
- }
for( i = 0; i < N_LANCE_ADDR; ++i ) {
if (lance_probe1( dev, &lance_addr_list[i] )) {
return 0;
}
-
-#ifdef MODULE
static struct net_device *atarilance_dev;
static int __init atarilance_module_init(void)
{
- atarilance_dev = atarilance_probe(-1);
+ atarilance_dev = atarilance_probe();
return PTR_ERR_OR_ZERO(atarilance_dev);
}
}
module_init(atarilance_module_init);
module_exit(atarilance_module_exit);
-#endif /* MODULE */
MODULE_PARM_DESC(irq, "LANCE/PCnet IRQ number (ignored for some devices)");
MODULE_PARM_DESC(lance_debug, "LANCE/PCnet debug level (0-7)");
-int __init init_module(void)
+static int __init lance_init_module(void)
{
struct net_device *dev;
int this_dev, found = 0;
return 0;
return -ENXIO;
}
+module_init(lance_init_module);
static void cleanup_card(struct net_device *dev)
{
kfree(lp);
}
-void __exit cleanup_module(void)
+static void __exit lance_cleanup_module(void)
{
int this_dev;
}
}
}
+module_exit(lance_cleanup_module);
#endif /* MODULE */
MODULE_LICENSE("GPL");
};
/* Initialise the one and only on-board 7990 */
-struct net_device * __init mvme147lance_probe(int unit)
+static struct net_device * __init mvme147lance_probe(void)
{
struct net_device *dev;
static int called;
if (!dev)
return ERR_PTR(-ENOMEM);
- if (unit >= 0)
- sprintf(dev->name, "eth%d", unit);
-
/* Fill the dev fields */
dev->base_addr = (unsigned long)MVME147_LANCE_BASE;
dev->netdev_ops = &lance_netdev_ops;
return 0;
}
-#ifdef MODULE
MODULE_LICENSE("GPL");
static struct net_device *dev_mvme147_lance;
-int __init init_module(void)
+static int __init m147lance_init(void)
{
- dev_mvme147_lance = mvme147lance_probe(-1);
+ dev_mvme147_lance = mvme147lance_probe();
return PTR_ERR_OR_ZERO(dev_mvme147_lance);
}
+module_init(m147lance_init);
-void __exit cleanup_module(void)
+static void __exit m147lance_exit(void)
{
struct m147lance_private *lp = netdev_priv(dev_mvme147_lance);
unregister_netdev(dev_mvme147_lance);
free_pages(lp->ram, 3);
free_netdev(dev_mvme147_lance);
}
-
-#endif /* MODULE */
+module_exit(m147lance_exit);
MODULE_PARM_DESC(io, "ni6510 I/O base address");
MODULE_PARM_DESC(dma, "ni6510 ISA DMA channel (ignored for some cards)");
-int __init init_module(void)
+static int __init ni65_init_module(void)
{
dev_ni65 = ni65_probe(-1);
return PTR_ERR_OR_ZERO(dev_ni65);
}
+module_init(ni65_init_module);
-void __exit cleanup_module(void)
+static void __exit ni65_cleanup_module(void)
{
unregister_netdev(dev_ni65);
cleanup_card(dev_ni65);
free_netdev(dev_ni65);
}
+module_exit(ni65_cleanup_module);
#endif /* MODULE */
MODULE_LICENSE("GPL");
/************************* End of Prototypes **************************/
-struct net_device * __init sun3lance_probe(int unit)
+static struct net_device * __init sun3lance_probe(void)
{
struct net_device *dev;
static int found;
dev = alloc_etherdev(sizeof(struct lance_private));
if (!dev)
return ERR_PTR(-ENOMEM);
- if (unit >= 0) {
- sprintf(dev->name, "eth%d", unit);
- netdev_boot_setup_check(dev);
- }
if (!lance_probe(dev))
goto out;
}
-#ifdef MODULE
-
static struct net_device *sun3lance_dev;
-int __init init_module(void)
+static int __init sun3lance_init(void)
{
- sun3lance_dev = sun3lance_probe(-1);
+ sun3lance_dev = sun3lance_probe();
return PTR_ERR_OR_ZERO(sun3lance_dev);
}
+module_init(sun3lance_init);
-void __exit cleanup_module(void)
+static void __exit sun3lance_cleanup(void)
{
unregister_netdev(sun3lance_dev);
#ifdef CONFIG_SUN3
#endif
free_netdev(sun3lance_dev);
}
-
-#endif /* MODULE */
-
+module_exit(sun3lance_cleanup);
}
/* Allocated memory for FW statistics */
- if (bnx2x_alloc_fw_stats_mem(bp))
+ rc = bnx2x_alloc_fw_stats_mem(bp);
+ if (rc)
LOAD_ERROR_EXIT(bp, load_error0);
/* request pf to initialize status blocks */
return 0;
}
+static void bnxt_free_cp_arrays(struct bnxt_cp_ring_info *cpr)
+{
+ kfree(cpr->cp_desc_ring);
+ cpr->cp_desc_ring = NULL;
+ kfree(cpr->cp_desc_mapping);
+ cpr->cp_desc_mapping = NULL;
+}
+
+static int bnxt_alloc_cp_arrays(struct bnxt_cp_ring_info *cpr, int n)
+{
+ cpr->cp_desc_ring = kcalloc(n, sizeof(*cpr->cp_desc_ring), GFP_KERNEL);
+ if (!cpr->cp_desc_ring)
+ return -ENOMEM;
+ cpr->cp_desc_mapping = kcalloc(n, sizeof(*cpr->cp_desc_mapping),
+ GFP_KERNEL);
+ if (!cpr->cp_desc_mapping)
+ return -ENOMEM;
+ return 0;
+}
+
+static void bnxt_free_all_cp_arrays(struct bnxt *bp)
+{
+ int i;
+
+ if (!bp->bnapi)
+ return;
+ for (i = 0; i < bp->cp_nr_rings; i++) {
+ struct bnxt_napi *bnapi = bp->bnapi[i];
+
+ if (!bnapi)
+ continue;
+ bnxt_free_cp_arrays(&bnapi->cp_ring);
+ }
+}
+
+static int bnxt_alloc_all_cp_arrays(struct bnxt *bp)
+{
+ int i, n = bp->cp_nr_pages;
+
+ for (i = 0; i < bp->cp_nr_rings; i++) {
+ struct bnxt_napi *bnapi = bp->bnapi[i];
+ int rc;
+
+ if (!bnapi)
+ continue;
+ rc = bnxt_alloc_cp_arrays(&bnapi->cp_ring, n);
+ if (rc)
+ return rc;
+ }
+ return 0;
+}
+
static void bnxt_free_cp_rings(struct bnxt *bp)
{
int i;
if (cpr2) {
ring = &cpr2->cp_ring_struct;
bnxt_free_ring(bp, &ring->ring_mem);
+ bnxt_free_cp_arrays(cpr2);
kfree(cpr2);
cpr->cp_ring_arr[j] = NULL;
}
if (!cpr)
return NULL;
+ rc = bnxt_alloc_cp_arrays(cpr, bp->cp_nr_pages);
+ if (rc) {
+ bnxt_free_cp_arrays(cpr);
+ kfree(cpr);
+ return NULL;
+ }
ring = &cpr->cp_ring_struct;
rmem = &ring->ring_mem;
rmem->nr_pages = bp->cp_nr_pages;
rc = bnxt_alloc_ring(bp, rmem);
if (rc) {
bnxt_free_ring(bp, rmem);
+ bnxt_free_cp_arrays(cpr);
kfree(cpr);
cpr = NULL;
}
if (jumbo_factor > agg_factor)
agg_factor = jumbo_factor;
}
- agg_ring_size = ring_size * agg_factor;
+ if (agg_factor) {
+ if (ring_size > BNXT_MAX_RX_DESC_CNT_JUM_ENA) {
+ ring_size = BNXT_MAX_RX_DESC_CNT_JUM_ENA;
+ netdev_warn(bp->dev, "RX ring size reduced from %d to %d because the jumbo ring is now enabled\n",
+ bp->rx_ring_size, ring_size);
+ bp->rx_ring_size = ring_size;
+ }
+ agg_ring_size = ring_size * agg_factor;
- if (agg_ring_size) {
bp->rx_agg_nr_pages = bnxt_calc_nr_ring_pages(agg_ring_size,
RX_DESC_CNT);
if (bp->rx_agg_nr_pages > MAX_RX_AGG_PAGES) {
bnxt_free_tx_rings(bp);
bnxt_free_rx_rings(bp);
bnxt_free_cp_rings(bp);
+ bnxt_free_all_cp_arrays(bp);
bnxt_free_ntp_fltrs(bp, irq_re_init);
if (irq_re_init) {
bnxt_free_ring_stats(bp);
goto alloc_mem_err;
}
+ rc = bnxt_alloc_all_cp_arrays(bp);
+ if (rc)
+ goto alloc_mem_err;
+
bnxt_init_ring_struct(bp);
rc = bnxt_alloc_rx_rings(bp);
/* Make sure fw_reset_state is 0 before clearing the flag */
smp_mb__before_atomic();
clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
- bnxt_ulp_start(bp, rc);
- if (!rc)
- bnxt_reenable_sriov(bp);
+ bnxt_ulp_start(bp, 0);
+ bnxt_reenable_sriov(bp);
bnxt_vf_reps_alloc(bp);
bnxt_vf_reps_open(bp);
bnxt_ptp_reapply_pps(bp);
#define MAX_TPA_SEGS_P5 0x3f
#if (BNXT_PAGE_SHIFT == 16)
-#define MAX_RX_PAGES 1
+#define MAX_RX_PAGES_AGG_ENA 1
+#define MAX_RX_PAGES 4
#define MAX_RX_AGG_PAGES 4
#define MAX_TX_PAGES 1
-#define MAX_CP_PAGES 8
+#define MAX_CP_PAGES 16
#else
-#define MAX_RX_PAGES 8
+#define MAX_RX_PAGES_AGG_ENA 8
+#define MAX_RX_PAGES 32
#define MAX_RX_AGG_PAGES 32
#define MAX_TX_PAGES 8
-#define MAX_CP_PAGES 64
+#define MAX_CP_PAGES 128
#endif
#define RX_DESC_CNT (BNXT_PAGE_SIZE / sizeof(struct rx_bd))
#define HW_CMPD_RING_SIZE (sizeof(struct tx_cmp) * CP_DESC_CNT)
#define BNXT_MAX_RX_DESC_CNT (RX_DESC_CNT * MAX_RX_PAGES - 1)
+#define BNXT_MAX_RX_DESC_CNT_JUM_ENA (RX_DESC_CNT * MAX_RX_PAGES_AGG_ENA - 1)
#define BNXT_MAX_RX_JUM_DESC_CNT (RX_DESC_CNT * MAX_RX_AGG_PAGES - 1)
#define BNXT_MAX_TX_DESC_CNT (TX_DESC_CNT * MAX_TX_PAGES - 1)
struct dim dim;
union {
- struct tx_cmp *cp_desc_ring[MAX_CP_PAGES];
- struct nqe_cn *nq_desc_ring[MAX_CP_PAGES];
+ struct tx_cmp **cp_desc_ring;
+ struct nqe_cn **nq_desc_ring;
};
- dma_addr_t cp_desc_mapping[MAX_CP_PAGES];
+ dma_addr_t *cp_desc_mapping;
struct bnxt_stats_mem stats;
u32 hw_stats_ctx_id;
int bnxt_dl_register(struct bnxt *bp)
{
+ const struct devlink_ops *devlink_ops;
struct devlink_port_attrs attrs = {};
struct devlink *dl;
int rc;
if (BNXT_PF(bp))
- dl = devlink_alloc(&bnxt_dl_ops, sizeof(struct bnxt_dl));
+ devlink_ops = &bnxt_dl_ops;
else
- dl = devlink_alloc(&bnxt_vf_dl_ops, sizeof(struct bnxt_dl));
+ devlink_ops = &bnxt_vf_dl_ops;
+
+ dl = devlink_alloc(devlink_ops, sizeof(struct bnxt_dl), &bp->pdev->dev);
if (!dl) {
netdev_warn(bp->dev, "devlink_alloc failed\n");
return -ENOMEM;
bp->hwrm_spec_code > 0x10803)
bp->eswitch_mode = DEVLINK_ESWITCH_MODE_LEGACY;
- rc = devlink_register(dl, &bp->pdev->dev);
+ rc = devlink_register(dl);
if (rc) {
netdev_warn(bp->dev, "devlink_register failed. rc=%d\n", rc);
goto err_dl_free;
{
struct bnxt *bp = netdev_priv(dev);
- ering->rx_max_pending = BNXT_MAX_RX_DESC_CNT;
- ering->rx_jumbo_max_pending = BNXT_MAX_RX_JUM_DESC_CNT;
+ if (bp->flags & BNXT_FLAG_AGG_RINGS) {
+ ering->rx_max_pending = BNXT_MAX_RX_DESC_CNT_JUM_ENA;
+ ering->rx_jumbo_max_pending = BNXT_MAX_RX_JUM_DESC_CNT;
+ } else {
+ ering->rx_max_pending = BNXT_MAX_RX_DESC_CNT;
+ ering->rx_jumbo_max_pending = 0;
+ }
ering->tx_max_pending = BNXT_MAX_TX_DESC_CNT;
ering->rx_pending = bp->rx_ring_size;
bnxt_ptp_get_current_time(bp);
ptp->next_period = now + HZ;
+ if (time_after_eq(now, ptp->next_overflow_check)) {
+ spin_lock_bh(&ptp->ptp_lock);
+ timecounter_read(&ptp->tc);
+ spin_unlock_bh(&ptp->ptp_lock);
+ ptp->next_overflow_check = now + BNXT_PHC_OVERFLOW_PERIOD;
+ }
return HZ;
}
ptp->cc.shift = 0;
ptp->cc.mult = 1;
+ ptp->next_overflow_check = jiffies + BNXT_PHC_OVERFLOW_PERIOD;
timecounter_init(&ptp->tc, &ptp->cc, ktime_to_ns(ktime_get_real()));
ptp->ptp_info = bnxt_ptp_caps;
u64 current_time;
u64 old_time;
unsigned long next_period;
+ unsigned long next_overflow_check;
+ /* 48-bit PHC overflows in 78 hours. Check overflow every 19 hours. */
+ #define BNXT_PHC_OVERFLOW_PERIOD (19 * 3600 * HZ)
+
u16 tx_seqid;
struct bnxt *bp;
atomic_t tx_avail;
*/
dev->needed_headroom += 64;
- netdev_boot_setup_check(dev);
-
priv->dev = dev;
priv->pdev = pdev;
while (frags--) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
- pci_unmap_page((lio->oct_dev)->pci_dev,
+ dma_unmap_page(&lio->oct_dev->pci_dev->dev,
g->sg[(i >> 2)].ptr[(i & 3)],
skb_frag_size(frag), DMA_TO_DEVICE);
i++;
while (frags--) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
- pci_unmap_page((lio->oct_dev)->pci_dev,
+ dma_unmap_page(&lio->oct_dev->pci_dev->dev,
g->sg[(i >> 2)].ptr[(i & 3)],
skb_frag_size(frag), DMA_TO_DEVICE);
i++;
}
devlink = devlink_alloc(&liquidio_devlink_ops,
- sizeof(struct lio_devlink_priv));
+ sizeof(struct lio_devlink_priv),
+ &octeon_dev->pci_dev->dev);
if (!devlink) {
dev_err(&octeon_dev->pci_dev->dev, "devlink alloc failed\n");
goto setup_nic_dev_free;
lio_devlink = devlink_priv(devlink);
lio_devlink->oct = octeon_dev;
- if (devlink_register(devlink, &octeon_dev->pci_dev->dev)) {
+ if (devlink_register(devlink)) {
devlink_free(devlink);
dev_err(&octeon_dev->pci_dev->dev,
"devlink registration failed\n");
while (frags--) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
- pci_unmap_page((lio->oct_dev)->pci_dev,
+ dma_unmap_page(&lio->oct_dev->pci_dev->dev,
g->sg[(i >> 2)].ptr[(i & 3)],
skb_frag_size(frag), DMA_TO_DEVICE);
i++;
while (frags--) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
- pci_unmap_page((lio->oct_dev)->pci_dev,
+ dma_unmap_page(&lio->oct_dev->pci_dev->dev,
g->sg[(i >> 2)].ptr[(i & 3)],
skb_frag_size(frag), DMA_TO_DEVICE);
i++;
goto err_disable_device;
}
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48));
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
if (err) {
dev_err(dev, "Unable to get usable DMA configuration\n");
goto err_release_regions;
}
- err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48));
- if (err) {
- dev_err(dev, "Unable to get 48-bit DMA for consistent allocations\n");
- goto err_release_regions;
- }
-
/* MAP PF's configuration registers */
nic->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0);
if (!nic->reg_base) {
goto err_disable_device;
}
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48));
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
if (err) {
dev_err(dev, "Unable to get usable DMA configuration\n");
goto err_release_regions;
}
- err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48));
- if (err) {
- dev_err(dev, "unable to get 48-bit DMA for consistent allocations\n");
- goto err_release_regions;
- }
-
qcount = netif_get_num_default_rss_queues();
/* Restrict multiqset support only for host bound VFs */
} else if (iconf & USE_ENC_IDX_F) {
if (f->fs.val.encap_vld) {
struct port_info *pi = netdev_priv(f->dev);
- u8 match_all_mac[] = { 0, 0, 0, 0, 0, 0 };
+ static const u8 match_all_mac[] = { 0, 0, 0, 0, 0, 0 };
/* allocate MPS TCAM entry */
ret = t4_alloc_encap_mac_filt(adapter, pi->viid,
} else if (iconf & USE_ENC_IDX_F) {
if (f->fs.val.encap_vld) {
struct port_info *pi = netdev_priv(f->dev);
- u8 match_all_mac[] = { 0, 0, 0, 0, 0, 0 };
+ static const u8 match_all_mac[] = { 0, 0, 0, 0, 0, 0 };
/* allocate MPS TCAM entry */
ret = t4_alloc_encap_mac_filt(adapter, pi->viid,
config NET_VENDOR_CIRRUS
bool "Cirrus devices"
default y
- depends on ISA || EISA || ARM || MAC
+ depends on ISA || EISA || ARM || MAC || COMPILE_TEST
help
If you have a network (Ethernet) card belonging to this class, say Y.
if NET_VENDOR_CIRRUS
config CS89x0
- tristate "CS89x0 support"
- depends on ISA || EISA || ARM
+ tristate
+
+config CS89x0_ISA
+ tristate "CS89x0 ISA driver support"
+ depends on HAS_IOPORT_MAP
+ depends on ISA
depends on !PPC32
+ depends on CS89x0_PLATFORM=n
+ select NETDEV_LEGACY_INIT
+ select CS89x0
help
Support for CS89x0 chipset based Ethernet cards. If you have a
network (Ethernet) card of this type, say Y and read the file
will be called cs89x0.
config CS89x0_PLATFORM
- bool "CS89x0 platform driver support" if HAS_IOPORT_MAP
- default !HAS_IOPORT_MAP
- depends on CS89x0
+ tristate "CS89x0 platform driver support"
+ depends on ARM || COMPILE_TEST
+ select CS89x0
help
- Say Y to compile the cs89x0 driver as a platform driver. This
- makes this driver suitable for use on certain evaluation boards
- such as the iMX21ADS.
+ Say Y to compile the cs89x0 platform driver. This makes this driver
+ suitable for use on certain evaluation boards such as the iMX21ADS.
- If you are unsure, say N.
+ To compile this driver as a module, choose M here. The module
+ will be called cs89x0.
config EP93XX_ETH
tristate "EP93xx Ethernet support"
* them to system IRQ numbers. This mapping is card specific and is set to
* the configuration of the Cirrus Eval board for this chip.
*/
-#ifndef CONFIG_CS89x0_PLATFORM
+#if IS_ENABLED(CONFIG_CS89x0_ISA)
static unsigned int netcard_portlist[] __used __initdata = {
0x300, 0x320, 0x340, 0x360, 0x200, 0x220, 0x240,
0x260, 0x280, 0x2a0, 0x2c0, 0x2e0, 0
int i;
if (chip_type == CS8900) {
-#ifndef CONFIG_CS89x0_PLATFORM
+#if IS_ENABLED(CONFIG_CS89x0_ISA)
/* Search the mapping table for the corresponding IRQ pin. */
for (i = 0; i != ARRAY_SIZE(cs8900_irq_map); i++)
if (cs8900_irq_map[i] == irq)
goto bad_out;
}
} else {
-#if !defined(CONFIG_CS89x0_PLATFORM)
+#if IS_ENABLED(CONFIG_CS89x0_ISA)
if (((1 << dev->irq) & lp->irq_map) == 0) {
pr_err("%s: IRQ %d is not in our map of allowable IRQs, which is %x\n",
dev->name, dev->irq, lp->irq_map);
dev->irq = i;
} else {
i = lp->isa_config & INT_NO_MASK;
-#ifndef CONFIG_CS89x0_PLATFORM
+#if IS_ENABLED(CONFIG_CS89x0_ISA)
if (lp->chip_type == CS8900) {
/* Translate the IRQ using the IRQ mapping table. */
if (i >= ARRAY_SIZE(cs8900_irq_map))
return retval;
}
-#ifndef CONFIG_CS89x0_PLATFORM
+#if IS_ENABLED(CONFIG_CS89x0_ISA)
/*
* This function converts the I/O port address used by the cs89x0_probe() and
* init_module() functions to the I/O memory address used by the
pr_warn("no cs8900 or cs8920 detected. Be sure to disable PnP with SETUP\n");
return ERR_PTR(err);
}
-#endif
-#endif
-
-#if defined(MODULE) && !defined(CONFIG_CS89x0_PLATFORM)
-
+#else
static struct net_device *dev_cs89x0;
/* Support the 'debug' module parm even if we're compiled for non-debug to
* (hw or software util)
*/
-int __init init_module(void)
+static int __init cs89x0_isa_init_module(void)
{
- struct net_device *dev = alloc_etherdev(sizeof(struct net_local));
+ struct net_device *dev;
struct net_local *lp;
int ret = 0;
#else
debug = 0;
#endif
+ dev = alloc_etherdev(sizeof(struct net_local));
if (!dev)
return -ENOMEM;
free_netdev(dev);
return ret;
}
+module_init(cs89x0_isa_init_module);
-void __exit
-cleanup_module(void)
+static void __exit cs89x0_isa_cleanup_module(void)
{
struct net_local *lp = netdev_priv(dev_cs89x0);
release_region(dev_cs89x0->base_addr, NETCARD_IO_EXTENT);
free_netdev(dev_cs89x0);
}
-#endif /* MODULE && !CONFIG_CS89x0_PLATFORM */
+module_exit(cs89x0_isa_cleanup_module);
+#endif /* MODULE */
+#endif /* CONFIG_CS89x0_ISA */
-#ifdef CONFIG_CS89x0_PLATFORM
+#if IS_ENABLED(CONFIG_CS89x0_PLATFORM)
static int __init cs89x0_platform_probe(struct platform_device *pdev)
{
struct net_device *dev = alloc_etherdev(sizeof(struct net_local));
iowrite32(0x33, ioaddr + CSR12);
new_csr6 = 0x01860000;
/* Trigger autonegotiation. */
- iowrite32(startup ? 0x0201F868 : 0x0001F868, ioaddr + 0xB8);
+ iowrite32(0x0001F868, ioaddr + 0xB8);
} else {
iowrite32(0x32, ioaddr + CSR12);
new_csr6 = 0x00420000;
int i, option = find_cnt < MAX_UNITS ? options[find_cnt] : 0;
void __iomem *ioaddr;
- i = pci_enable_device(pdev);
+ i = pcim_enable_device(pdev);
if (i) return i;
pci_set_master(pdev);
ioaddr = pci_iomap(pdev, TULIP_BAR, netdev_res_size);
if (!ioaddr)
- goto err_out_free_res;
+ goto err_out_netdev;
for (i = 0; i < 3; i++)
((__le16 *)dev->dev_addr)[i] = cpu_to_le16(eeprom_read(ioaddr, i));
err_out_cleardev:
pci_iounmap(pdev, ioaddr);
-err_out_free_res:
- pci_release_regions(pdev);
err_out_netdev:
free_netdev (dev);
return -ENODEV;
if (dev) {
struct netdev_private *np = netdev_priv(dev);
unregister_netdev(dev);
- pci_release_regions(pdev);
pci_iounmap(pdev, np->base_addr);
free_netdev(dev);
}
fsl-dpaa2-eth-${CONFIG_FSL_DPAA2_ETH_DCB} += dpaa2-eth-dcb.o
fsl-dpaa2-eth-${CONFIG_DEBUG_FS} += dpaa2-eth-debugfs.o
fsl-dpaa2-ptp-objs := dpaa2-ptp.o dprtc.o
-fsl-dpaa2-switch-objs := dpaa2-switch.o dpaa2-switch-ethtool.o dpsw.o dpaa2-switch-flower.o
+fsl-dpaa2-switch-objs := dpaa2-switch.o dpaa2-switch-ethtool.o dpsw.o dpaa2-switch-flower.o dpaa2-mac.o dpmac.o
# Needed by the tracing framework
CFLAGS_dpaa2-eth.o := -I$(src)
struct dpaa2_eth_trap_item *dpaa2_eth_dl_get_trap(struct dpaa2_eth_priv *priv,
struct dpaa2_fapr *fapr)
{
- struct dpaa2_faf_error_bit {
+ static const struct dpaa2_faf_error_bit {
int position;
enum devlink_trap_generic_id trap_id;
} faf_bits[] = {
struct dpaa2_eth_devlink_priv *dl_priv;
int err;
- priv->devlink = devlink_alloc(&dpaa2_eth_devlink_ops, sizeof(*dl_priv));
+ priv->devlink =
+ devlink_alloc(&dpaa2_eth_devlink_ops, sizeof(*dl_priv), dev);
if (!priv->devlink) {
dev_err(dev, "devlink_alloc failed\n");
return -ENOMEM;
dl_priv = devlink_priv(priv->devlink);
dl_priv->dpaa2_priv = priv;
- err = devlink_register(priv->devlink, dev);
+ err = devlink_register(priv->devlink);
if (err) {
dev_err(dev, "devlink_register() = %d\n", err);
goto devlink_free;
int err;
dpni_dev = to_fsl_mc_device(priv->net_dev->dev.parent);
- dpmac_dev = fsl_mc_get_endpoint(dpni_dev);
+ dpmac_dev = fsl_mc_get_endpoint(dpni_dev, 0);
if (PTR_ERR(dpmac_dev) == -EPROBE_DEFER)
return PTR_ERR(dpmac_dev);
enum dpsw_counter id;
char name[ETH_GSTRING_LEN];
} dpaa2_switch_ethtool_counters[] = {
- {DPSW_CNT_ING_FRAME, "rx frames"},
- {DPSW_CNT_ING_BYTE, "rx bytes"},
- {DPSW_CNT_ING_FLTR_FRAME, "rx filtered frames"},
- {DPSW_CNT_ING_FRAME_DISCARD, "rx discarded frames"},
- {DPSW_CNT_ING_BCAST_FRAME, "rx b-cast frames"},
- {DPSW_CNT_ING_BCAST_BYTES, "rx b-cast bytes"},
- {DPSW_CNT_ING_MCAST_FRAME, "rx m-cast frames"},
- {DPSW_CNT_ING_MCAST_BYTE, "rx m-cast bytes"},
- {DPSW_CNT_EGR_FRAME, "tx frames"},
- {DPSW_CNT_EGR_BYTE, "tx bytes"},
- {DPSW_CNT_EGR_FRAME_DISCARD, "tx discarded frames"},
- {DPSW_CNT_ING_NO_BUFF_DISCARD, "rx discarded no buffer frames"},
+ {DPSW_CNT_ING_FRAME, "[hw] rx frames"},
+ {DPSW_CNT_ING_BYTE, "[hw] rx bytes"},
+ {DPSW_CNT_ING_FLTR_FRAME, "[hw] rx filtered frames"},
+ {DPSW_CNT_ING_FRAME_DISCARD, "[hw] rx discarded frames"},
+ {DPSW_CNT_ING_BCAST_FRAME, "[hw] rx bcast frames"},
+ {DPSW_CNT_ING_BCAST_BYTES, "[hw] rx bcast bytes"},
+ {DPSW_CNT_ING_MCAST_FRAME, "[hw] rx mcast frames"},
+ {DPSW_CNT_ING_MCAST_BYTE, "[hw] rx mcast bytes"},
+ {DPSW_CNT_EGR_FRAME, "[hw] tx frames"},
+ {DPSW_CNT_EGR_BYTE, "[hw] tx bytes"},
+ {DPSW_CNT_EGR_FRAME_DISCARD, "[hw] tx discarded frames"},
+ {DPSW_CNT_ING_NO_BUFF_DISCARD, "[hw] rx nobuffer discards"},
};
#define DPAA2_SWITCH_NUM_COUNTERS ARRAY_SIZE(dpaa2_switch_ethtool_counters)
struct dpsw_link_state state = {0};
int err = 0;
+ if (dpaa2_switch_port_is_type_phy(port_priv))
+ return phylink_ethtool_ksettings_get(port_priv->mac->phylink,
+ link_ksettings);
+
err = dpsw_if_get_link_state(port_priv->ethsw_data->mc_io, 0,
port_priv->ethsw_data->dpsw_handle,
port_priv->idx,
bool if_running;
int err = 0, ret;
+ if (dpaa2_switch_port_is_type_phy(port_priv))
+ return phylink_ethtool_ksettings_set(port_priv->mac->phylink,
+ link_ksettings);
+
/* Interface needs to be down to change link settings */
if_running = netif_running(netdev);
if (if_running) {
return err;
}
-static int dpaa2_switch_ethtool_get_sset_count(struct net_device *dev, int sset)
+static int
+dpaa2_switch_ethtool_get_sset_count(struct net_device *netdev, int sset)
{
+ struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+ int num_ss_stats = DPAA2_SWITCH_NUM_COUNTERS;
+
switch (sset) {
case ETH_SS_STATS:
- return DPAA2_SWITCH_NUM_COUNTERS;
+ if (port_priv->mac)
+ num_ss_stats += dpaa2_mac_get_sset_count();
+ return num_ss_stats;
default:
return -EOPNOTSUPP;
}
static void dpaa2_switch_ethtool_get_strings(struct net_device *netdev,
u32 stringset, u8 *data)
{
+ struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+ u8 *p = data;
int i;
switch (stringset) {
case ETH_SS_STATS:
- for (i = 0; i < DPAA2_SWITCH_NUM_COUNTERS; i++)
- memcpy(data + i * ETH_GSTRING_LEN,
- dpaa2_switch_ethtool_counters[i].name,
+ for (i = 0; i < DPAA2_SWITCH_NUM_COUNTERS; i++) {
+ memcpy(p, dpaa2_switch_ethtool_counters[i].name,
ETH_GSTRING_LEN);
+ p += ETH_GSTRING_LEN;
+ }
+ if (port_priv->mac)
+ dpaa2_mac_get_strings(p);
break;
}
}
netdev_err(netdev, "dpsw_if_get_counter[%s] err %d\n",
dpaa2_switch_ethtool_counters[i].name, err);
}
+
+ if (port_priv->mac)
+ dpaa2_mac_get_ethtool_stats(port_priv->mac, data + i);
}
const struct ethtool_ops dpaa2_switch_port_ethtool_ops = {
return 0;
}
-static int dpaa2_switch_port_carrier_state_sync(struct net_device *netdev)
+static int dpaa2_switch_port_link_state_update(struct net_device *netdev)
{
struct ethsw_port_priv *port_priv = netdev_priv(netdev);
struct dpsw_link_state state;
int err;
+ /* When we manage the MAC/PHY using phylink there is no need
+ * to manually update the netif_carrier.
+ */
+ if (dpaa2_switch_port_is_type_phy(port_priv))
+ return 0;
+
/* Interrupts are received even though no one issued an 'ifconfig up'
* on the switch interface. Ignore these link state update interrupts
*/
struct ethsw_core *ethsw = port_priv->ethsw_data;
int err;
- /* Explicitly set carrier off, otherwise
- * netif_carrier_ok() will return true and cause 'ip link show'
- * to report the LOWER_UP flag, even though the link
- * notification wasn't even received.
- */
- netif_carrier_off(netdev);
+ if (!dpaa2_switch_port_is_type_phy(port_priv)) {
+ /* Explicitly set carrier off, otherwise
+ * netif_carrier_ok() will return true and cause 'ip link show'
+ * to report the LOWER_UP flag, even though the link
+ * notification wasn't even received.
+ */
+ netif_carrier_off(netdev);
+ }
err = dpsw_if_enable(port_priv->ethsw_data->mc_io, 0,
port_priv->ethsw_data->dpsw_handle,
return err;
}
- /* sync carrier state */
- err = dpaa2_switch_port_carrier_state_sync(netdev);
- if (err) {
- netdev_err(netdev,
- "dpaa2_switch_port_carrier_state_sync err %d\n", err);
- goto err_carrier_sync;
- }
-
dpaa2_switch_enable_ctrl_if_napi(ethsw);
- return 0;
+ if (dpaa2_switch_port_is_type_phy(port_priv))
+ phylink_start(port_priv->mac->phylink);
-err_carrier_sync:
- dpsw_if_disable(port_priv->ethsw_data->mc_io, 0,
- port_priv->ethsw_data->dpsw_handle,
- port_priv->idx);
- return err;
+ return 0;
}
static int dpaa2_switch_port_stop(struct net_device *netdev)
struct ethsw_core *ethsw = port_priv->ethsw_data;
int err;
+ if (dpaa2_switch_port_is_type_phy(port_priv)) {
+ phylink_stop(port_priv->mac->phylink);
+ } else {
+ netif_tx_stop_all_queues(netdev);
+ netif_carrier_off(netdev);
+ }
+
err = dpsw_if_disable(port_priv->ethsw_data->mc_io, 0,
port_priv->ethsw_data->dpsw_handle,
port_priv->idx);
return netdev->netdev_ops == &dpaa2_switch_port_ops;
}
-static void dpaa2_switch_links_state_update(struct ethsw_core *ethsw)
+static int dpaa2_switch_port_connect_mac(struct ethsw_port_priv *port_priv)
{
- int i;
+ struct fsl_mc_device *dpsw_port_dev, *dpmac_dev;
+ struct dpaa2_mac *mac;
+ int err;
- for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
- dpaa2_switch_port_carrier_state_sync(ethsw->ports[i]->netdev);
- dpaa2_switch_port_set_mac_addr(ethsw->ports[i]);
+ dpsw_port_dev = to_fsl_mc_device(port_priv->netdev->dev.parent);
+ dpmac_dev = fsl_mc_get_endpoint(dpsw_port_dev, port_priv->idx);
+
+ if (PTR_ERR(dpmac_dev) == -EPROBE_DEFER)
+ return PTR_ERR(dpmac_dev);
+
+ if (IS_ERR(dpmac_dev) || dpmac_dev->dev.type != &fsl_mc_bus_dpmac_type)
+ return 0;
+
+ mac = kzalloc(sizeof(*mac), GFP_KERNEL);
+ if (!mac)
+ return -ENOMEM;
+
+ mac->mc_dev = dpmac_dev;
+ mac->mc_io = port_priv->ethsw_data->mc_io;
+ mac->net_dev = port_priv->netdev;
+
+ err = dpaa2_mac_open(mac);
+ if (err)
+ goto err_free_mac;
+ port_priv->mac = mac;
+
+ if (dpaa2_switch_port_is_type_phy(port_priv)) {
+ err = dpaa2_mac_connect(mac);
+ if (err) {
+ netdev_err(port_priv->netdev,
+ "Error connecting to the MAC endpoint %pe\n",
+ ERR_PTR(err));
+ goto err_close_mac;
+ }
}
+
+ return 0;
+
+err_close_mac:
+ dpaa2_mac_close(mac);
+ port_priv->mac = NULL;
+err_free_mac:
+ kfree(mac);
+ return err;
+}
+
+static void dpaa2_switch_port_disconnect_mac(struct ethsw_port_priv *port_priv)
+{
+ if (dpaa2_switch_port_is_type_phy(port_priv))
+ dpaa2_mac_disconnect(port_priv->mac);
+
+ if (!dpaa2_switch_port_has_mac(port_priv))
+ return;
+
+ dpaa2_mac_close(port_priv->mac);
+ kfree(port_priv->mac);
+ port_priv->mac = NULL;
}
static irqreturn_t dpaa2_switch_irq0_handler_thread(int irq_num, void *arg)
{
struct device *dev = (struct device *)arg;
struct ethsw_core *ethsw = dev_get_drvdata(dev);
-
- /* Mask the events and the if_id reserved bits to be cleared on read */
- u32 status = DPSW_IRQ_EVENT_LINK_CHANGED | 0xFFFF0000;
- int err;
+ struct ethsw_port_priv *port_priv;
+ u32 status = ~0;
+ int err, if_id;
err = dpsw_get_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle,
DPSW_IRQ_INDEX_IF, &status);
if (err) {
dev_err(dev, "Can't get irq status (err %d)\n", err);
-
- err = dpsw_clear_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle,
- DPSW_IRQ_INDEX_IF, 0xFFFFFFFF);
- if (err)
- dev_err(dev, "Can't clear irq status (err %d)\n", err);
goto out;
}
- if (status & DPSW_IRQ_EVENT_LINK_CHANGED)
- dpaa2_switch_links_state_update(ethsw);
+ if_id = (status & 0xFFFF0000) >> 16;
+ port_priv = ethsw->ports[if_id];
+
+ if (status & DPSW_IRQ_EVENT_LINK_CHANGED) {
+ dpaa2_switch_port_link_state_update(port_priv->netdev);
+ dpaa2_switch_port_set_mac_addr(port_priv);
+ }
+
+ if (status & DPSW_IRQ_EVENT_ENDPOINT_CHANGED) {
+ if (dpaa2_switch_port_has_mac(port_priv))
+ dpaa2_switch_port_disconnect_mac(port_priv);
+ else
+ dpaa2_switch_port_connect_mac(port_priv);
+ }
out:
+ err = dpsw_clear_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle,
+ DPSW_IRQ_INDEX_IF, status);
+ if (err)
+ dev_err(dev, "Can't clear irq status (err %d)\n", err);
+
return IRQ_HANDLED;
}
for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
port_priv = ethsw->ports[i];
unregister_netdev(port_priv->netdev);
+ dpaa2_switch_port_disconnect_mac(port_priv);
free_netdev(port_priv->netdev);
}
goto err_port_probe;
port_priv->learn_ena = false;
+ err = dpaa2_switch_port_connect_mac(port_priv);
+ if (err)
+ goto err_port_probe;
+
return 0;
err_port_probe:
ðsw->fq[i].napi, dpaa2_switch_poll,
NAPI_POLL_WEIGHT);
- err = dpsw_enable(ethsw->mc_io, 0, ethsw->dpsw_handle);
- if (err) {
- dev_err(ethsw->dev, "dpsw_enable err %d\n", err);
- goto err_free_netdev;
- }
-
/* Setup IRQs */
err = dpaa2_switch_setup_irqs(sw_dev);
if (err)
#include <net/pkt_cls.h>
#include <soc/fsl/dpaa2-io.h>
+#include "dpaa2-mac.h"
#include "dpsw.h"
/* Number of IRQs supported */
bool learn_ena;
struct dpaa2_switch_filter_block *filter_block;
+ struct dpaa2_mac *mac;
};
/* Switch data */
return true;
}
+static inline bool
+dpaa2_switch_port_is_type_phy(struct ethsw_port_priv *port_priv)
+{
+ if (port_priv->mac &&
+ (port_priv->mac->attr.link_type == DPMAC_LINK_TYPE_PHY ||
+ port_priv->mac->attr.link_type == DPMAC_LINK_TYPE_BACKPLANE))
+ return true;
+
+ return false;
+}
+
+static inline bool dpaa2_switch_port_has_mac(struct ethsw_port_priv *port_priv)
+{
+ return port_priv->mac ? true : false;
+}
+
bool dpaa2_switch_port_dev_check(const struct net_device *netdev);
int dpaa2_switch_port_vlans_add(struct net_device *netdev,
*/
#define DPSW_IRQ_EVENT_LINK_CHANGED 0x0001
+/**
+ * DPSW_IRQ_EVENT_ENDPOINT_CHANGED - Indicates a change in endpoint
+ */
+#define DPSW_IRQ_EVENT_ENDPOINT_CHANGED 0x0002
+
/**
* struct dpsw_irq_cfg - IRQ configuration
* @addr: Address that must be written to signal a message-based interrupt
#define FEC_RXIC0 0xfff
#define FEC_RXIC1 0xfff
#define FEC_RXIC2 0xfff
+#define FEC_LPI_SLEEP 0xfff
+#define FEC_LPI_WAKE 0xfff
#endif /* CONFIG_M5272 */
return ret;
}
+static int fec_enet_parse_rgmii_delay(struct fec_enet_private *fep,
+ struct device_node *np)
+{
+ u32 rgmii_tx_delay, rgmii_rx_delay;
+
+ /* For rgmii tx internal delay, valid values are 0ps and 2000ps */
+ if (!of_property_read_u32(np, "tx-internal-delay-ps", &rgmii_tx_delay)) {
+ if (rgmii_tx_delay != 0 && rgmii_tx_delay != 2000) {
+ dev_err(&fep->pdev->dev, "The only allowed RGMII TX delay values are: 0ps, 2000ps");
+ return -EINVAL;
+ } else if (rgmii_tx_delay == 2000) {
+ fep->rgmii_txc_dly = true;
+ }
+ }
+
+ /* For rgmii rx internal delay, valid values are 0ps and 2000ps */
+ if (!of_property_read_u32(np, "rx-internal-delay-ps", &rgmii_rx_delay)) {
+ if (rgmii_rx_delay != 0 && rgmii_rx_delay != 2000) {
+ dev_err(&fep->pdev->dev, "The only allowed RGMII RX delay values are: 0ps, 2000ps");
+ return -EINVAL;
+ } else if (rgmii_rx_delay == 2000) {
+ fep->rgmii_rxc_dly = true;
+ }
+ }
+
+ return 0;
+}
+
static int fec_enet_mii_probe(struct net_device *ndev)
{
struct fec_enet_private *fep = netdev_priv(ndev);
char irq_name[8];
int irq_cnt;
struct fec_devinfo *dev_info;
- u32 rgmii_delay;
fec_enet_get_queue_num(pdev, &num_tx_qs, &num_rx_qs);
if (ret)
goto failed_stop_mode;
- /* For rgmii internal delay, valid values are 0ps and 2000ps */
- if (of_property_read_u32(np, "tx-internal-delay-ps", &rgmii_delay))
- fep->rgmii_txc_dly = true;
- if (of_property_read_u32(np, "rx-internal-delay-ps", &rgmii_delay))
- fep->rgmii_rxc_dly = true;
-
phy_node = of_parse_phandle(np, "phy-handle", 0);
if (!phy_node && of_phy_is_fixed_link(np)) {
ret = of_phy_register_fixed_link(np);
fep->phy_interface = interface;
}
+ ret = fec_enet_parse_rgmii_delay(fep, np);
+ if (ret)
+ goto failed_rgmii_delay;
+
fep->clk_ipg = devm_clk_get(&pdev->dev, "ipg");
if (IS_ERR(fep->clk_ipg)) {
ret = PTR_ERR(fep->clk_ipg);
fep->clk_ref_rate = clk_get_rate(fep->clk_ref);
/* clk_2x_txclk is optional, depends on board */
- fep->clk_2x_txclk = devm_clk_get(&pdev->dev, "enet_2x_txclk");
- if (IS_ERR(fep->clk_2x_txclk))
- fep->clk_2x_txclk = NULL;
+ if (fep->rgmii_txc_dly || fep->rgmii_rxc_dly) {
+ fep->clk_2x_txclk = devm_clk_get(&pdev->dev, "enet_2x_txclk");
+ if (IS_ERR(fep->clk_2x_txclk))
+ fep->clk_2x_txclk = NULL;
+ }
fep->bufdesc_ex = fep->quirks & FEC_QUIRK_HAS_BUFDESC_EX;
fep->clk_ptp = devm_clk_get(&pdev->dev, "ptp");
failed_clk_ipg:
fec_enet_clk_enable(ndev, false);
failed_clk:
+failed_rgmii_delay:
if (of_phy_is_fixed_link(np))
of_phy_deregister_fixed_link(np);
of_node_put(phy_node);
if (of_phy_is_fixed_link(np))
of_phy_deregister_fixed_link(np);
of_node_put(fep->phy_node);
- free_netdev(ndev);
clk_disable_unprepare(fep->clk_ahb);
clk_disable_unprepare(fep->clk_ipg);
pm_runtime_put_noidle(&pdev->dev);
pm_runtime_disable(&pdev->dev);
+ free_netdev(ndev);
return 0;
}
tristate "Hisilicon Network Subsystem Support HNS3 (Framework)"
depends on PCI
select NET_DEVLINK
+ select PAGE_POOL
help
This selects the framework support for Hisilicon Network Subsystem 3.
This layer facilitates clients like ENET, RoCE and user-space ethernet
unsigned int order = hns3_page_order(ring);
struct page *p;
+ if (ring->page_pool) {
+ p = page_pool_dev_alloc_frag(ring->page_pool,
+ &cb->page_offset,
+ hns3_buf_size(ring));
+ if (unlikely(!p))
+ return -ENOMEM;
+
+ cb->priv = p;
+ cb->buf = page_address(p);
+ cb->dma = page_pool_get_dma_addr(p);
+ cb->type = DESC_TYPE_PP_FRAG;
+ cb->reuse_flag = 0;
+ return 0;
+ }
+
p = dev_alloc_pages(order);
if (!p)
return -ENOMEM;
if (cb->type & (DESC_TYPE_SKB | DESC_TYPE_BOUNCE_HEAD |
DESC_TYPE_BOUNCE_ALL | DESC_TYPE_SGL_SKB))
napi_consume_skb(cb->priv, budget);
- else if (!HNAE3_IS_TX_RING(ring) && cb->pagecnt_bias)
- __page_frag_cache_drain(cb->priv, cb->pagecnt_bias);
+ else if (!HNAE3_IS_TX_RING(ring)) {
+ if (cb->type & DESC_TYPE_PAGE && cb->pagecnt_bias)
+ __page_frag_cache_drain(cb->priv, cb->pagecnt_bias);
+ else if (cb->type & DESC_TYPE_PP_FRAG)
+ page_pool_put_full_page(ring->page_pool, cb->priv,
+ false);
+ }
memset(cb, 0, sizeof(*cb));
}
int ret;
ret = hns3_alloc_buffer(ring, cb);
- if (ret)
+ if (ret || ring->page_pool)
goto out;
ret = hns3_map_buffer(ring, cb);
if (ret)
return ret;
- ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma);
+ ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma +
+ ring->desc_cb[i].page_offset);
return 0;
}
{
hns3_unmap_buffer(ring, &ring->desc_cb[i]);
ring->desc_cb[i] = *res_cb;
- ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma);
+ ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma +
+ ring->desc_cb[i].page_offset);
ring->desc[i].rx.bd_base_info = 0;
}
u32 frag_size = size - pull_len;
bool reused;
+ if (ring->page_pool) {
+ skb_add_rx_frag(skb, i, desc_cb->priv, frag_offset,
+ frag_size, truesize);
+ return;
+ }
+
/* Avoid re-using remote or pfmem page */
if (unlikely(!dev_page_is_reusable(desc_cb->priv)))
goto out;
/* We can reuse buffer as-is, just make sure it is reusable */
if (dev_page_is_reusable(desc_cb->priv))
desc_cb->reuse_flag = 1;
+ else if (desc_cb->type & DESC_TYPE_PP_FRAG)
+ page_pool_put_full_page(ring->page_pool, desc_cb->priv,
+ false);
else /* This page cannot be reused so discard it */
__page_frag_cache_drain(desc_cb->priv,
desc_cb->pagecnt_bias);
hns3_rx_ring_move_fw(ring);
return 0;
}
+
+ if (ring->page_pool)
+ skb_mark_for_recycle(skb);
+
u64_stats_update_begin(&ring->syncp);
ring->stats.seg_pkt_cnt++;
u64_stats_update_end(&ring->syncp);
"alloc rx fraglist skb fail\n");
return -ENXIO;
}
+
+ if (ring->page_pool)
+ skb_mark_for_recycle(new_skb);
+
ring->frag_num = 0;
if (ring->tail_skb) {
priv->ring = NULL;
}
+static void hns3_alloc_page_pool(struct hns3_enet_ring *ring)
+{
+ struct page_pool_params pp_params = {
+ .flags = PP_FLAG_DMA_MAP | PP_FLAG_PAGE_FRAG |
+ PP_FLAG_DMA_SYNC_DEV,
+ .order = hns3_page_order(ring),
+ .pool_size = ring->desc_num * hns3_buf_size(ring) /
+ (PAGE_SIZE << hns3_page_order(ring)),
+ .nid = dev_to_node(ring_to_dev(ring)),
+ .dev = ring_to_dev(ring),
+ .dma_dir = DMA_FROM_DEVICE,
+ .offset = 0,
+ .max_len = PAGE_SIZE << hns3_page_order(ring),
+ };
+
+ ring->page_pool = page_pool_create(&pp_params);
+ if (IS_ERR(ring->page_pool)) {
+ dev_warn(ring_to_dev(ring), "page pool creation failed: %ld\n",
+ PTR_ERR(ring->page_pool));
+ ring->page_pool = NULL;
+ }
+}
+
static int hns3_alloc_ring_memory(struct hns3_enet_ring *ring)
{
int ret;
goto out_with_desc_cb;
if (!HNAE3_IS_TX_RING(ring)) {
+ hns3_alloc_page_pool(ring);
+
ret = hns3_alloc_ring_buffers(ring);
if (ret)
goto out_with_desc;
devm_kfree(ring_to_dev(ring), tx_spare);
ring->tx_spare = NULL;
}
+
+ if (!HNAE3_IS_TX_RING(ring) && ring->page_pool) {
+ page_pool_destroy(ring->page_pool);
+ ring->page_pool = NULL;
+ }
}
static int hns3_buf_size2type(u32 buf_size)
#include <linux/dim.h>
#include <linux/if_vlan.h>
+#include <net/page_pool.h>
#include "hnae3.h"
DESC_TYPE_BOUNCE_ALL = 1 << 3,
DESC_TYPE_BOUNCE_HEAD = 1 << 4,
DESC_TYPE_SGL_SKB = 1 << 5,
+ DESC_TYPE_PP_FRAG = 1 << 6,
};
struct hns3_desc_cb {
struct hnae3_queue *tqp;
int queue_index;
struct device *dev; /* will be used for DMA mapping of descriptors */
+ struct page_pool *page_pool;
/* statistic */
struct ring_stats stats;
int ret;
devlink = devlink_alloc(&hclge_devlink_ops,
- sizeof(struct hclge_devlink_priv));
+ sizeof(struct hclge_devlink_priv), &pdev->dev);
if (!devlink)
return -ENOMEM;
priv = devlink_priv(devlink);
priv->hdev = hdev;
- ret = devlink_register(devlink, &pdev->dev);
+ ret = devlink_register(devlink);
if (ret) {
dev_err(&pdev->dev, "failed to register devlink, ret = %d\n",
ret);
#include "hclge_main.h"
#include "hnae3.h"
+static int hclge_ptp_get_cycle(struct hclge_dev *hdev)
+{
+ struct hclge_ptp *ptp = hdev->ptp;
+
+ ptp->cycle.quo = readl(hdev->ptp->io_base + HCLGE_PTP_CYCLE_QUO_REG) &
+ HCLGE_PTP_CYCLE_QUO_MASK;
+ ptp->cycle.numer = readl(hdev->ptp->io_base + HCLGE_PTP_CYCLE_NUM_REG);
+ ptp->cycle.den = readl(hdev->ptp->io_base + HCLGE_PTP_CYCLE_DEN_REG);
+
+ if (ptp->cycle.den == 0) {
+ dev_err(&hdev->pdev->dev, "invalid ptp cycle denominator!\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int hclge_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
{
struct hclge_dev *hdev = hclge_ptp_get_hdev(ptp);
+ struct hclge_ptp_cycle *cycle = &hdev->ptp->cycle;
u64 adj_val, adj_base, diff;
unsigned long flags;
bool is_neg = false;
is_neg = true;
}
- adj_base = HCLGE_PTP_CYCLE_ADJ_BASE * HCLGE_PTP_CYCLE_ADJ_UNIT;
+ adj_base = (u64)cycle->quo * (u64)cycle->den + (u64)cycle->numer;
adj_val = adj_base * ppb;
diff = div_u64(adj_val, 1000000000ULL);
/* This clock cycle is defined by three part: quotient, numerator
* and denominator. For example, 2.5ns, the quotient is 2,
- * denominator is fixed to HCLGE_PTP_CYCLE_ADJ_UNIT, and numerator
- * is 0.5 * HCLGE_PTP_CYCLE_ADJ_UNIT.
+ * denominator is fixed to ptp->cycle.den, and numerator
+ * is 0.5 * ptp->cycle.den.
*/
- quo = div_u64_rem(adj_val, HCLGE_PTP_CYCLE_ADJ_UNIT, &numerator);
+ quo = div_u64_rem(adj_val, cycle->den, &numerator);
spin_lock_irqsave(&hdev->ptp->lock, flags);
- writel(quo, hdev->ptp->io_base + HCLGE_PTP_CYCLE_QUO_REG);
+ writel(quo & HCLGE_PTP_CYCLE_QUO_MASK,
+ hdev->ptp->io_base + HCLGE_PTP_CYCLE_QUO_REG);
writel(numerator, hdev->ptp->io_base + HCLGE_PTP_CYCLE_NUM_REG);
- writel(HCLGE_PTP_CYCLE_ADJ_UNIT,
- hdev->ptp->io_base + HCLGE_PTP_CYCLE_DEN_REG);
+ writel(cycle->den, hdev->ptp->io_base + HCLGE_PTP_CYCLE_DEN_REG);
writel(HCLGE_PTP_CYCLE_ADJ_EN,
hdev->ptp->io_base + HCLGE_PTP_CYCLE_CFG_REG);
spin_unlock_irqrestore(&hdev->ptp->lock, flags);
ret = hclge_ptp_create_clock(hdev);
if (ret)
return ret;
+
+ ret = hclge_ptp_get_cycle(hdev);
+ if (ret)
+ return ret;
}
ret = hclge_ptp_int_en(hdev, true);
#define HCLGE_PTP_TIME_ADJ_REG 0x60
#define HCLGE_PTP_TIME_ADJ_EN BIT(0)
#define HCLGE_PTP_CYCLE_QUO_REG 0x64
+#define HCLGE_PTP_CYCLE_QUO_MASK GENMASK(7, 0)
#define HCLGE_PTP_CYCLE_DEN_REG 0x68
#define HCLGE_PTP_CYCLE_NUM_REG 0x6C
#define HCLGE_PTP_CYCLE_CFG_REG 0x70
#define HCLGE_PTP_CUR_TIME_SEC_L_REG 0x78
#define HCLGE_PTP_CUR_TIME_NSEC_REG 0x7C
-#define HCLGE_PTP_CYCLE_ADJ_BASE 2
#define HCLGE_PTP_CYCLE_ADJ_MAX 500000000
-#define HCLGE_PTP_CYCLE_ADJ_UNIT 100000000
#define HCLGE_PTP_SEC_H_OFFSET 32u
#define HCLGE_PTP_SEC_L_MASK GENMASK(31, 0)
#define HCLGE_PTP_FLAG_TX_EN 1
#define HCLGE_PTP_FLAG_RX_EN 2
+struct hclge_ptp_cycle {
+ u32 quo;
+ u32 numer;
+ u32 den;
+};
+
struct hclge_ptp {
struct hclge_dev *hdev;
struct ptp_clock *clock;
spinlock_t lock; /* protects ptp registers */
u32 ptp_cfg;
u32 last_tx_seqid;
+ struct hclge_ptp_cycle cycle;
unsigned long tx_start;
unsigned long tx_cnt;
unsigned long tx_skipped;
struct devlink *devlink;
int ret;
- devlink = devlink_alloc(&hclgevf_devlink_ops,
- sizeof(struct hclgevf_devlink_priv));
+ devlink =
+ devlink_alloc(&hclgevf_devlink_ops,
+ sizeof(struct hclgevf_devlink_priv), &pdev->dev);
if (!devlink)
return -ENOMEM;
priv = devlink_priv(devlink);
priv->hdev = hdev;
- ret = devlink_register(devlink, &pdev->dev);
+ ret = devlink_register(devlink);
if (ret) {
dev_err(&pdev->dev, "failed to register devlink, ret = %d\n",
ret);
.flash_update = hinic_devlink_flash_update,
};
-struct devlink *hinic_devlink_alloc(void)
+struct devlink *hinic_devlink_alloc(struct device *dev)
{
- return devlink_alloc(&hinic_devlink_ops, sizeof(struct hinic_dev));
+ return devlink_alloc(&hinic_devlink_ops, sizeof(struct hinic_dev), dev);
}
void hinic_devlink_free(struct devlink *devlink)
devlink_free(devlink);
}
-int hinic_devlink_register(struct hinic_devlink_priv *priv, struct device *dev)
+int hinic_devlink_register(struct hinic_devlink_priv *priv)
{
struct devlink *devlink = priv_to_devlink(priv);
- return devlink_register(devlink, dev);
+ return devlink_register(devlink);
}
void hinic_devlink_unregister(struct hinic_devlink_priv *priv)
u32 device_id;
};
-struct devlink *hinic_devlink_alloc(void);
+struct devlink *hinic_devlink_alloc(struct device *dev);
void hinic_devlink_free(struct devlink *devlink);
-int hinic_devlink_register(struct hinic_devlink_priv *priv, struct device *dev);
+int hinic_devlink_register(struct hinic_devlink_priv *priv);
void hinic_devlink_unregister(struct hinic_devlink_priv *priv);
int hinic_health_reporters_create(struct hinic_devlink_priv *priv);
return err;
}
- err = hinic_devlink_register(hwdev->devlink_dev, &pdev->dev);
+ err = hinic_devlink_register(hwdev->devlink_dev);
if (err) {
dev_err(&hwif->pdev->dev, "Failed to register devlink\n");
hinic_pf_to_mgmt_free(&pfhwdev->pf_to_mgmt);
struct devlink *devlink;
int err, num_qps;
- devlink = hinic_devlink_alloc();
+ devlink = hinic_devlink_alloc(&pdev->dev);
if (!devlink) {
dev_err(&pdev->dev, "Hinic devlink alloc failed\n");
return -ENOMEM;
add, add + 6, add, add[12], add[13], str);
}
-static int io = 0x300;
-static int irq = 10;
-
static const struct net_device_ops i596_netdev_ops = {
.ndo_open = i596_open,
.ndo_stop = i596_close,
.ndo_validate_addr = eth_validate_addr,
};
-struct net_device * __init i82596_probe(int unit)
+static struct net_device * __init i82596_probe(void)
{
struct net_device *dev;
int i;
if (!dev)
return ERR_PTR(-ENOMEM);
- if (unit >= 0) {
- sprintf(dev->name, "eth%d", unit);
- netdev_boot_setup_check(dev);
- } else {
- dev->base_addr = io;
- dev->irq = irq;
- }
-
#ifdef ENABLE_MVME16x_NET
if (MACH_IS_MVME16x) {
if (mvme16x_config & MVME16x_CONFIG_NO_ETHERNET) {
}
}
-#ifdef MODULE
static struct net_device *dev_82596;
static int debug = -1;
module_param(debug, int, 0);
MODULE_PARM_DESC(debug, "i82596 debug mask");
-int __init init_module(void)
+static int __init i82596_init(void)
{
if (debug >= 0)
i596_debug = debug;
- dev_82596 = i82596_probe(-1);
+ dev_82596 = i82596_probe();
return PTR_ERR_OR_ZERO(dev_82596);
}
+module_init(i82596_init);
-void __exit cleanup_module(void)
+static void __exit i82596_cleanup(void)
{
unregister_netdev(dev_82596);
#ifdef __mc68000__
free_page ((u32)(dev_82596->mem_start));
free_netdev(dev_82596);
}
-
-#endif /* MODULE */
+module_exit(i82596_cleanup);
static int fifo=0x8; /* don't change */
#include <linux/kernel.h>
+#include <linux/module.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/ioport.h>
memset((char *)p->scb,0,sizeof(struct scb_struct));
}
-struct net_device * __init sun3_82586_probe(int unit)
+static int __init sun3_82586_probe(void)
{
struct net_device *dev;
unsigned long ioaddr;
break;
default:
- return ERR_PTR(-ENODEV);
+ return -ENODEV;
}
if (found)
- return ERR_PTR(-ENODEV);
+ return -ENODEV;
ioaddr = (unsigned long)ioremap(IE_OBIO, SUN3_82586_TOTAL_SIZE);
if (!ioaddr)
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
found = 1;
dev = alloc_etherdev(sizeof(struct priv));
if (!dev)
goto out;
- if (unit >= 0) {
- sprintf(dev->name, "eth%d", unit);
- netdev_boot_setup_check(dev);
- }
-
dev->irq = IE_IRQ;
dev->base_addr = ioaddr;
err = sun3_82586_probe1(dev, ioaddr);
free_netdev(dev);
out:
iounmap((void __iomem *)ioaddr);
- return ERR_PTR(err);
+ return err;
}
+module_init(sun3_82586_probe);
static const struct net_device_ops sun3_82586_netdev_ops = {
.ndo_open = sun3_82586_open,
default:
/* if we got here and link is up something bad is afoot */
netdev_info(netdev,
- "WARNING: Link is up but PHY type 0x%x is not recognized.\n",
+ "WARNING: Link is up but PHY type 0x%x is not recognized, or incorrect cable is in use\n",
hw_link_info->phy_type);
}
dev_warn(&pf->pdev->dev,
"Device configuration forbids SW from starting the LLDP agent.\n");
return -EINVAL;
+ case I40E_AQ_RC_EAGAIN:
+ dev_warn(&pf->pdev->dev,
+ "Stop FW LLDP agent command is still being processed, please try again in a second.\n");
+ return -EBUSY;
default:
dev_warn(&pf->pdev->dev,
"Starting FW LLDP agent failed: error: %s, %s\n",
}
/**
- * i40e_vsi_control_tx - Start or stop a VSI's rings
+ * i40e_vsi_enable_tx - Start a VSI's rings
* @vsi: the VSI being configured
- * @enable: start or stop the rings
**/
-static int i40e_vsi_control_tx(struct i40e_vsi *vsi, bool enable)
+static int i40e_vsi_enable_tx(struct i40e_vsi *vsi)
{
struct i40e_pf *pf = vsi->back;
int i, pf_q, ret = 0;
for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) {
ret = i40e_control_wait_tx_q(vsi->seid, pf,
pf_q,
- false /*is xdp*/, enable);
+ false /*is xdp*/, true);
if (ret)
break;
ret = i40e_control_wait_tx_q(vsi->seid, pf,
pf_q + vsi->alloc_queue_pairs,
- true /*is xdp*/, enable);
+ true /*is xdp*/, true);
if (ret)
break;
}
}
/**
- * i40e_vsi_control_rx - Start or stop a VSI's rings
+ * i40e_vsi_enable_rx - Start a VSI's rings
* @vsi: the VSI being configured
- * @enable: start or stop the rings
**/
-static int i40e_vsi_control_rx(struct i40e_vsi *vsi, bool enable)
+static int i40e_vsi_enable_rx(struct i40e_vsi *vsi)
{
struct i40e_pf *pf = vsi->back;
int i, pf_q, ret = 0;
pf_q = vsi->base_queue;
for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) {
- ret = i40e_control_wait_rx_q(pf, pf_q, enable);
+ ret = i40e_control_wait_rx_q(pf, pf_q, true);
if (ret) {
dev_info(&pf->pdev->dev,
- "VSI seid %d Rx ring %d %sable timeout\n",
- vsi->seid, pf_q, (enable ? "en" : "dis"));
+ "VSI seid %d Rx ring %d enable timeout\n",
+ vsi->seid, pf_q);
break;
}
}
- /* Due to HW errata, on Rx disable only, the register can indicate done
- * before it really is. Needs 50ms to be sure
- */
- if (!enable)
- mdelay(50);
-
return ret;
}
int ret = 0;
/* do rx first for enable and last for disable */
- ret = i40e_vsi_control_rx(vsi, true);
+ ret = i40e_vsi_enable_rx(vsi);
if (ret)
return ret;
- ret = i40e_vsi_control_tx(vsi, true);
+ ret = i40e_vsi_enable_tx(vsi);
return ret;
}
+#define I40E_DISABLE_TX_GAP_MSEC 50
+
/**
* i40e_vsi_stop_rings - Stop a VSI's rings
* @vsi: the VSI being configured
**/
void i40e_vsi_stop_rings(struct i40e_vsi *vsi)
{
+ struct i40e_pf *pf = vsi->back;
+ int pf_q, err, q_end;
+
/* When port TX is suspended, don't wait */
if (test_bit(__I40E_PORT_SUSPENDED, vsi->back->state))
return i40e_vsi_stop_rings_no_wait(vsi);
- /* do rx first for enable and last for disable
- * Ignore return value, we need to shutdown whatever we can
- */
- i40e_vsi_control_tx(vsi, false);
- i40e_vsi_control_rx(vsi, false);
+ q_end = vsi->base_queue + vsi->num_queue_pairs;
+ for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++)
+ i40e_pre_tx_queue_cfg(&pf->hw, (u32)pf_q, false);
+
+ for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++) {
+ err = i40e_control_wait_rx_q(pf, pf_q, false);
+ if (err)
+ dev_info(&pf->pdev->dev,
+ "VSI seid %d Rx ring %d dissable timeout\n",
+ vsi->seid, pf_q);
+ }
+
+ msleep(I40E_DISABLE_TX_GAP_MSEC);
+ pf_q = vsi->base_queue;
+ for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++)
+ wr32(&pf->hw, I40E_QTX_ENA(pf_q), 0);
+
+ i40e_vsi_wait_queues_disabled(vsi);
}
/**
}
if (vsi->num_queue_pairs <
(mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i])) {
+ dev_err(&vsi->back->pdev->dev,
+ "Failed to create traffic channel, insufficient number of queues.\n");
return -EINVAL;
}
if (sum_max_rate > i40e_get_link_speed(vsi)) {
.ndo_poll_controller = i40e_netpoll,
#endif
.ndo_setup_tc = __i40e_setup_tc,
+ .ndo_select_queue = i40e_lan_select_queue,
.ndo_set_features = i40e_set_features,
.ndo_set_vf_mac = i40e_ndo_set_vf_mac,
.ndo_set_vf_vlan = i40e_ndo_set_vf_port_vlan,
return -1;
}
+static u16 i40e_swdcb_skb_tx_hash(struct net_device *dev,
+ const struct sk_buff *skb,
+ u16 num_tx_queues)
+{
+ u32 jhash_initval_salt = 0xd631614b;
+ u32 hash;
+
+ if (skb->sk && skb->sk->sk_hash)
+ hash = skb->sk->sk_hash;
+ else
+ hash = (__force u16)skb->protocol ^ skb->hash;
+
+ hash = jhash_1word(hash, jhash_initval_salt);
+
+ return (u16)(((u64)hash * num_tx_queues) >> 32);
+}
+
+u16 i40e_lan_select_queue(struct net_device *netdev,
+ struct sk_buff *skb,
+ struct net_device __always_unused *sb_dev)
+{
+ struct i40e_netdev_priv *np = netdev_priv(netdev);
+ struct i40e_vsi *vsi = np->vsi;
+ struct i40e_hw *hw;
+ u16 qoffset;
+ u16 qcount;
+ u8 tclass;
+ u16 hash;
+ u8 prio;
+
+ /* is DCB enabled at all? */
+ if (vsi->tc_config.numtc == 1)
+ return i40e_swdcb_skb_tx_hash(netdev, skb,
+ netdev->real_num_tx_queues);
+
+ prio = skb->priority;
+ hw = &vsi->back->hw;
+ tclass = hw->local_dcbx_config.etscfg.prioritytable[prio];
+ /* sanity check */
+ if (unlikely(!(vsi->tc_config.enabled_tc & BIT(tclass))))
+ tclass = 0;
+
+ /* select a queue assigned for the given TC */
+ qcount = vsi->tc_config.tc_info[tclass].qcount;
+ hash = i40e_swdcb_skb_tx_hash(netdev, skb, qcount);
+
+ qoffset = vsi->tc_config.tc_info[tclass].qoffset;
+ return qoffset + hash;
+}
+
/**
* i40e_xmit_xdp_ring - transmits an XDP buffer to an XDP Tx ring
* @xdpf: data to transmit
bool i40e_alloc_rx_buffers(struct i40e_ring *rxr, u16 cleaned_count);
netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
+u16 i40e_lan_select_queue(struct net_device *netdev, struct sk_buff *skb,
+ struct net_device *sb_dev);
void i40e_clean_tx_ring(struct i40e_ring *tx_ring);
void i40e_clean_rx_ring(struct i40e_ring *rx_ring);
int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring);
{
struct devlink *devlink;
- devlink = devlink_alloc(&ice_devlink_ops, sizeof(struct ice_pf));
+ devlink = devlink_alloc(&ice_devlink_ops, sizeof(struct ice_pf), dev);
if (!devlink)
return NULL;
struct device *dev = ice_pf_to_dev(pf);
int err;
- err = devlink_register(devlink, dev);
+ err = devlink_register(devlink);
if (err) {
dev_err(dev, "devlink registration failed: %d\n", err);
return err;
if (!skb)
return ERR_PTR(-ENOMEM);
- skb_mark_for_recycle(skb, virt_to_page(xdp->data), pool);
+ skb_mark_for_recycle(skb);
skb_reserve(skb, xdp->data - xdp->data_hard_start);
skb_put(skb, xdp->data_end - xdp->data);
skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
skb_frag_page(frag), skb_frag_off(frag),
skb_frag_size(frag), PAGE_SIZE);
- /* We don't need to reset pp_recycle here. It's already set, so
- * just mark fragments for recycling.
- */
- page_pool_store_mem_info(skb_frag_page(frag), pool);
}
return skb;
}
if (pp)
- skb_mark_for_recycle(skb, page, pp);
+ skb_mark_for_recycle(skb);
else
dma_unmap_single_attrs(dev->dev.parent, dma_addr,
bm_pool->buf_size, DMA_FROM_DEVICE,
/* Add reference */
cgx->lmac_idmap[lmac->lmac_id] = lmac;
- cgx->mac_ops->mac_pause_frm_config(cgx, lmac->lmac_id, true);
set_bit(lmac->lmac_id, &cgx->lmac_bmap);
+ cgx->mac_ops->mac_pause_frm_config(cgx, lmac->lmac_id, true);
}
return cgx_lmac_verify_fwi_version(cgx);
#define TXSCH_RR_QTM_MAX ((1 << 24) - 1)
#define TXSCH_TL1_DFLT_RR_QTM TXSCH_RR_QTM_MAX
#define TXSCH_TL1_DFLT_RR_PRIO (0x1ull)
-#define MAX_SCHED_WEIGHT 0xFF
-#define DFLT_RR_WEIGHT 71
-#define DFLT_RR_QTM ((DFLT_RR_WEIGHT * TXSCH_RR_QTM_MAX) \
- / MAX_SCHED_WEIGHT)
+#define CN10K_MAX_DWRR_WEIGHT 16384 /* Weight is 14bit on CN10K */
/* Min/Max packet sizes, excluding FCS */
#define NIC_HW_MIN_FRS 40
struct nix_hw_info {
struct mbox_msghdr hdr;
+ u16 rsvs16;
u16 max_mtu;
u16 min_mtu;
+ u32 rpm_dwrr_mtu;
+ u32 sdp_dwrr_mtu;
+ u64 rsvd[16]; /* Add reserved fields for future expansion */
};
struct nix_bandprof_alloc_req {
* Software assigns pkind for each incoming port such as CGX
* Ethernet interfaces, LBK interfaces, etc.
*/
+#define NPC_UNRESERVED_PKIND_COUNT NPC_RX_VLAN_EXDSA_PKIND
+
enum npc_pkind_type {
+ NPC_RX_LBK_PKIND = 0ULL,
NPC_RX_VLAN_EXDSA_PKIND = 56ULL,
NPC_RX_CHLEN24B_PKIND = 57ULL,
NPC_RX_CPT_HDR_PKIND,
/* Get numVFs attached to this PF and first HWVF */
cfg = rvu_read64(rvu, BLKADDR_RVUM, RVU_PRIV_PFX_CFG(pf));
- *numvfs = (cfg >> 12) & 0xFF;
- *hwvf = cfg & 0xFFF;
+ if (numvfs)
+ *numvfs = (cfg >> 12) & 0xFF;
+ if (hwvf)
+ *hwvf = cfg & 0xFFF;
}
static int rvu_get_hwvf(struct rvu *rvu, int pcifunc)
bool nix_shaping; /* Is shaping and coloring supported */
bool nix_tx_link_bp; /* Can link backpressure TL queues ? */
bool nix_rx_multicast; /* Rx packet replication support */
+ bool nix_common_dwrr_mtu; /* Common DWRR MTU for quantum config */
bool per_pf_mbox_regs; /* PF mbox specified in per PF registers ? */
bool programmable_chans; /* Channels programmable ? */
bool ipolicer;
struct nix_cn10k_aq_enq_rsp *aq_rsp,
u16 pcifunc, u8 ctype, u32 qidx);
int rvu_get_nix_blkaddr(struct rvu *rvu, u16 pcifunc);
+u32 convert_dwrr_mtu_to_bytes(u8 dwrr_mtu);
+u32 convert_bytes_to_dwrr_mtu(u32 bytes);
/* NPC APIs */
int rvu_npc_init(struct rvu *rvu);
rvu_nix_health_reporters_destroy(rvu_dl);
}
+/* Devlink Params APIs */
+static int rvu_af_dl_dwrr_mtu_validate(struct devlink *devlink, u32 id,
+ union devlink_param_value val,
+ struct netlink_ext_ack *extack)
+{
+ struct rvu_devlink *rvu_dl = devlink_priv(devlink);
+ struct rvu *rvu = rvu_dl->rvu;
+ int dwrr_mtu = val.vu32;
+ struct nix_txsch *txsch;
+ struct nix_hw *nix_hw;
+
+ if (!rvu->hw->cap.nix_common_dwrr_mtu) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Setting DWRR_MTU is not supported on this silicon");
+ return -EOPNOTSUPP;
+ }
+
+ if ((dwrr_mtu > 65536 || !is_power_of_2(dwrr_mtu)) &&
+ (dwrr_mtu != 9728 && dwrr_mtu != 10240)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Invalid, supported MTUs are 0,2,4,8.16,32,64....4K,8K,32K,64K and 9728, 10240");
+ return -EINVAL;
+ }
+
+ nix_hw = get_nix_hw(rvu->hw, BLKADDR_NIX0);
+ if (!nix_hw)
+ return -ENODEV;
+
+ txsch = &nix_hw->txsch[NIX_TXSCH_LVL_SMQ];
+ if (rvu_rsrc_free_count(&txsch->schq) != txsch->schq.max) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Changing DWRR MTU is not supported when there are active NIXLFs");
+ NL_SET_ERR_MSG_MOD(extack,
+ "Make sure none of the PF/VF interfaces are initialized and retry");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int rvu_af_dl_dwrr_mtu_set(struct devlink *devlink, u32 id,
+ struct devlink_param_gset_ctx *ctx)
+{
+ struct rvu_devlink *rvu_dl = devlink_priv(devlink);
+ struct rvu *rvu = rvu_dl->rvu;
+ u64 dwrr_mtu;
+
+ dwrr_mtu = convert_bytes_to_dwrr_mtu(ctx->val.vu32);
+ rvu_write64(rvu, BLKADDR_NIX0, NIX_AF_DWRR_RPM_MTU, dwrr_mtu);
+
+ return 0;
+}
+
+static int rvu_af_dl_dwrr_mtu_get(struct devlink *devlink, u32 id,
+ struct devlink_param_gset_ctx *ctx)
+{
+ struct rvu_devlink *rvu_dl = devlink_priv(devlink);
+ struct rvu *rvu = rvu_dl->rvu;
+ u64 dwrr_mtu;
+
+ if (!rvu->hw->cap.nix_common_dwrr_mtu)
+ return -EOPNOTSUPP;
+
+ dwrr_mtu = rvu_read64(rvu, BLKADDR_NIX0, NIX_AF_DWRR_RPM_MTU);
+ ctx->val.vu32 = convert_dwrr_mtu_to_bytes(dwrr_mtu);
+
+ return 0;
+}
+
+enum rvu_af_dl_param_id {
+ RVU_AF_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
+ RVU_AF_DEVLINK_PARAM_ID_DWRR_MTU,
+};
+
+static const struct devlink_param rvu_af_dl_params[] = {
+ DEVLINK_PARAM_DRIVER(RVU_AF_DEVLINK_PARAM_ID_DWRR_MTU,
+ "dwrr_mtu", DEVLINK_PARAM_TYPE_U32,
+ BIT(DEVLINK_PARAM_CMODE_RUNTIME),
+ rvu_af_dl_dwrr_mtu_get, rvu_af_dl_dwrr_mtu_set,
+ rvu_af_dl_dwrr_mtu_validate),
+};
+
+/* Devlink switch mode */
static int rvu_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
{
struct rvu_devlink *rvu_dl = devlink_priv(devlink);
struct devlink *dl;
int err;
- dl = devlink_alloc(&rvu_devlink_ops, sizeof(struct rvu_devlink));
+ dl = devlink_alloc(&rvu_devlink_ops, sizeof(struct rvu_devlink),
+ rvu->dev);
if (!dl) {
dev_warn(rvu->dev, "devlink_alloc failed\n");
return -ENOMEM;
}
- err = devlink_register(dl, rvu->dev);
+ err = devlink_register(dl);
if (err) {
dev_err(rvu->dev, "devlink register failed with error %d\n", err);
devlink_free(dl);
rvu_dl->rvu = rvu;
rvu->rvu_dl = rvu_dl;
- return rvu_health_reporters_create(rvu);
+ err = rvu_health_reporters_create(rvu);
+ if (err) {
+ dev_err(rvu->dev,
+ "devlink health reporter creation failed with error %d\n", err);
+ goto err_dl_health;
+ }
+
+ err = devlink_params_register(dl, rvu_af_dl_params,
+ ARRAY_SIZE(rvu_af_dl_params));
+ if (err) {
+ dev_err(rvu->dev,
+ "devlink params register failed with error %d", err);
+ goto err_dl_health;
+ }
+
+ devlink_params_publish(dl);
+
+ return 0;
+
+err_dl_health:
+ rvu_health_reporters_destroy(rvu);
+ devlink_unregister(dl);
+ devlink_free(dl);
+ return err;
}
void rvu_unregister_dl(struct rvu *rvu)
if (!dl)
return;
+ devlink_params_unregister(dl, rvu_af_dl_params,
+ ARRAY_SIZE(rvu_af_dl_params));
rvu_health_reporters_destroy(rvu);
devlink_unregister(dl);
devlink_free(dl);
return NULL;
}
+u32 convert_dwrr_mtu_to_bytes(u8 dwrr_mtu)
+{
+ dwrr_mtu &= 0x1FULL;
+
+ /* MTU used for DWRR calculation is in power of 2 up until 64K bytes.
+ * Value of 4 is reserved for MTU value of 9728 bytes.
+ * Value of 5 is reserved for MTU value of 10240 bytes.
+ */
+ switch (dwrr_mtu) {
+ case 4:
+ return 9728;
+ case 5:
+ return 10240;
+ default:
+ return BIT_ULL(dwrr_mtu);
+ }
+
+ return 0;
+}
+
+u32 convert_bytes_to_dwrr_mtu(u32 bytes)
+{
+ /* MTU used for DWRR calculation is in power of 2 up until 64K bytes.
+ * Value of 4 is reserved for MTU value of 9728 bytes.
+ * Value of 5 is reserved for MTU value of 10240 bytes.
+ */
+ if (bytes > BIT_ULL(16))
+ return 0;
+
+ switch (bytes) {
+ case 9728:
+ return 4;
+ case 10240:
+ return 5;
+ default:
+ return ilog2(bytes);
+ }
+
+ return 0;
+}
+
static void nix_rx_sync(struct rvu *rvu, int blkaddr)
{
int err;
- /*Sync all in flight RX packets to LLC/DRAM */
+ /* Sync all in flight RX packets to LLC/DRAM */
+ rvu_write64(rvu, blkaddr, NIX_AF_RX_SW_SYNC, BIT_ULL(0));
+ err = rvu_poll_reg(rvu, blkaddr, NIX_AF_RX_SW_SYNC, BIT_ULL(0), true);
+ if (err)
+ dev_err(rvu->dev, "SYNC1: NIX RX software sync failed\n");
+
+ /* SW_SYNC ensures all existing transactions are finished and pkts
+ * are written to LLC/DRAM, queues should be teared down after
+ * successful SW_SYNC. Due to a HW errata, in some rare scenarios
+ * an existing transaction might end after SW_SYNC operation. To
+ * ensure operation is fully done, do the SW_SYNC twice.
+ */
rvu_write64(rvu, blkaddr, NIX_AF_RX_SW_SYNC, BIT_ULL(0));
err = rvu_poll_reg(rvu, blkaddr, NIX_AF_RX_SW_SYNC, BIT_ULL(0), true);
if (err)
- dev_err(rvu->dev, "NIX RX software sync failed\n");
+ dev_err(rvu->dev, "SYNC2: NIX RX software sync failed\n");
}
static bool is_valid_txschq(struct rvu *rvu, int blkaddr,
rvu_nix_chan_lbk(rvu, lbkid, vf + 1);
pfvf->rx_chan_cnt = 1;
pfvf->tx_chan_cnt = 1;
+ rvu_npc_set_pkind(rvu, NPC_RX_LBK_PKIND, pfvf);
rvu_npc_install_promisc_entry(rvu, pcifunc, nixlf,
pfvf->rx_chan_base,
pfvf->rx_chan_cnt);
return;
rvu_write64(rvu, blkaddr, NIX_AF_TL1X_TOPOLOGY(schq),
(TXSCH_TL1_DFLT_RR_PRIO << 1));
- rvu_write64(rvu, blkaddr, NIX_AF_TL1X_SCHEDULE(schq),
- TXSCH_TL1_DFLT_RR_QTM);
+
+ /* On OcteonTx2 the config was in bytes and newer silcons
+ * it's changed to weight.
+ */
+ if (!rvu->hw->cap.nix_common_dwrr_mtu)
+ rvu_write64(rvu, blkaddr, NIX_AF_TL1X_SCHEDULE(schq),
+ TXSCH_TL1_DFLT_RR_QTM);
+ else
+ rvu_write64(rvu, blkaddr, NIX_AF_TL1X_SCHEDULE(schq),
+ CN10K_MAX_DWRR_WEIGHT);
+
rvu_write64(rvu, blkaddr, NIX_AF_TL1X_CIR(schq), 0x00);
pfvf_map[schq] = TXSCH_SET_FLAG(pfvf_map[schq], NIX_TXSCHQ_CFG_DONE);
}
for (schq = 0; schq < txsch->schq.max; schq++)
txsch->pfvf_map[schq] = TXSCH_MAP(0, NIX_TXSCHQ_FREE);
}
+
+ /* Setup a default value of 8192 as DWRR MTU */
+ if (rvu->hw->cap.nix_common_dwrr_mtu) {
+ rvu_write64(rvu, blkaddr, NIX_AF_DWRR_RPM_MTU,
+ convert_bytes_to_dwrr_mtu(8192));
+ rvu_write64(rvu, blkaddr, NIX_AF_DWRR_SDP_MTU,
+ convert_bytes_to_dwrr_mtu(8192));
+ }
+
return 0;
}
struct nix_hw_info *rsp)
{
u16 pcifunc = req->hdr.pcifunc;
+ u64 dwrr_mtu;
int blkaddr;
blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
rvu_get_lmac_link_max_frs(rvu, &rsp->max_mtu);
rsp->min_mtu = NIC_HW_MIN_FRS;
+
+ if (!rvu->hw->cap.nix_common_dwrr_mtu) {
+ /* Return '1' on OTx2 */
+ rsp->rpm_dwrr_mtu = 1;
+ rsp->sdp_dwrr_mtu = 1;
+ return 0;
+ }
+
+ dwrr_mtu = rvu_read64(rvu, BLKADDR_NIX0, NIX_AF_DWRR_RPM_MTU);
+ rsp->rpm_dwrr_mtu = convert_dwrr_mtu_to_bytes(dwrr_mtu);
+
+ dwrr_mtu = rvu_read64(rvu, BLKADDR_NIX0, NIX_AF_DWRR_SDP_MTU);
+ rsp->sdp_dwrr_mtu = convert_dwrr_mtu_to_bytes(dwrr_mtu);
+
return 0;
}
return 0;
}
+static void rvu_nix_setup_capabilities(struct rvu *rvu, int blkaddr)
+{
+ struct rvu_hwinfo *hw = rvu->hw;
+ u64 hw_const;
+
+ hw_const = rvu_read64(rvu, blkaddr, NIX_AF_CONST1);
+
+ /* On OcteonTx2 DWRR quantum is directly configured into each of
+ * the transmit scheduler queues. And PF/VF drivers were free to
+ * config any value upto 2^24.
+ * On CN10K, HW is modified, the quantum configuration at scheduler
+ * queues is in terms of weight. And SW needs to setup a base DWRR MTU
+ * at NIX_AF_DWRR_RPM_MTU / NIX_AF_DWRR_SDP_MTU. HW will do
+ * 'DWRR MTU * weight' to get the quantum.
+ *
+ * Check if HW uses a common MTU for all DWRR quantum configs.
+ * On OcteonTx2 this register field is '0'.
+ */
+ if (((hw_const >> 56) & 0x10) == 0x10)
+ hw->cap.nix_common_dwrr_mtu = true;
+}
+
static int rvu_nix_block_init(struct rvu *rvu, struct nix_hw *nix_hw)
{
const struct npc_lt_def_cfg *ltdefs;
if (err)
return err;
+ /* Setup capabilities of the NIX block */
+ rvu_nix_setup_capabilities(rvu, blkaddr);
+
/* Initialize admin queue */
err = nix_aq_init(rvu, block);
if (err)
vlan = &nix_hw->txvlan;
kfree(vlan->rsrc.bmap);
mutex_destroy(&vlan->rsrc_lock);
- devm_kfree(rvu->dev, vlan->entry2pfvf_map);
mcast = &nix_hw->mcast;
qmem_free(rvu->dev, mcast->mce_ctx);
{
struct rvu_hwinfo *hw = rvu->hw;
int num_pkinds, num_kpus, idx;
- struct npc_pkind *pkind;
/* Disable all KPUs and their entries */
for (idx = 0; idx < hw->npc_kpus; idx++) {
* Check HW max count to avoid configuring junk or
* writing to unsupported CSR addresses.
*/
- pkind = &hw->pkind;
num_pkinds = rvu->kpu.pkinds;
- num_pkinds = min_t(int, pkind->rsrc.max, num_pkinds);
+ num_pkinds = min_t(int, hw->npc_pkinds, num_pkinds);
for (idx = 0; idx < num_pkinds; idx++)
npc_config_kpuaction(rvu, blkaddr, &rvu->kpu.ikpu[idx], 0, idx, true);
if (npc_const1 & BIT_ULL(63))
npc_const2 = rvu_read64(rvu, blkaddr, NPC_AF_CONST2);
- pkind->rsrc.max = (npc_const1 >> 12) & 0xFFULL;
+ pkind->rsrc.max = NPC_UNRESERVED_PKIND_COUNT;
+ hw->npc_pkinds = (npc_const1 >> 12) & 0xFFULL;
hw->npc_kpu_entries = npc_const1 & 0xFFFULL;
hw->npc_kpus = (npc_const >> 8) & 0x1FULL;
hw->npc_intfs = npc_const & 0xFULL;
err = rvu_alloc_bitmap(&pkind->rsrc);
if (err)
return err;
+ /* Reserve PKIND#0 for LBKs. Power reset value of LBK_CH_PKIND is '0',
+ * no need to configure PKIND for all LBKs separately.
+ */
+ rvu_alloc_rsrc(&pkind->rsrc);
/* Allocate mem for pkind to PF and channel mapping info */
pkind->pfchan_map = devm_kcalloc(rvu->dev, pkind->rsrc.max,
#define NIX_AF_DEBUG_NPC_RESP_DATAX(a) (0x680 | (a) << 3)
#define NIX_AF_SMQX_CFG(a) (0x700 | (a) << 16)
#define NIX_AF_SQM_DBG_CTL_STATUS (0x750)
+#define NIX_AF_DWRR_SDP_MTU (0x790)
+#define NIX_AF_DWRR_RPM_MTU (0x7A0)
#define NIX_AF_PSE_CHANNEL_LEVEL (0x800)
#define NIX_AF_PSE_SHAPER_CFG (0x810)
#define NIX_AF_TX_EXPR_CREDIT (0x830)
struct rvu_switch *rswitch = &rvu->rswitch;
u16 start = rswitch->start_entry;
struct rvu_hwinfo *hw = rvu->hw;
- int pf, vf, numvfs, hwvf;
u16 pcifunc, entry = 0;
+ int pf, vf, numvfs;
int err;
for (pf = 1; pf < hw->total_pfs; pf++) {
rswitch->entry2pcifunc[entry++] = pcifunc;
- rvu_get_pf_numvfs(rvu, pf, &numvfs, &hwvf);
- for (vf = 0; vf < numvfs; vf++, hwvf++) {
+ rvu_get_pf_numvfs(rvu, pf, &numvfs, NULL);
+ for (vf = 0; vf < numvfs; vf++) {
pcifunc = pf << 10 | ((vf + 1) & 0x3FF);
rvu_get_nix_blkaddr(rvu, pcifunc);
struct npc_mcam_free_entry_req free_req = { 0 };
struct rvu_switch *rswitch = &rvu->rswitch;
struct rvu_hwinfo *hw = rvu->hw;
- int pf, vf, numvfs, hwvf;
+ int pf, vf, numvfs;
struct msg_rsp rsp;
u16 pcifunc;
int err;
"Reverting RX rule for PF%d failed(%d)\n",
pf, err);
- for (vf = 0; vf < numvfs; vf++, hwvf++) {
+ rvu_get_pf_numvfs(rvu, pf, &numvfs, NULL);
+ for (vf = 0; vf < numvfs; vf++) {
pcifunc = pf << 10 | ((vf + 1) & 0x3FF);
err = rvu_switch_install_rx_rule(rvu, pcifunc, 0xFFF);
if (err)
aq->sq.ena = 1;
/* Only one SMQ is allocated, map all SQ's to that SMQ */
aq->sq.smq = pfvf->hw.txschq_list[NIX_TXSCH_LVL_SMQ][0];
- /* FIXME: set based on NIX_AF_DWRR_RPM_MTU*/
- aq->sq.smq_rr_weight = pfvf->netdev->mtu;
+ aq->sq.smq_rr_weight = mtu_to_dwrr_weight(pfvf, pfvf->max_frs);
aq->sq.default_chan = pfvf->hw.tx_chan_base;
aq->sq.sqe_stype = NIX_STYPE_STF; /* Cache SQB */
aq->sq.sqb_aura = sqb_aura;
#include "otx2_common.h"
+static inline int mtu_to_dwrr_weight(struct otx2_nic *pfvf, int mtu)
+{
+ u32 weight;
+
+ /* On OTx2, since AF returns DWRR_MTU as '1', this logic
+ * will work on those silicons as well.
+ */
+ weight = mtu / pfvf->hw.dwrr_mtu;
+ if (mtu % pfvf->hw.dwrr_mtu)
+ weight += 1;
+
+ return weight;
+}
+
void cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq);
void cn10k_sqe_flush(void *dev, struct otx2_snd_queue *sq, int size, int qidx);
int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura);
struct otx2_hw *hw = &pfvf->hw;
struct nix_txschq_config *req;
u64 schq, parent;
+ u64 dwrr_val;
+
+ dwrr_val = mtu_to_dwrr_weight(pfvf, pfvf->max_frs);
req = otx2_mbox_alloc_msg_nix_txschq_cfg(&pfvf->mbox);
if (!req)
req->num_regs++;
/* Set DWRR quantum */
req->reg[2] = NIX_AF_MDQX_SCHEDULE(schq);
- req->regval[2] = DFLT_RR_QTM;
+ req->regval[2] = dwrr_val;
} else if (lvl == NIX_TXSCH_LVL_TL4) {
parent = hw->txschq_list[NIX_TXSCH_LVL_TL3][0];
req->reg[0] = NIX_AF_TL4X_PARENT(schq);
req->regval[0] = parent << 16;
req->num_regs++;
req->reg[1] = NIX_AF_TL4X_SCHEDULE(schq);
- req->regval[1] = DFLT_RR_QTM;
+ req->regval[1] = dwrr_val;
} else if (lvl == NIX_TXSCH_LVL_TL3) {
parent = hw->txschq_list[NIX_TXSCH_LVL_TL2][0];
req->reg[0] = NIX_AF_TL3X_PARENT(schq);
req->regval[0] = parent << 16;
req->num_regs++;
req->reg[1] = NIX_AF_TL3X_SCHEDULE(schq);
- req->regval[1] = DFLT_RR_QTM;
+ req->regval[1] = dwrr_val;
} else if (lvl == NIX_TXSCH_LVL_TL2) {
parent = hw->txschq_list[NIX_TXSCH_LVL_TL1][0];
req->reg[0] = NIX_AF_TL2X_PARENT(schq);
req->num_regs++;
req->reg[1] = NIX_AF_TL2X_SCHEDULE(schq);
- req->regval[1] = TXSCH_TL1_DFLT_RR_PRIO << 24 | DFLT_RR_QTM;
+ req->regval[1] = TXSCH_TL1_DFLT_RR_PRIO << 24 | dwrr_val;
req->num_regs++;
req->reg[2] = NIX_AF_TL3_TL2X_LINKX_CFG(schq,
* For VF this is always ignored.
*/
- /* Set DWRR quantum */
+ /* On CN10K, if RR_WEIGHT is greater than 16384, HW will
+ * clip it to 16384, so configuring a 24bit max value
+ * will work on both OTx2 and CN10K.
+ */
req->reg[0] = NIX_AF_TL1X_SCHEDULE(schq);
req->regval[0] = TXSCH_TL1_DFLT_RR_QTM;
aq->sq.ena = 1;
/* Only one SMQ is allocated, map all SQ's to that SMQ */
aq->sq.smq = pfvf->hw.txschq_list[NIX_TXSCH_LVL_SMQ][0];
- aq->sq.smq_rr_quantum = DFLT_RR_QTM;
+ aq->sq.smq_rr_quantum = mtu_to_dwrr_weight(pfvf, pfvf->max_frs);
aq->sq.default_chan = pfvf->hw.tx_chan_base;
aq->sq.sqe_stype = NIX_STYPE_STF; /* Cache SQB */
aq->sq.sqb_aura = sqb_aura;
aq->cq.drop = RQ_DROP_LVL_CQ(pfvf->hw.rq_skid, cq->cqe_cnt);
aq->cq.drop_ena = 1;
- /* Enable receive CQ backpressure */
- aq->cq.bp_ena = 1;
- aq->cq.bpid = pfvf->bpid[0];
+ if (!is_otx2_lbkvf(pfvf->pdev)) {
+ /* Enable receive CQ backpressure */
+ aq->cq.bp_ena = 1;
+ aq->cq.bpid = pfvf->bpid[0];
- /* Set backpressure level is same as cq pass level */
- aq->cq.bp = RQ_PASS_LVL_CQ(pfvf->hw.rq_skid, qset->rqe_cnt);
+ /* Set backpressure level is same as cq pass level */
+ aq->cq.bp = RQ_PASS_LVL_CQ(pfvf->hw.rq_skid, qset->rqe_cnt);
+ }
}
/* Fill AQ info */
aq->aura.fc_hyst_bits = 0; /* Store count on all updates */
/* Enable backpressure for RQ aura */
- if (aura_id < pfvf->hw.rqpool_cnt) {
+ if (aura_id < pfvf->hw.rqpool_cnt && !is_otx2_lbkvf(pfvf->pdev)) {
aq->aura.bp_ena = 0;
aq->aura.nix0_bpid = pfvf->bpid[0];
/* Set backpressure level for RQ's Aura */
* SMQ errors
*/
max_mtu = rsp->max_mtu - 8 - OTX2_ETH_HLEN;
+
+ /* Also save DWRR MTU, needed for DWRR weight calculation */
+ pfvf->hw.dwrr_mtu = rsp->rpm_dwrr_mtu;
+ if (!pfvf->hw.dwrr_mtu)
+ pfvf->hw.dwrr_mtu = 1;
}
out:
/* NIX */
u16 txschq_list[NIX_TXSCH_LVL_CNT][MAX_TXSCHQ_PER_FUNC];
u16 matchall_ipolicer;
+ u32 dwrr_mtu;
/* HW settings, coalescing etc */
u16 rx_chan_base;
err = otx2_set_real_num_queues(dev, channel->tx_count,
channel->rx_count);
if (err)
- goto fail;
+ return err;
pfvf->hw.rx_queues = channel->rx_count;
pfvf->hw.tx_queues = channel->tx_count;
pfvf->qset.cq_cnt = pfvf->hw.tx_queues + pfvf->hw.rx_queues;
-fail:
if (if_up)
- dev->netdev_ops->ndo_open(dev);
+ err = dev->netdev_ops->ndo_open(dev);
netdev_info(dev, "Setting num Tx rings to %d, Rx rings to %d success\n",
pfvf->hw.tx_queues, pfvf->hw.rx_queues);
qs->rqe_cnt = rx_count;
if (if_up)
- netdev->netdev_ops->ndo_open(netdev);
+ return netdev->netdev_ops->ndo_open(netdev);
return 0;
}
err_tx_stop_queues:
netif_tx_stop_all_queues(netdev);
netif_carrier_off(netdev);
+ pf->flags |= OTX2_FLAG_INTF_DOWN;
err_free_cints:
otx2_free_cints(pf, qidx);
vec = pci_irq_vector(pf->pdev,
struct otx2_rss_info *rss;
int qidx, vec, wrk;
+ /* If the DOWN flag is set resources are already freed */
+ if (pf->flags & OTX2_FLAG_INTF_DOWN)
+ return 0;
+
netif_carrier_off(netdev);
netif_tx_stop_all_queues(netdev);
.trap_drop_counter_get = prestera_drop_counter_get,
};
-struct prestera_switch *prestera_devlink_alloc(void)
+struct prestera_switch *prestera_devlink_alloc(struct prestera_device *dev)
{
struct devlink *dl;
- dl = devlink_alloc(&prestera_dl_ops, sizeof(struct prestera_switch));
+ dl = devlink_alloc(&prestera_dl_ops, sizeof(struct prestera_switch),
+ dev->dev);
return devlink_priv(dl);
}
struct devlink *dl = priv_to_devlink(sw);
int err;
- err = devlink_register(dl, sw->dev->dev);
+ err = devlink_register(dl);
if (err) {
dev_err(prestera_dev(sw), "devlink_register failed: %d\n", err);
return err;
prestera_trap = &prestera_trap_items_arr[i];
devlink_traps_unregister(devlink, &prestera_trap->trap, 1);
}
+ devlink_trap_groups_unregister(devlink, prestera_trap_groups_arr,
+ groups_count);
err_groups_register:
kfree(trap_data->trap_items_arr);
err_trap_items_alloc:
#include "prestera.h"
-struct prestera_switch *prestera_devlink_alloc(void);
+struct prestera_switch *prestera_devlink_alloc(struct prestera_device *dev);
void prestera_devlink_free(struct prestera_switch *sw);
int prestera_devlink_register(struct prestera_switch *sw);
struct prestera_switch *sw;
int err;
- sw = prestera_devlink_alloc();
+ sw = prestera_devlink_alloc(dev);
if (!sw)
return -ENOMEM;
/* This driver supports yukon2 chipset only */
static const char *sky2_name(u8 chipid, char *buf, int sz)
{
- const char *name[] = {
+ static const char *const name[] = {
"XL", /* 0xb3 */
"EC Ultra", /* 0xb4 */
"Extreme", /* 0xb5 */
if (!SRIOV_VALID_STATE(dev->flags)) {
mlx4_err(dev, "Invalid SRIOV state\n");
+ err = -EINVAL;
goto err_close;
}
}
printk_once(KERN_INFO "%s", mlx4_version);
- devlink = devlink_alloc(&mlx4_devlink_ops, sizeof(*priv));
+ devlink = devlink_alloc(&mlx4_devlink_ops, sizeof(*priv), &pdev->dev);
if (!devlink)
return -ENOMEM;
priv = devlink_priv(devlink);
mutex_init(&dev->persist->interface_state_mutex);
mutex_init(&dev->persist->pci_status_mutex);
- ret = devlink_register(devlink, &pdev->dev);
+ ret = devlink_register(devlink);
if (ret)
goto err_persist_free;
ret = devlink_params_register(devlink, mlx4_devlink_params,
{
int err;
int i;
- enum mlx4_qp_state states[] = {
+ static const enum mlx4_qp_state states[] = {
MLX4_QP_STATE_RST,
MLX4_QP_STATE_INIT,
MLX4_QP_STATE_RTR,
health.o mcg.o cq.o alloc.o port.o mr.o pd.o \
transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \
fs_counters.o fs_ft_pool.o rl.o lag.o dev.o events.o wq.o lib/gid.o \
- lib/devcom.o lib/pci_vsc.o lib/dm.o diag/fs_tracepoint.o \
+ lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \
diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \
fw_reset.o qos.o
en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \
en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o \
en/qos.o en/trap.o en/fs_tt_redirect.o en/rqt.o en/tir.o \
- en/rx_res.o
+ en/rx_res.o en/channels.o
#
# Netdev extra
int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
u32 *in, int inlen, u32 *out, int outlen)
{
- int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), c_eqn);
+ int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context),
+ c_eqn_or_apu_element);
u32 din[MLX5_ST_SZ_DW(destroy_cq_in)] = {};
struct mlx5_eq_comp *eq;
int err;
return 1;
}
-/* This function is called with two flows:
- * 1. During initialization of mlx5_core_dev and we don't need to lock it.
- * 2. During LAG configure stage and caller holds &mlx5_intf_mutex.
- */
+/* Must be called with intf_mutex held */
struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev)
{
struct auxiliary_device *adev;
return 0;
}
-struct devlink *mlx5_devlink_alloc(void)
+struct devlink *mlx5_devlink_alloc(struct device *dev)
{
- return devlink_alloc(&mlx5_devlink_ops, sizeof(struct mlx5_core_dev));
+ return devlink_alloc(&mlx5_devlink_ops, sizeof(struct mlx5_core_dev),
+ dev);
}
void mlx5_devlink_free(struct devlink *devlink)
ARRAY_SIZE(mlx5_trap_groups_arr));
}
-int mlx5_devlink_register(struct devlink *devlink, struct device *dev)
+int mlx5_devlink_register(struct devlink *devlink)
{
int err;
- err = devlink_register(devlink, dev);
+ err = devlink_register(devlink);
if (err)
return err;
int mlx5_devlink_traps_get_action(struct mlx5_core_dev *dev, int trap_id,
enum devlink_trap_action *action);
-struct devlink *mlx5_devlink_alloc(void);
+struct devlink *mlx5_devlink_alloc(struct device *dev);
void mlx5_devlink_free(struct devlink *devlink);
-int mlx5_devlink_register(struct devlink *devlink, struct device *dev);
+int mlx5_devlink_register(struct devlink *devlink);
void mlx5_devlink_unregister(struct devlink *devlink);
#endif /* __MLX5_DEVLINK_H__ */
#define MLX5E_METADATA_ETHER_TYPE (0x8CE4)
#define MLX5E_METADATA_ETHER_LEN 8
-#define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v)
-
#define MLX5E_ETH_HARD_MTU (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)
#define MLX5E_HW2SW_MTU(params, hwmtu) ((hwmtu) - ((params)->hard_mtu))
#define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES_MPW 0x2
#define MLX5E_MIN_NUM_CHANNELS 0x1
+#define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE / 2)
#define MLX5E_MAX_NUM_SQS (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC)
#define MLX5E_TX_CQ_POLL_BUDGET 128
#define MLX5E_TX_XSK_POLL_BUDGET 64
u16 vid);
void mlx5e_timestamp_init(struct mlx5e_priv *priv);
-int mlx5e_modify_tirs_hash(struct mlx5e_priv *priv);
-
struct mlx5e_xsk_param;
struct mlx5e_rq_param;
void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv);
int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx);
-void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len,
- int num_channels);
-
int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state);
void mlx5e_activate_rq(struct mlx5e_rq *rq);
void mlx5e_deactivate_rq(struct mlx5e_rq *rq);
int mlx5e_init_di_list(struct mlx5e_rq *rq, int wq_sz, int node);
void mlx5e_free_di_list(struct mlx5e_rq *rq);
-int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv);
-
-int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc);
-void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv);
-
-int mlx5e_create_direct_rqts(struct mlx5e_priv *priv);
-void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv);
-int mlx5e_create_direct_tirs(struct mlx5e_priv *priv);
-void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv);
-
int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn);
void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn);
void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv);
void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv);
void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu);
-void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params,
- u16 num_channels);
void mlx5e_rx_dim_work(struct work_struct *work);
void mlx5e_tx_dim_work(struct work_struct *work);
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#include "channels.h"
+#include "en.h"
+#include "en/ptp.h"
+
+unsigned int mlx5e_channels_get_num(struct mlx5e_channels *chs)
+{
+ return chs->num;
+}
+
+void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn)
+{
+ struct mlx5e_channel *c;
+
+ WARN_ON(ix >= mlx5e_channels_get_num(chs));
+ c = chs->c[ix];
+
+ *rqn = c->rq.rqn;
+}
+
+bool mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn)
+{
+ struct mlx5e_channel *c;
+
+ WARN_ON(ix >= mlx5e_channels_get_num(chs));
+ c = chs->c[ix];
+
+ if (!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
+ return false;
+
+ *rqn = c->xskrq.rqn;
+ return true;
+}
+
+bool mlx5e_channels_get_ptp_rqn(struct mlx5e_channels *chs, u32 *rqn)
+{
+ struct mlx5e_ptp *c = chs->ptp;
+
+ if (!c || !test_bit(MLX5E_PTP_STATE_RX, c->state))
+ return false;
+
+ *rqn = c->rq.rqn;
+ return true;
+}
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5_EN_CHANNELS_H__
+#define __MLX5_EN_CHANNELS_H__
+
+#include <linux/kernel.h>
+
+struct mlx5e_channels;
+
+unsigned int mlx5e_channels_get_num(struct mlx5e_channels *chs);
+void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn);
+bool mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn);
+bool mlx5e_channels_get_ptp_rqn(struct mlx5e_channels *chs, u32 *rqn);
+
+#endif /* __MLX5_EN_CHANNELS_H__ */
#define __MLX5E_FLOW_STEER_H__
#include "mod_hdr.h"
+#include "lib/fs_ttc.h"
enum {
MLX5E_TC_FT_LEVEL = 0,
bool promisc_enabled;
};
-enum mlx5e_traffic_types {
- MLX5E_TT_IPV4_TCP,
- MLX5E_TT_IPV6_TCP,
- MLX5E_TT_IPV4_UDP,
- MLX5E_TT_IPV6_UDP,
- MLX5E_TT_IPV4_IPSEC_AH,
- MLX5E_TT_IPV6_IPSEC_AH,
- MLX5E_TT_IPV4_IPSEC_ESP,
- MLX5E_TT_IPV6_IPSEC_ESP,
- MLX5E_TT_IPV4,
- MLX5E_TT_IPV6,
- MLX5E_TT_ANY,
- MLX5E_NUM_TT,
- MLX5E_NUM_INDIR_TIRS = MLX5E_TT_ANY,
-};
+#define MLX5E_NUM_INDIR_TIRS (MLX5_NUM_TT - 1)
#define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\
MLX5_HASH_FIELD_SEL_DST_IP)
MLX5_HASH_FIELD_SEL_DST_IP |\
MLX5_HASH_FIELD_SEL_IPSEC_SPI)
-enum mlx5e_tunnel_types {
- MLX5E_TT_IPV4_GRE,
- MLX5E_TT_IPV6_GRE,
- MLX5E_TT_IPV4_IPIP,
- MLX5E_TT_IPV6_IPIP,
- MLX5E_TT_IPV4_IPV6,
- MLX5E_TT_IPV6_IPV6,
- MLX5E_NUM_TUNNEL_TT,
-};
-
-bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev);
-
-struct mlx5e_ttc_rule {
- struct mlx5_flow_handle *rule;
- struct mlx5_flow_destination default_dest;
-};
-
-/* L3/L4 traffic type classifier */
-struct mlx5e_ttc_table {
- struct mlx5e_flow_table ft;
- struct mlx5e_ttc_rule rules[MLX5E_NUM_TT];
- struct mlx5_flow_handle *tunnel_rules[MLX5E_NUM_TUNNEL_TT];
-};
-
/* NIC prio FTS */
enum {
MLX5E_PROMISC_FT_LEVEL,
#endif
};
-#define MLX5E_TTC_NUM_GROUPS 3
-#define MLX5E_TTC_GROUP1_SIZE (BIT(3) + MLX5E_NUM_TUNNEL_TT)
-#define MLX5E_TTC_GROUP2_SIZE BIT(1)
-#define MLX5E_TTC_GROUP3_SIZE BIT(0)
-#define MLX5E_TTC_TABLE_SIZE (MLX5E_TTC_GROUP1_SIZE +\
- MLX5E_TTC_GROUP2_SIZE +\
- MLX5E_TTC_GROUP3_SIZE)
-
-#define MLX5E_INNER_TTC_NUM_GROUPS 3
-#define MLX5E_INNER_TTC_GROUP1_SIZE BIT(3)
-#define MLX5E_INNER_TTC_GROUP2_SIZE BIT(1)
-#define MLX5E_INNER_TTC_GROUP3_SIZE BIT(0)
-#define MLX5E_INNER_TTC_TABLE_SIZE (MLX5E_INNER_TTC_GROUP1_SIZE +\
- MLX5E_INNER_TTC_GROUP2_SIZE +\
- MLX5E_INNER_TTC_GROUP3_SIZE)
-
struct mlx5e_priv;
#ifdef CONFIG_MLX5_EN_RXNFC
struct mlx5e_promisc_table promisc;
struct mlx5e_vlan_table *vlan;
struct mlx5e_l2_table l2;
- struct mlx5e_ttc_table ttc;
- struct mlx5e_ttc_table inner_ttc;
+ struct mlx5_ttc_table *ttc;
+ struct mlx5_ttc_table *inner_ttc;
#ifdef CONFIG_MLX5_EN_ARFS
struct mlx5e_arfs_tables *arfs;
#endif
struct mlx5e_ptp_fs *ptp_fs;
};
-struct ttc_params {
- struct mlx5_flow_table_attr ft_attr;
- u32 any_tt_tirn;
- u32 indir_tirn[MLX5E_NUM_INDIR_TIRS];
- struct mlx5e_ttc_table *inner_ttc;
-};
-
-void mlx5e_set_ttc_basic_params(struct mlx5e_priv *priv, struct ttc_params *ttc_params);
-void mlx5e_set_ttc_ft_params(struct ttc_params *ttc_params);
+void mlx5e_set_ttc_params(struct mlx5e_priv *priv,
+ struct ttc_params *ttc_params, bool tunnel);
-int mlx5e_create_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params,
- struct mlx5e_ttc_table *ttc);
-void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv,
- struct mlx5e_ttc_table *ttc);
+void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv);
+int mlx5e_create_ttc_table(struct mlx5e_priv *priv);
void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft);
-int mlx5e_ttc_fwd_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type,
- struct mlx5_flow_destination *new_dest);
-struct mlx5_flow_destination
-mlx5e_ttc_get_default_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type);
-int mlx5e_ttc_fwd_default_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type);
void mlx5e_enable_cvlan_filter(struct mlx5e_priv *priv);
void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv);
int mlx5e_create_flow_steering(struct mlx5e_priv *priv);
void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv);
-u8 mlx5e_get_proto_by_tunnel_type(enum mlx5e_tunnel_types tt);
int mlx5e_add_vlan_trap(struct mlx5e_priv *priv, int trap_id, int tir_num);
void mlx5e_remove_vlan_trap(struct mlx5e_priv *priv);
int mlx5e_add_mac_trap(struct mlx5e_priv *priv, int trap_id, int tir_num);
}
}
-static enum mlx5e_traffic_types fs_udp2tt(enum fs_udp_type i)
+static enum mlx5_traffic_types fs_udp2tt(enum fs_udp_type i)
{
switch (i) {
case FS_IPV4_UDP:
- return MLX5E_TT_IPV4_UDP;
+ return MLX5_TT_IPV4_UDP;
default: /* FS_IPV6_UDP */
- return MLX5E_TT_IPV6_UDP;
+ return MLX5_TT_IPV6_UDP;
}
}
-static enum fs_udp_type tt2fs_udp(enum mlx5e_traffic_types i)
+static enum fs_udp_type tt2fs_udp(enum mlx5_traffic_types i)
{
switch (i) {
- case MLX5E_TT_IPV4_UDP:
+ case MLX5_TT_IPV4_UDP:
return FS_IPV4_UDP;
- case MLX5E_TT_IPV6_UDP:
+ case MLX5_TT_IPV6_UDP:
return FS_IPV6_UDP;
default:
return FS_UDP_NUM_TYPES;
struct mlx5_flow_handle *
mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_priv *priv,
- enum mlx5e_traffic_types ttc_type,
+ enum mlx5_traffic_types ttc_type,
u32 tir_num, u16 d_port)
{
enum fs_udp_type type = tt2fs_udp(ttc_type);
fs_udp = priv->fs.udp;
fs_udp_t = &fs_udp->tables[type];
- dest = mlx5e_ttc_get_default_dest(priv, fs_udp2tt(type));
+ dest = mlx5_ttc_get_default_dest(priv->fs.ttc, fs_udp2tt(type));
rule = mlx5_add_flow_rules(fs_udp_t->t, NULL, &flow_act, &dest, 1);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
for (i = 0; i < FS_UDP_NUM_TYPES; i++) {
/* Modify ttc rules destination to point back to the indir TIRs */
- err = mlx5e_ttc_fwd_default_dest(priv, fs_udp2tt(i));
+ err = mlx5_ttc_fwd_default_dest(priv->fs.ttc, fs_udp2tt(i));
if (err) {
netdev_err(priv->netdev,
"%s: modify ttc[%d] default destination failed, err(%d)\n",
dest.ft = priv->fs.udp->tables[i].t;
/* Modify ttc rules destination to point on the accel_fs FTs */
- err = mlx5e_ttc_fwd_dest(priv, fs_udp2tt(i), &dest);
+ err = mlx5_ttc_fwd_dest(priv->fs.ttc, fs_udp2tt(i), &dest);
if (err) {
netdev_err(priv->netdev,
"%s: modify ttc[%d] destination to accel failed, err(%d)\n",
fs_any = priv->fs.any;
fs_any_t = &fs_any->table;
- dest = mlx5e_ttc_get_default_dest(priv, MLX5E_TT_ANY);
+ dest = mlx5_ttc_get_default_dest(priv->fs.ttc, MLX5_TT_ANY);
rule = mlx5_add_flow_rules(fs_any_t->t, NULL, &flow_act, &dest, 1);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
int err;
/* Modify ttc rules destination to point back to the indir TIRs */
- err = mlx5e_ttc_fwd_default_dest(priv, MLX5E_TT_ANY);
+ err = mlx5_ttc_fwd_default_dest(priv->fs.ttc, MLX5_TT_ANY);
if (err) {
netdev_err(priv->netdev,
"%s: modify ttc[%d] default destination failed, err(%d)\n",
- __func__, MLX5E_TT_ANY, err);
+ __func__, MLX5_TT_ANY, err);
return err;
}
return 0;
dest.ft = priv->fs.any->table.t;
/* Modify ttc rules destination to point on the accel_fs FTs */
- err = mlx5e_ttc_fwd_dest(priv, MLX5E_TT_ANY, &dest);
+ err = mlx5_ttc_fwd_dest(priv->fs.ttc, MLX5_TT_ANY, &dest);
if (err) {
netdev_err(priv->netdev,
"%s: modify ttc[%d] destination to accel failed, err(%d)\n",
- __func__, MLX5E_TT_ANY, err);
+ __func__, MLX5_TT_ANY, err);
return err;
}
return 0;
/* UDP traffic type redirect */
struct mlx5_flow_handle *
mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_priv *priv,
- enum mlx5e_traffic_types ttc_type,
+ enum mlx5_traffic_types ttc_type,
u32 tir_num, u16 d_port);
void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_priv *priv);
int mlx5e_fs_tt_redirect_udp_create(struct mlx5e_priv *priv);
#include <linux/slab.h>
#include <linux/xarray.h>
#include <linux/hashtable.h>
+#include <linux/refcount.h>
#include "mapping.h"
#define MAPPING_GRACE_PERIOD 2000
+static LIST_HEAD(shared_ctx_list);
+static DEFINE_MUTEX(shared_ctx_lock);
+
struct mapping_ctx {
struct xarray xarray;
DECLARE_HASHTABLE(ht, 8);
struct delayed_work dwork;
struct list_head pending_list;
spinlock_t pending_list_lock; /* Guards pending list */
+ u64 id;
+ u8 type;
+ struct list_head list;
+ refcount_t refcount;
};
struct mapping_item {
mutex_init(&ctx->lock);
xa_init_flags(&ctx->xarray, XA_FLAGS_ALLOC1);
+ refcount_set(&ctx->refcount, 1);
+ INIT_LIST_HEAD(&ctx->list);
+
+ return ctx;
+}
+
+struct mapping_ctx *
+mapping_create_for_id(u64 id, u8 type, size_t data_size, u32 max_id, bool delayed_removal)
+{
+ struct mapping_ctx *ctx;
+
+ mutex_lock(&shared_ctx_lock);
+ list_for_each_entry(ctx, &shared_ctx_list, list) {
+ if (ctx->id == id && ctx->type == type) {
+ if (refcount_inc_not_zero(&ctx->refcount))
+ goto unlock;
+ break;
+ }
+ }
+
+ ctx = mapping_create(data_size, max_id, delayed_removal);
+ if (IS_ERR(ctx))
+ goto unlock;
+
+ ctx->id = id;
+ ctx->type = type;
+ list_add(&ctx->list, &shared_ctx_list);
+
+unlock:
+ mutex_unlock(&shared_ctx_lock);
return ctx;
}
void mapping_destroy(struct mapping_ctx *ctx)
{
+ if (!refcount_dec_and_test(&ctx->refcount))
+ return;
+
+ mutex_lock(&shared_ctx_lock);
+ list_del(&ctx->list);
+ mutex_unlock(&shared_ctx_lock);
+
mapping_flush_work(ctx);
xa_destroy(&ctx->xarray);
mutex_destroy(&ctx->lock);
bool delayed_removal);
void mapping_destroy(struct mapping_ctx *ctx);
+/* adds mapping with an id or get an existing mapping with the same id
+ */
+struct mapping_ctx *
+mapping_create_for_id(u64 id, u8 type, size_t data_size, u32 max_id, bool delayed_removal);
+
#endif /* __MLX5_MAPPING_H__ */
param->cq_period_mode = params->rx_cq_moderation.cq_period_mode;
}
+static u8 rq_end_pad_mode(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
+{
+ bool ro = pcie_relaxed_ordering_enabled(mdev->pdev) &&
+ MLX5_CAP_GEN(mdev, relaxed_ordering_write);
+
+ return ro && params->lro_en ?
+ MLX5_WQ_END_PAD_MODE_NONE : MLX5_WQ_END_PAD_MODE_ALIGN;
+}
+
int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk,
}
MLX5_SET(wq, wq, wq_type, params->rq_wq_type);
- MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
+ MLX5_SET(wq, wq, end_padding_mode, rq_end_pad_mode(mdev, params));
MLX5_SET(wq, wq, log_wq_stride,
mlx5e_get_rqwq_log_stride(params->rq_wq_type, ndsegs));
MLX5_SET(wq, wq, pd, mdev->mlx5e_res.hw_objs.pdn);
params->log_sq_size = orig->log_sq_size;
mlx5e_ptp_build_sq_param(c->mdev, params, &cparams->txq_sq_param);
}
- if (test_bit(MLX5E_PTP_STATE_RX, c->state))
+ /* RQ */
+ if (test_bit(MLX5E_PTP_STATE_RX, c->state)) {
+ params->vlan_strip_disable = orig->vlan_strip_disable;
mlx5e_ptp_build_rq_param(c->mdev, c->netdev, c->priv->q_counter, cparams);
+ }
}
static int mlx5e_init_ptp_rq(struct mlx5e_ptp *c, struct mlx5e_params *params,
int err;
rq->wq_type = params->rq_wq_type;
- rq->pdev = mdev->device;
+ rq->pdev = c->pdev;
rq->netdev = priv->netdev;
rq->priv = priv;
rq->clock = &mdev->clock;
static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv)
{
+ u32 tirn = mlx5e_rx_res_get_tirn_ptp(priv->rx_res);
struct mlx5e_ptp_fs *ptp_fs = priv->fs.ptp_fs;
- u32 tirn = priv->rx_res->ptp.tir.tirn;
struct mlx5_flow_handle *rule;
int err;
if (err)
goto out_free;
- rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5E_TT_IPV4_UDP,
+ rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5_TT_IPV4_UDP,
tirn, PTP_EV_PORT);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
}
ptp_fs->udp_v4_rule = rule;
- rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5E_TT_IPV6_UDP,
+ rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5_TT_IPV6_UDP,
tirn, PTP_EV_PORT);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
#include "rqt.h"
#include <linux/mlx5/transobj.h>
+void mlx5e_rss_params_indir_init_uniform(struct mlx5e_rss_params_indir *indir,
+ unsigned int num_channels)
+{
+ unsigned int i;
+
+ for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++)
+ indir->table[i] = i % num_channels;
+}
+
static int mlx5e_rqt_init(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev,
u16 max_size, u32 *init_rqns, u16 init_size)
{
u32 table[MLX5E_INDIR_RQT_SIZE];
};
+void mlx5e_rss_params_indir_init_uniform(struct mlx5e_rss_params_indir *indir,
+ unsigned int num_channels);
+
struct mlx5e_rqt {
struct mlx5_core_dev *mdev;
u32 rqtn;
/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
#include "rx_res.h"
+#include "channels.h"
+#include "params.h"
static const struct mlx5e_rss_params_traffic_type rss_default_config[MLX5E_NUM_INDIR_TIRS] = {
- [MLX5E_TT_IPV4_TCP] = {
+ [MLX5_TT_IPV4_TCP] = {
.l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
.l4_prot_type = MLX5_L4_PROT_TYPE_TCP,
.rx_hash_fields = MLX5_HASH_IP_L4PORTS,
},
- [MLX5E_TT_IPV6_TCP] = {
+ [MLX5_TT_IPV6_TCP] = {
.l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
.l4_prot_type = MLX5_L4_PROT_TYPE_TCP,
.rx_hash_fields = MLX5_HASH_IP_L4PORTS,
},
- [MLX5E_TT_IPV4_UDP] = {
+ [MLX5_TT_IPV4_UDP] = {
.l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
.l4_prot_type = MLX5_L4_PROT_TYPE_UDP,
.rx_hash_fields = MLX5_HASH_IP_L4PORTS,
},
- [MLX5E_TT_IPV6_UDP] = {
+ [MLX5_TT_IPV6_UDP] = {
.l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
.l4_prot_type = MLX5_L4_PROT_TYPE_UDP,
.rx_hash_fields = MLX5_HASH_IP_L4PORTS,
},
- [MLX5E_TT_IPV4_IPSEC_AH] = {
+ [MLX5_TT_IPV4_IPSEC_AH] = {
.l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
.l4_prot_type = 0,
.rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
},
- [MLX5E_TT_IPV6_IPSEC_AH] = {
+ [MLX5_TT_IPV6_IPSEC_AH] = {
.l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
.l4_prot_type = 0,
.rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
},
- [MLX5E_TT_IPV4_IPSEC_ESP] = {
+ [MLX5_TT_IPV4_IPSEC_ESP] = {
.l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
.l4_prot_type = 0,
.rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
},
- [MLX5E_TT_IPV6_IPSEC_ESP] = {
+ [MLX5_TT_IPV6_IPSEC_ESP] = {
.l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
.l4_prot_type = 0,
.rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
},
- [MLX5E_TT_IPV4] = {
+ [MLX5_TT_IPV4] = {
.l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
.l4_prot_type = 0,
.rx_hash_fields = MLX5_HASH_IP,
},
- [MLX5E_TT_IPV6] = {
+ [MLX5_TT_IPV6] = {
.l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
.l4_prot_type = 0,
.rx_hash_fields = MLX5_HASH_IP,
};
struct mlx5e_rss_params_traffic_type
-mlx5e_rss_get_default_tt_config(enum mlx5e_traffic_types tt)
+mlx5e_rss_get_default_tt_config(enum mlx5_traffic_types tt)
{
return rss_default_config[tt];
}
+struct mlx5e_rx_res {
+ struct mlx5_core_dev *mdev;
+ enum mlx5e_rx_res_features features;
+ unsigned int max_nch;
+ u32 drop_rqn;
+
+ struct {
+ struct mlx5e_rss_params_hash hash;
+ struct mlx5e_rss_params_indir indir;
+ u32 rx_hash_fields[MLX5E_NUM_INDIR_TIRS];
+ } rss_params;
+
+ struct mlx5e_rqt indir_rqt;
+ struct {
+ struct mlx5e_tir indir_tir;
+ struct mlx5e_tir inner_indir_tir;
+ } rss[MLX5E_NUM_INDIR_TIRS];
+
+ bool rss_active;
+ u32 rss_rqns[MLX5E_INDIR_RQT_SIZE];
+ unsigned int rss_nch;
+
+ struct {
+ struct mlx5e_rqt direct_rqt;
+ struct mlx5e_tir direct_tir;
+ struct mlx5e_rqt xsk_rqt;
+ struct mlx5e_tir xsk_tir;
+ } *channels;
+
+ struct {
+ struct mlx5e_rqt rqt;
+ struct mlx5e_tir tir;
+ } ptp;
+};
+
+struct mlx5e_rx_res *mlx5e_rx_res_alloc(void)
+{
+ return kvzalloc(sizeof(struct mlx5e_rx_res), GFP_KERNEL);
+}
+
+static void mlx5e_rx_res_rss_params_init(struct mlx5e_rx_res *res, unsigned int init_nch)
+{
+ enum mlx5_traffic_types tt;
+
+ res->rss_params.hash.hfunc = ETH_RSS_HASH_TOP;
+ netdev_rss_key_fill(res->rss_params.hash.toeplitz_hash_key,
+ sizeof(res->rss_params.hash.toeplitz_hash_key));
+ mlx5e_rss_params_indir_init_uniform(&res->rss_params.indir, init_nch);
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
+ res->rss_params.rx_hash_fields[tt] =
+ mlx5e_rss_get_default_tt_config(tt).rx_hash_fields;
+}
+
+static int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res,
+ const struct mlx5e_lro_param *init_lro_param)
+{
+ bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
+ enum mlx5_traffic_types tt, max_tt;
+ struct mlx5e_tir_builder *builder;
+ u32 indir_rqtn;
+ int err;
+
+ builder = mlx5e_tir_builder_alloc(false);
+ if (!builder)
+ return -ENOMEM;
+
+ err = mlx5e_rqt_init_direct(&res->indir_rqt, res->mdev, true, res->drop_rqn);
+ if (err)
+ goto out;
+
+ indir_rqtn = mlx5e_rqt_get_rqtn(&res->indir_rqt);
+
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+ struct mlx5e_rss_params_traffic_type rss_tt;
+
+ mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
+ indir_rqtn, inner_ft_support);
+ mlx5e_tir_builder_build_lro(builder, init_lro_param);
+ rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt);
+ mlx5e_tir_builder_build_rss(builder, &res->rss_params.hash, &rss_tt, false);
+
+ err = mlx5e_tir_init(&res->rss[tt].indir_tir, builder, res->mdev, true);
+ if (err) {
+ mlx5_core_warn(res->mdev, "Failed to create an indirect TIR: err = %d, tt = %d\n",
+ err, tt);
+ goto err_destroy_tirs;
+ }
+
+ mlx5e_tir_builder_clear(builder);
+ }
+
+ if (!inner_ft_support)
+ goto out;
+
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+ struct mlx5e_rss_params_traffic_type rss_tt;
+
+ mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
+ indir_rqtn, inner_ft_support);
+ mlx5e_tir_builder_build_lro(builder, init_lro_param);
+ rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt);
+ mlx5e_tir_builder_build_rss(builder, &res->rss_params.hash, &rss_tt, true);
+
+ err = mlx5e_tir_init(&res->rss[tt].inner_indir_tir, builder, res->mdev, true);
+ if (err) {
+ mlx5_core_warn(res->mdev, "Failed to create an inner indirect TIR: err = %d, tt = %d\n",
+ err, tt);
+ goto err_destroy_inner_tirs;
+ }
+
+ mlx5e_tir_builder_clear(builder);
+ }
+
+ goto out;
+
+err_destroy_inner_tirs:
+ max_tt = tt;
+ for (tt = 0; tt < max_tt; tt++)
+ mlx5e_tir_destroy(&res->rss[tt].inner_indir_tir);
+
+ tt = MLX5E_NUM_INDIR_TIRS;
+err_destroy_tirs:
+ max_tt = tt;
+ for (tt = 0; tt < max_tt; tt++)
+ mlx5e_tir_destroy(&res->rss[tt].indir_tir);
+
+ mlx5e_rqt_destroy(&res->indir_rqt);
+
+out:
+ mlx5e_tir_builder_free(builder);
+
+ return err;
+}
+
+static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res,
+ const struct mlx5e_lro_param *init_lro_param)
+{
+ bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
+ struct mlx5e_tir_builder *builder;
+ int err = 0;
+ int ix;
+
+ builder = mlx5e_tir_builder_alloc(false);
+ if (!builder)
+ return -ENOMEM;
+
+ res->channels = kvcalloc(res->max_nch, sizeof(*res->channels), GFP_KERNEL);
+ if (!res->channels) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ for (ix = 0; ix < res->max_nch; ix++) {
+ err = mlx5e_rqt_init_direct(&res->channels[ix].direct_rqt,
+ res->mdev, false, res->drop_rqn);
+ if (err) {
+ mlx5_core_warn(res->mdev, "Failed to create a direct RQT: err = %d, ix = %u\n",
+ err, ix);
+ goto err_destroy_direct_rqts;
+ }
+ }
+
+ for (ix = 0; ix < res->max_nch; ix++) {
+ mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
+ mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
+ inner_ft_support);
+ mlx5e_tir_builder_build_lro(builder, init_lro_param);
+ mlx5e_tir_builder_build_direct(builder);
+
+ err = mlx5e_tir_init(&res->channels[ix].direct_tir, builder, res->mdev, true);
+ if (err) {
+ mlx5_core_warn(res->mdev, "Failed to create a direct TIR: err = %d, ix = %u\n",
+ err, ix);
+ goto err_destroy_direct_tirs;
+ }
+
+ mlx5e_tir_builder_clear(builder);
+ }
+
+ if (!(res->features & MLX5E_RX_RES_FEATURE_XSK))
+ goto out;
+
+ for (ix = 0; ix < res->max_nch; ix++) {
+ err = mlx5e_rqt_init_direct(&res->channels[ix].xsk_rqt,
+ res->mdev, false, res->drop_rqn);
+ if (err) {
+ mlx5_core_warn(res->mdev, "Failed to create an XSK RQT: err = %d, ix = %u\n",
+ err, ix);
+ goto err_destroy_xsk_rqts;
+ }
+ }
+
+ for (ix = 0; ix < res->max_nch; ix++) {
+ mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
+ mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
+ inner_ft_support);
+ mlx5e_tir_builder_build_lro(builder, init_lro_param);
+ mlx5e_tir_builder_build_direct(builder);
+
+ err = mlx5e_tir_init(&res->channels[ix].xsk_tir, builder, res->mdev, true);
+ if (err) {
+ mlx5_core_warn(res->mdev, "Failed to create an XSK TIR: err = %d, ix = %u\n",
+ err, ix);
+ goto err_destroy_xsk_tirs;
+ }
+
+ mlx5e_tir_builder_clear(builder);
+ }
+
+ goto out;
+
+err_destroy_xsk_tirs:
+ while (--ix >= 0)
+ mlx5e_tir_destroy(&res->channels[ix].xsk_tir);
+
+ ix = res->max_nch;
+err_destroy_xsk_rqts:
+ while (--ix >= 0)
+ mlx5e_rqt_destroy(&res->channels[ix].xsk_rqt);
+
+ ix = res->max_nch;
+err_destroy_direct_tirs:
+ while (--ix >= 0)
+ mlx5e_tir_destroy(&res->channels[ix].direct_tir);
+
+ ix = res->max_nch;
+err_destroy_direct_rqts:
+ while (--ix >= 0)
+ mlx5e_rqt_destroy(&res->channels[ix].direct_rqt);
+
+ kvfree(res->channels);
+
+out:
+ mlx5e_tir_builder_free(builder);
+
+ return err;
+}
+
+static int mlx5e_rx_res_ptp_init(struct mlx5e_rx_res *res)
+{
+ bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
+ struct mlx5e_tir_builder *builder;
+ int err;
+
+ builder = mlx5e_tir_builder_alloc(false);
+ if (!builder)
+ return -ENOMEM;
+
+ err = mlx5e_rqt_init_direct(&res->ptp.rqt, res->mdev, false, res->drop_rqn);
+ if (err)
+ goto out;
+
+ mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
+ mlx5e_rqt_get_rqtn(&res->ptp.rqt),
+ inner_ft_support);
+ mlx5e_tir_builder_build_direct(builder);
+
+ err = mlx5e_tir_init(&res->ptp.tir, builder, res->mdev, true);
+ if (err)
+ goto err_destroy_ptp_rqt;
+
+ goto out;
+
+err_destroy_ptp_rqt:
+ mlx5e_rqt_destroy(&res->ptp.rqt);
+
+out:
+ mlx5e_tir_builder_free(builder);
+ return err;
+}
+
+static void mlx5e_rx_res_rss_destroy(struct mlx5e_rx_res *res)
+{
+ enum mlx5_traffic_types tt;
+
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
+ mlx5e_tir_destroy(&res->rss[tt].indir_tir);
+
+ if (res->features & MLX5E_RX_RES_FEATURE_INNER_FT)
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
+ mlx5e_tir_destroy(&res->rss[tt].inner_indir_tir);
+
+ mlx5e_rqt_destroy(&res->indir_rqt);
+}
+
+static void mlx5e_rx_res_channels_destroy(struct mlx5e_rx_res *res)
+{
+ unsigned int ix;
+
+ for (ix = 0; ix < res->max_nch; ix++) {
+ mlx5e_tir_destroy(&res->channels[ix].direct_tir);
+ mlx5e_rqt_destroy(&res->channels[ix].direct_rqt);
+
+ if (!(res->features & MLX5E_RX_RES_FEATURE_XSK))
+ continue;
+
+ mlx5e_tir_destroy(&res->channels[ix].xsk_tir);
+ mlx5e_rqt_destroy(&res->channels[ix].xsk_rqt);
+ }
+
+ kvfree(res->channels);
+}
+
+static void mlx5e_rx_res_ptp_destroy(struct mlx5e_rx_res *res)
+{
+ mlx5e_tir_destroy(&res->ptp.tir);
+ mlx5e_rqt_destroy(&res->ptp.rqt);
+}
+
+int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev,
+ enum mlx5e_rx_res_features features, unsigned int max_nch,
+ u32 drop_rqn, const struct mlx5e_lro_param *init_lro_param,
+ unsigned int init_nch)
+{
+ int err;
+
+ res->mdev = mdev;
+ res->features = features;
+ res->max_nch = max_nch;
+ res->drop_rqn = drop_rqn;
+
+ mlx5e_rx_res_rss_params_init(res, init_nch);
+
+ err = mlx5e_rx_res_rss_init(res, init_lro_param);
+ if (err)
+ return err;
+
+ err = mlx5e_rx_res_channels_init(res, init_lro_param);
+ if (err)
+ goto err_rss_destroy;
+
+ err = mlx5e_rx_res_ptp_init(res);
+ if (err)
+ goto err_channels_destroy;
+
+ return 0;
+
+err_channels_destroy:
+ mlx5e_rx_res_channels_destroy(res);
+err_rss_destroy:
+ mlx5e_rx_res_rss_destroy(res);
+ return err;
+}
+
+void mlx5e_rx_res_destroy(struct mlx5e_rx_res *res)
+{
+ mlx5e_rx_res_ptp_destroy(res);
+ mlx5e_rx_res_channels_destroy(res);
+ mlx5e_rx_res_rss_destroy(res);
+}
+
+void mlx5e_rx_res_free(struct mlx5e_rx_res *res)
+{
+ kvfree(res);
+}
+
+u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix)
+{
+ return mlx5e_tir_get_tirn(&res->channels[ix].direct_tir);
+}
+
+u32 mlx5e_rx_res_get_tirn_xsk(struct mlx5e_rx_res *res, unsigned int ix)
+{
+ WARN_ON(!(res->features & MLX5E_RX_RES_FEATURE_XSK));
+
+ return mlx5e_tir_get_tirn(&res->channels[ix].xsk_tir);
+}
+
+u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt)
+{
+ return mlx5e_tir_get_tirn(&res->rss[tt].indir_tir);
+}
+
+u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt)
+{
+ WARN_ON(!(res->features & MLX5E_RX_RES_FEATURE_INNER_FT));
+ return mlx5e_tir_get_tirn(&res->rss[tt].inner_indir_tir);
+}
+
+u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res)
+{
+ WARN_ON(!(res->features & MLX5E_RX_RES_FEATURE_PTP));
+ return mlx5e_tir_get_tirn(&res->ptp.tir);
+}
+
+u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix)
+{
+ return mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt);
+}
+
+static void mlx5e_rx_res_rss_enable(struct mlx5e_rx_res *res)
+{
+ int err;
+
+ res->rss_active = true;
+
+ err = mlx5e_rqt_redirect_indir(&res->indir_rqt, res->rss_rqns, res->rss_nch,
+ res->rss_params.hash.hfunc,
+ &res->rss_params.indir);
+ if (err)
+ mlx5_core_warn(res->mdev, "Failed to redirect indirect RQT %#x to channels: err = %d\n",
+ mlx5e_rqt_get_rqtn(&res->indir_rqt), err);
+}
+
+static void mlx5e_rx_res_rss_disable(struct mlx5e_rx_res *res)
+{
+ int err;
+
+ res->rss_active = false;
+
+ err = mlx5e_rqt_redirect_direct(&res->indir_rqt, res->drop_rqn);
+ if (err)
+ mlx5_core_warn(res->mdev, "Failed to redirect indirect RQT %#x to drop RQ %#x: err = %d\n",
+ mlx5e_rqt_get_rqtn(&res->indir_rqt), res->drop_rqn, err);
+}
+
+void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs)
+{
+ unsigned int nch, ix;
+ int err;
+
+ nch = mlx5e_channels_get_num(chs);
+
+ for (ix = 0; ix < chs->num; ix++)
+ mlx5e_channels_get_regular_rqn(chs, ix, &res->rss_rqns[ix]);
+ res->rss_nch = chs->num;
+
+ mlx5e_rx_res_rss_enable(res);
+
+ for (ix = 0; ix < nch; ix++) {
+ u32 rqn;
+
+ mlx5e_channels_get_regular_rqn(chs, ix, &rqn);
+ err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, rqn);
+ if (err)
+ mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to RQ %#x (channel %u): err = %d\n",
+ mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
+ rqn, ix, err);
+
+ if (!(res->features & MLX5E_RX_RES_FEATURE_XSK))
+ continue;
+
+ if (!mlx5e_channels_get_xsk_rqn(chs, ix, &rqn))
+ rqn = res->drop_rqn;
+ err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, rqn);
+ if (err)
+ mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to RQ %#x (channel %u): err = %d\n",
+ mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
+ rqn, ix, err);
+ }
+ for (ix = nch; ix < res->max_nch; ix++) {
+ err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, res->drop_rqn);
+ if (err)
+ mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (channel %u): err = %d\n",
+ mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
+ res->drop_rqn, ix, err);
+
+ if (!(res->features & MLX5E_RX_RES_FEATURE_XSK))
+ continue;
+
+ err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, res->drop_rqn);
+ if (err)
+ mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to drop RQ %#x (channel %u): err = %d\n",
+ mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
+ res->drop_rqn, ix, err);
+ }
+
+ if (res->features & MLX5E_RX_RES_FEATURE_PTP) {
+ u32 rqn;
+
+ if (mlx5e_channels_get_ptp_rqn(chs, &rqn))
+ rqn = res->drop_rqn;
+
+ err = mlx5e_rqt_redirect_direct(&res->ptp.rqt, rqn);
+ if (err)
+ mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to RQ %#x (PTP): err = %d\n",
+ mlx5e_rqt_get_rqtn(&res->ptp.rqt),
+ rqn, err);
+ }
+}
+
+void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res)
+{
+ unsigned int ix;
+ int err;
+
+ mlx5e_rx_res_rss_disable(res);
+
+ for (ix = 0; ix < res->max_nch; ix++) {
+ err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, res->drop_rqn);
+ if (err)
+ mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (channel %u): err = %d\n",
+ mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
+ res->drop_rqn, ix, err);
+
+ if (!(res->features & MLX5E_RX_RES_FEATURE_XSK))
+ continue;
+
+ err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, res->drop_rqn);
+ if (err)
+ mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to drop RQ %#x (channel %u): err = %d\n",
+ mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
+ res->drop_rqn, ix, err);
+ }
+
+ if (res->features & MLX5E_RX_RES_FEATURE_PTP) {
+ err = mlx5e_rqt_redirect_direct(&res->ptp.rqt, res->drop_rqn);
+ if (err)
+ mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (PTP): err = %d\n",
+ mlx5e_rqt_get_rqtn(&res->ptp.rqt),
+ res->drop_rqn, err);
+ }
+}
+
+int mlx5e_rx_res_xsk_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs,
+ unsigned int ix)
+{
+ u32 rqn;
+ int err;
+
+ if (!mlx5e_channels_get_xsk_rqn(chs, ix, &rqn))
+ return -EINVAL;
+
+ err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, rqn);
+ if (err)
+ mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to XSK RQ %#x (channel %u): err = %d\n",
+ mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
+ rqn, ix, err);
+ return err;
+}
+
+int mlx5e_rx_res_xsk_deactivate(struct mlx5e_rx_res *res, unsigned int ix)
+{
+ int err;
+
+ err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, res->drop_rqn);
+ if (err)
+ mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to drop RQ %#x (channel %u): err = %d\n",
+ mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
+ res->drop_rqn, ix, err);
+ return err;
+}
+
struct mlx5e_rss_params_traffic_type
-mlx5e_rx_res_rss_get_current_tt_config(struct mlx5e_rx_res *res, enum mlx5e_traffic_types tt)
+mlx5e_rx_res_rss_get_current_tt_config(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt)
{
struct mlx5e_rss_params_traffic_type rss_tt;
rss_tt.rx_hash_fields = res->rss_params.rx_hash_fields[tt];
return rss_tt;
}
+
+void mlx5e_rx_res_rss_set_indir_uniform(struct mlx5e_rx_res *res, unsigned int nch)
+{
+ mlx5e_rss_params_indir_init_uniform(&res->rss_params.indir, nch);
+
+ if (!res->rss_active)
+ return;
+
+ mlx5e_rx_res_rss_enable(res);
+}
+
+void mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 *indir, u8 *key, u8 *hfunc)
+{
+ unsigned int i;
+
+ if (indir)
+ for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++)
+ indir[i] = res->rss_params.indir.table[i];
+
+ if (key)
+ memcpy(key, res->rss_params.hash.toeplitz_hash_key,
+ sizeof(res->rss_params.hash.toeplitz_hash_key));
+
+ if (hfunc)
+ *hfunc = res->rss_params.hash.hfunc;
+}
+
+static int mlx5e_rx_res_rss_update_tir(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt,
+ bool inner)
+{
+ struct mlx5e_rss_params_traffic_type rss_tt;
+ struct mlx5e_tir_builder *builder;
+ struct mlx5e_tir *tir;
+ int err;
+
+ builder = mlx5e_tir_builder_alloc(true);
+ if (!builder)
+ return -ENOMEM;
+
+ rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt);
+
+ mlx5e_tir_builder_build_rss(builder, &res->rss_params.hash, &rss_tt, inner);
+ tir = inner ? &res->rss[tt].inner_indir_tir : &res->rss[tt].indir_tir;
+ err = mlx5e_tir_modify(tir, builder);
+
+ mlx5e_tir_builder_free(builder);
+ return err;
+}
+
+int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, const u32 *indir,
+ const u8 *key, const u8 *hfunc)
+{
+ enum mlx5_traffic_types tt;
+ bool changed_indir = false;
+ bool changed_hash = false;
+ int err;
+
+ if (hfunc && *hfunc != res->rss_params.hash.hfunc) {
+ switch (*hfunc) {
+ case ETH_RSS_HASH_XOR:
+ case ETH_RSS_HASH_TOP:
+ break;
+ default:
+ return -EINVAL;
+ }
+ changed_hash = true;
+ changed_indir = true;
+ res->rss_params.hash.hfunc = *hfunc;
+ }
+
+ if (key) {
+ if (res->rss_params.hash.hfunc == ETH_RSS_HASH_TOP)
+ changed_hash = true;
+ memcpy(res->rss_params.hash.toeplitz_hash_key, key,
+ sizeof(res->rss_params.hash.toeplitz_hash_key));
+ }
+
+ if (indir) {
+ unsigned int i;
+
+ changed_indir = true;
+
+ for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++)
+ res->rss_params.indir.table[i] = indir[i];
+ }
+
+ if (changed_indir && res->rss_active) {
+ err = mlx5e_rqt_redirect_indir(&res->indir_rqt, res->rss_rqns, res->rss_nch,
+ res->rss_params.hash.hfunc,
+ &res->rss_params.indir);
+ if (err)
+ mlx5_core_warn(res->mdev, "Failed to redirect indirect RQT %#x to channels: err = %d\n",
+ mlx5e_rqt_get_rqtn(&res->indir_rqt), err);
+ }
+
+ if (changed_hash)
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+ err = mlx5e_rx_res_rss_update_tir(res, tt, false);
+ if (err)
+ mlx5_core_warn(res->mdev, "Failed to update RSS hash of indirect TIR for traffic type %d: err = %d\n",
+ tt, err);
+
+ if (!(res->features & MLX5E_RX_RES_FEATURE_INNER_FT))
+ continue;
+
+ err = mlx5e_rx_res_rss_update_tir(res, tt, true);
+ if (err)
+ mlx5_core_warn(res->mdev, "Failed to update RSS hash of inner indirect TIR for traffic type %d: err = %d\n",
+ tt, err);
+ }
+
+ return 0;
+}
+
+u8 mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt)
+{
+ return res->rss_params.rx_hash_fields[tt];
+}
+
+int mlx5e_rx_res_rss_set_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt,
+ u8 rx_hash_fields)
+{
+ u8 old_rx_hash_fields;
+ int err;
+
+ old_rx_hash_fields = res->rss_params.rx_hash_fields[tt];
+
+ if (old_rx_hash_fields == rx_hash_fields)
+ return 0;
+
+ res->rss_params.rx_hash_fields[tt] = rx_hash_fields;
+
+ err = mlx5e_rx_res_rss_update_tir(res, tt, false);
+ if (err) {
+ res->rss_params.rx_hash_fields[tt] = old_rx_hash_fields;
+ mlx5_core_warn(res->mdev, "Failed to update RSS hash fields of indirect TIR for traffic type %d: err = %d\n",
+ tt, err);
+ return err;
+ }
+
+ if (!(res->features & MLX5E_RX_RES_FEATURE_INNER_FT))
+ return 0;
+
+ err = mlx5e_rx_res_rss_update_tir(res, tt, true);
+ if (err) {
+ /* Partial update happened. Try to revert - it may fail too, but
+ * there is nothing more we can do.
+ */
+ res->rss_params.rx_hash_fields[tt] = old_rx_hash_fields;
+ mlx5_core_warn(res->mdev, "Failed to update RSS hash fields of inner indirect TIR for traffic type %d: err = %d\n",
+ tt, err);
+ if (mlx5e_rx_res_rss_update_tir(res, tt, false))
+ mlx5_core_warn(res->mdev, "Partial update of RSS hash fields happened: failed to revert indirect TIR for traffic type %d to the old values\n",
+ tt);
+ }
+
+ return err;
+}
+
+int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param *lro_param)
+{
+ struct mlx5e_tir_builder *builder;
+ enum mlx5_traffic_types tt;
+ int err, final_err;
+ unsigned int ix;
+
+ builder = mlx5e_tir_builder_alloc(true);
+ if (!builder)
+ return -ENOMEM;
+
+ mlx5e_tir_builder_build_lro(builder, lro_param);
+
+ final_err = 0;
+
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+ err = mlx5e_tir_modify(&res->rss[tt].indir_tir, builder);
+ if (err) {
+ mlx5_core_warn(res->mdev, "Failed to update LRO state of indirect TIR %#x for traffic type %d: err = %d\n",
+ mlx5e_tir_get_tirn(&res->rss[tt].indir_tir), tt, err);
+ if (!final_err)
+ final_err = err;
+ }
+
+ if (!(res->features & MLX5E_RX_RES_FEATURE_INNER_FT))
+ continue;
+
+ err = mlx5e_tir_modify(&res->rss[tt].inner_indir_tir, builder);
+ if (err) {
+ mlx5_core_warn(res->mdev, "Failed to update LRO state of inner indirect TIR %#x for traffic type %d: err = %d\n",
+ mlx5e_tir_get_tirn(&res->rss[tt].inner_indir_tir), tt, err);
+ if (!final_err)
+ final_err = err;
+ }
+ }
+
+ for (ix = 0; ix < res->max_nch; ix++) {
+ err = mlx5e_tir_modify(&res->channels[ix].direct_tir, builder);
+ if (err) {
+ mlx5_core_warn(res->mdev, "Failed to update LRO state of direct TIR %#x for channel %u: err = %d\n",
+ mlx5e_tir_get_tirn(&res->channels[ix].direct_tir), ix, err);
+ if (!final_err)
+ final_err = err;
+ }
+ }
+
+ mlx5e_tir_builder_free(builder);
+ return final_err;
+}
+
+struct mlx5e_rss_params_hash mlx5e_rx_res_get_current_hash(struct mlx5e_rx_res *res)
+{
+ return res->rss_params.hash;
+}
#include "tir.h"
#include "fs.h"
-#define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE / 2)
+struct mlx5e_rx_res;
-struct mlx5e_rss_params {
- struct mlx5e_rss_params_hash hash;
- struct mlx5e_rss_params_indir indir;
- u32 rx_hash_fields[MLX5E_NUM_INDIR_TIRS];
-};
+struct mlx5e_channels;
+struct mlx5e_rss_params_hash;
-struct mlx5e_rx_res {
- struct mlx5e_rss_params rss_params;
-
- struct mlx5e_rqt indir_rqt;
- struct {
- struct mlx5e_tir indir_tir;
- struct mlx5e_tir inner_indir_tir;
- } rss[MLX5E_NUM_INDIR_TIRS];
-
- struct {
- struct mlx5e_rqt direct_rqt;
- struct mlx5e_tir direct_tir;
- struct mlx5e_rqt xsk_rqt;
- struct mlx5e_tir xsk_tir;
- } channels[MLX5E_MAX_NUM_CHANNELS];
-
- struct {
- struct mlx5e_rqt rqt;
- struct mlx5e_tir tir;
- } ptp;
+enum mlx5e_rx_res_features {
+ MLX5E_RX_RES_FEATURE_INNER_FT = BIT(0),
+ MLX5E_RX_RES_FEATURE_XSK = BIT(1),
+ MLX5E_RX_RES_FEATURE_PTP = BIT(2),
};
struct mlx5e_rss_params_traffic_type
-mlx5e_rss_get_default_tt_config(enum mlx5e_traffic_types tt);
+mlx5e_rss_get_default_tt_config(enum mlx5_traffic_types tt);
+
+/* Setup */
+struct mlx5e_rx_res *mlx5e_rx_res_alloc(void);
+int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev,
+ enum mlx5e_rx_res_features features, unsigned int max_nch,
+ u32 drop_rqn, const struct mlx5e_lro_param *init_lro_param,
+ unsigned int init_nch);
+void mlx5e_rx_res_destroy(struct mlx5e_rx_res *res);
+void mlx5e_rx_res_free(struct mlx5e_rx_res *res);
+
+/* TIRN getters for flow steering */
+u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix);
+u32 mlx5e_rx_res_get_tirn_xsk(struct mlx5e_rx_res *res, unsigned int ix);
+u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
+u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
+u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res);
+
+/* RQTN getters for modules that create their own TIRs */
+u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix);
+
+/* Activate/deactivate API */
+void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs);
+void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res);
+int mlx5e_rx_res_xsk_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs,
+ unsigned int ix);
+int mlx5e_rx_res_xsk_deactivate(struct mlx5e_rx_res *res, unsigned int ix);
+
+/* Configuration API */
struct mlx5e_rss_params_traffic_type
-mlx5e_rx_res_rss_get_current_tt_config(struct mlx5e_rx_res *res, enum mlx5e_traffic_types tt);
+mlx5e_rx_res_rss_get_current_tt_config(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
+void mlx5e_rx_res_rss_set_indir_uniform(struct mlx5e_rx_res *res, unsigned int nch);
+void mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 *indir, u8 *key, u8 *hfunc);
+int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, const u32 *indir,
+ const u8 *key, const u8 *hfunc);
+u8 mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
+int mlx5e_rx_res_rss_set_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt,
+ u8 rx_hash_fields);
+int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param *lro_param);
+
+/* Workaround for hairpin */
+struct mlx5e_rss_params_hash mlx5e_rx_res_get_current_hash(struct mlx5e_rx_res *res);
#endif /* __MLX5_EN_RX_RES_H__ */
struct mlx5_tc_ct_priv *ct_priv;
struct mlx5_core_dev *dev;
const char *msg;
+ u64 mapping_id;
int err;
dev = priv->mdev;
if (!ct_priv)
goto err_alloc;
- ct_priv->zone_mapping = mapping_create(sizeof(u16), 0, true);
+ mapping_id = mlx5_query_nic_system_image_guid(dev);
+
+ ct_priv->zone_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_ZONE,
+ sizeof(u16), 0, true);
if (IS_ERR(ct_priv->zone_mapping)) {
err = PTR_ERR(ct_priv->zone_mapping);
goto err_mapping_zone;
}
- ct_priv->labels_mapping = mapping_create(sizeof(u32) * 4, 0, true);
+ ct_priv->labels_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_LABELS,
+ sizeof(u32) * 4, 0, true);
if (IS_ERR(ct_priv->labels_mapping)) {
err = PTR_ERR(ct_priv->labels_mapping);
goto err_mapping_labels;
struct mlx5e_priv *priv = t->priv;
rq->wq_type = params->rq_wq_type;
- rq->pdev = mdev->device;
+ rq->pdev = t->pdev;
rq->netdev = priv->netdev;
rq->priv = priv;
rq->clock = &mdev->clock;
* any Fill Ring entries at the setup stage.
*/
- err = mlx5e_xsk_redirect_rqt_to_channel(priv, priv->channels.c[ix]);
+ err = mlx5e_rx_res_xsk_activate(priv->rx_res, &priv->channels, ix);
if (unlikely(err))
goto err_deactivate;
goto remove_pool;
c = priv->channels.c[ix];
- mlx5e_xsk_redirect_rqt_to_drop(priv, ix);
+ mlx5e_rx_res_xsk_deactivate(priv->rx_res, ix);
mlx5e_deactivate_xsk(c);
mlx5e_close_xsk(c);
mlx5e_deactivate_rq(&c->xskrq);
/* TX queue is disabled on close. */
}
-
-int mlx5e_xsk_redirect_rqt_to_channel(struct mlx5e_priv *priv, struct mlx5e_channel *c)
-{
- return mlx5e_rqt_redirect_direct(&priv->rx_res->channels[c->ix].xsk_rqt, c->xskrq.rqn);
-}
-
-int mlx5e_xsk_redirect_rqt_to_drop(struct mlx5e_priv *priv, u16 ix)
-{
- return mlx5e_rqt_redirect_direct(&priv->rx_res->channels[ix].xsk_rqt, priv->drop_rq.rqn);
-}
-
-int mlx5e_xsk_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs)
-{
- int err, i;
-
- if (!priv->xsk.refcnt)
- return 0;
-
- for (i = 0; i < chs->num; i++) {
- struct mlx5e_channel *c = chs->c[i];
-
- if (!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
- continue;
-
- err = mlx5e_xsk_redirect_rqt_to_channel(priv, c);
- if (unlikely(err))
- goto err_stop;
- }
-
- return 0;
-
-err_stop:
- for (i--; i >= 0; i--) {
- if (!test_bit(MLX5E_CHANNEL_STATE_XSK, chs->c[i]->state))
- continue;
-
- mlx5e_xsk_redirect_rqt_to_drop(priv, i);
- }
-
- return err;
-}
-
-void mlx5e_xsk_redirect_rqts_to_drop(struct mlx5e_priv *priv, struct mlx5e_channels *chs)
-{
- int i;
-
- if (!priv->xsk.refcnt)
- return;
-
- for (i = 0; i < chs->num; i++) {
- if (!test_bit(MLX5E_CHANNEL_STATE_XSK, chs->c[i]->state))
- continue;
-
- mlx5e_xsk_redirect_rqt_to_drop(priv, i);
- }
-}
void mlx5e_close_xsk(struct mlx5e_channel *c);
void mlx5e_activate_xsk(struct mlx5e_channel *c);
void mlx5e_deactivate_xsk(struct mlx5e_channel *c);
-int mlx5e_xsk_redirect_rqt_to_channel(struct mlx5e_priv *priv, struct mlx5e_channel *c);
-int mlx5e_xsk_redirect_rqt_to_drop(struct mlx5e_priv *priv, u16 ix);
-int mlx5e_xsk_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs);
-void mlx5e_xsk_redirect_rqts_to_drop(struct mlx5e_priv *priv, struct mlx5e_channels *chs);
#endif /* __MLX5_EN_XSK_SETUP_H__ */
struct mlx5_flow_handle *default_rules[ACCEL_FS_TCP_NUM_TYPES];
};
-static enum mlx5e_traffic_types fs_accel2tt(enum accel_fs_tcp_type i)
+static enum mlx5_traffic_types fs_accel2tt(enum accel_fs_tcp_type i)
{
switch (i) {
case ACCEL_FS_IPV4_TCP:
- return MLX5E_TT_IPV4_TCP;
+ return MLX5_TT_IPV4_TCP;
default: /* ACCEL_FS_IPV6_TCP */
- return MLX5E_TT_IPV6_TCP;
+ return MLX5_TT_IPV6_TCP;
}
}
fs_tcp = priv->fs.accel_tcp;
accel_fs_t = &fs_tcp->tables[type];
- dest = mlx5e_ttc_get_default_dest(priv, fs_accel2tt(type));
+ dest = mlx5_ttc_get_default_dest(priv->fs.ttc, fs_accel2tt(type));
rule = mlx5_add_flow_rules(accel_fs_t->t, NULL, &flow_act, &dest, 1);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) {
/* Modify ttc rules destination to point back to the indir TIRs */
- err = mlx5e_ttc_fwd_default_dest(priv, fs_accel2tt(i));
+ err = mlx5_ttc_fwd_default_dest(priv->fs.ttc, fs_accel2tt(i));
if (err) {
netdev_err(priv->netdev,
"%s: modify ttc[%d] default destination failed, err(%d)\n",
dest.ft = priv->fs.accel_tcp->tables[i].t;
/* Modify ttc rules destination to point on the accel_fs FTs */
- err = mlx5e_ttc_fwd_dest(priv, fs_accel2tt(i), &dest);
+ err = mlx5_ttc_fwd_dest(priv->fs.ttc, fs_accel2tt(i), &dest);
if (err) {
netdev_err(priv->netdev,
"%s: modify ttc[%d] destination to accel failed, err(%d)\n",
};
/* IPsec RX flow steering */
-static enum mlx5e_traffic_types fs_esp2tt(enum accel_fs_esp_type i)
+static enum mlx5_traffic_types fs_esp2tt(enum accel_fs_esp_type i)
{
if (i == ACCEL_FS_ESP4)
- return MLX5E_TT_IPV4_IPSEC_ESP;
- return MLX5E_TT_IPV6_IPSEC_ESP;
+ return MLX5_TT_IPV4_IPSEC_ESP;
+ return MLX5_TT_IPV6_IPSEC_ESP;
}
static int rx_err_add_rule(struct mlx5e_priv *priv,
accel_esp = priv->ipsec->rx_fs;
fs_prot = &accel_esp->fs_prot[type];
- fs_prot->default_dest = mlx5e_ttc_get_default_dest(priv, fs_esp2tt(type));
+ fs_prot->default_dest =
+ mlx5_ttc_get_default_dest(priv->fs.ttc, fs_esp2tt(type));
err = rx_err_create_ft(priv, fs_prot, &fs_prot->rx_err);
if (err)
/* connect */
dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
dest.ft = fs_prot->ft;
- mlx5e_ttc_fwd_dest(priv, fs_esp2tt(type), &dest);
+ mlx5_ttc_fwd_dest(priv->fs.ttc, fs_esp2tt(type), &dest);
out:
mutex_unlock(&fs_prot->prot_mutex);
goto out;
/* disconnect */
- mlx5e_ttc_fwd_default_dest(priv, fs_esp2tt(type));
+ mlx5_ttc_fwd_default_dest(priv->fs.ttc, fs_esp2tt(type));
/* remove FT */
rx_destroy(priv, type);
priv_rx->sw_stats = &priv->tls->sw_stats;
mlx5e_set_ktls_rx_priv_ctx(tls_ctx, priv_rx);
- rqtn = mlx5e_rqt_get_rqtn(&priv->rx_res->channels[rxq].direct_rqt);
+ rqtn = mlx5e_rx_res_get_rqtn_direct(priv->rx_res, rxq);
err = mlx5e_ktls_create_tir(mdev, &priv_rx->tir, rqtn);
if (err)
for (j = 0; j < ARFS_HASH_SIZE; j++) \
hlist_for_each_entry_safe(hn, tmp, &hash[j], hlist)
-static enum mlx5e_traffic_types arfs_get_tt(enum arfs_type type)
+static enum mlx5_traffic_types arfs_get_tt(enum arfs_type type)
{
switch (type) {
case ARFS_IPV4_TCP:
- return MLX5E_TT_IPV4_TCP;
+ return MLX5_TT_IPV4_TCP;
case ARFS_IPV4_UDP:
- return MLX5E_TT_IPV4_UDP;
+ return MLX5_TT_IPV4_UDP;
case ARFS_IPV6_TCP:
- return MLX5E_TT_IPV6_TCP;
+ return MLX5_TT_IPV6_TCP;
case ARFS_IPV6_UDP:
- return MLX5E_TT_IPV6_UDP;
+ return MLX5_TT_IPV6_UDP;
default:
return -EINVAL;
}
for (i = 0; i < ARFS_NUM_TYPES; i++) {
/* Modify ttc rules destination back to their default */
- err = mlx5e_ttc_fwd_default_dest(priv, arfs_get_tt(i));
+ err = mlx5_ttc_fwd_default_dest(priv->fs.ttc, arfs_get_tt(i));
if (err) {
netdev_err(priv->netdev,
"%s: modify ttc[%d] default destination failed, err(%d)\n",
for (i = 0; i < ARFS_NUM_TYPES; i++) {
dest.ft = priv->fs.arfs->arfs_tables[i].ft.t;
/* Modify ttc rules destination to point on the aRFS FTs */
- err = mlx5e_ttc_fwd_dest(priv, arfs_get_tt(i), &dest);
+ err = mlx5_ttc_fwd_dest(priv->fs.ttc, arfs_get_tt(i), &dest);
if (err) {
netdev_err(priv->netdev,
"%s: modify ttc[%d] dest to arfs, failed err(%d)\n",
struct arfs_table *arfs_t = &priv->fs.arfs->arfs_tables[type];
struct mlx5_flow_destination dest = {};
MLX5_DECLARE_FLOW_ACT(flow_act);
- enum mlx5e_traffic_types tt;
+ enum mlx5_traffic_types tt;
int err = 0;
dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
return -EINVAL;
}
- /* FIXME: Must use mlx5e_ttc_get_default_dest(),
+ /* FIXME: Must use mlx5_ttc_get_default_dest(),
* but can't since TTC default is not setup yet !
*/
- dest.tir_num = priv->rx_res->rss[tt].indir_tir.tirn;
+ dest.tir_num = mlx5e_rx_res_get_tirn_rss(priv->rx_res, tt);
arfs_t->default_rule = mlx5_add_flow_rules(arfs_t->ft.t, NULL,
&flow_act,
&dest, 1);
16);
}
dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- dest.tir_num = priv->rx_res->channels[arfs_rule->rxq].direct_tir.tirn;
+ dest.tir_num = mlx5e_rx_res_get_tirn_direct(priv->rx_res, arfs_rule->rxq);
rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
int err = 0;
dst.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- dst.tir_num = priv->rx_res->channels[rxq].direct_tir.tirn;
+ dst.tir_num = mlx5e_rx_res_get_tirn_direct(priv->rx_res, rxq);
err = mlx5_modify_rule_destination(rule, &dst, NULL);
if (err)
netdev_warn(priv->netdev,
u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv)
{
- return sizeof(priv->rx_res->rss_params.hash.toeplitz_hash_key);
+ return sizeof_field(struct mlx5e_rss_params_hash, toeplitz_hash_key);
}
static u32 mlx5e_get_rxfh_key_size(struct net_device *netdev)
u8 *hfunc)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
- struct mlx5e_rss_params *rss;
- rss = &priv->rx_res->rss_params;
-
- if (indir)
- memcpy(indir, rss->indir.table, sizeof(rss->indir.table));
-
- if (key)
- memcpy(key, rss->hash.toeplitz_hash_key, sizeof(rss->hash.toeplitz_hash_key));
-
- if (hfunc)
- *hfunc = rss->hash.hfunc;
+ mutex_lock(&priv->state_lock);
+ mlx5e_rx_res_rss_get_rxfh(priv->rx_res, indir, key, hfunc);
+ mutex_unlock(&priv->state_lock);
return 0;
}
const u8 *key, const u8 hfunc)
{
struct mlx5e_priv *priv = netdev_priv(dev);
- struct mlx5e_rss_params *rss;
- bool refresh_tirs = false;
- bool refresh_rqt = false;
-
- if ((hfunc != ETH_RSS_HASH_NO_CHANGE) &&
- (hfunc != ETH_RSS_HASH_XOR) &&
- (hfunc != ETH_RSS_HASH_TOP))
- return -EINVAL;
+ int err;
mutex_lock(&priv->state_lock);
-
- rss = &priv->rx_res->rss_params;
-
- if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != rss->hash.hfunc) {
- rss->hash.hfunc = hfunc;
- refresh_rqt = true;
- refresh_tirs = true;
- }
-
- if (indir) {
- memcpy(rss->indir.table, indir, sizeof(rss->indir.table));
- refresh_rqt = true;
- }
-
- if (key) {
- memcpy(rss->hash.toeplitz_hash_key, key, sizeof(rss->hash.toeplitz_hash_key));
- refresh_tirs = refresh_tirs || rss->hash.hfunc == ETH_RSS_HASH_TOP;
- }
-
- if (refresh_rqt && test_bit(MLX5E_STATE_OPENED, &priv->state)) {
- u32 *rqns;
-
- rqns = kvmalloc_array(priv->channels.num, sizeof(*rqns), GFP_KERNEL);
- if (rqns) {
- unsigned int ix;
-
- for (ix = 0; ix < priv->channels.num; ix++)
- rqns[ix] = priv->channels.c[ix]->rq.rqn;
-
- mlx5e_rqt_redirect_indir(&priv->rx_res->indir_rqt, rqns,
- priv->channels.num,
- rss->hash.hfunc, &rss->indir);
- kvfree(rqns);
- }
- }
-
- if (refresh_tirs)
- mlx5e_modify_tirs_hash(priv);
-
+ err = mlx5e_rx_res_rss_set_rxfh(priv->rx_res, indir, key,
+ hfunc == ETH_RSS_HASH_NO_CHANGE ? NULL : &hfunc);
mutex_unlock(&priv->state_lock);
-
- return 0;
+ return err;
}
#define MLX5E_PFC_PREVEN_AUTO_TOUT_MSEC 100
if (!spec)
return -ENOMEM;
dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
- dest.ft = priv->fs.ttc.ft.t;
+ dest.ft = mlx5_get_ttc_flow_table(priv->fs.ttc);
rule_p = &priv->fs.promisc.rule;
*rule_p = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
ft->t = NULL;
}
-static void mlx5e_cleanup_ttc_rules(struct mlx5e_ttc_table *ttc)
-{
- int i;
-
- for (i = 0; i < MLX5E_NUM_TT; i++) {
- if (!IS_ERR_OR_NULL(ttc->rules[i].rule)) {
- mlx5_del_flow_rules(ttc->rules[i].rule);
- ttc->rules[i].rule = NULL;
- }
- }
-
- for (i = 0; i < MLX5E_NUM_TUNNEL_TT; i++) {
- if (!IS_ERR_OR_NULL(ttc->tunnel_rules[i])) {
- mlx5_del_flow_rules(ttc->tunnel_rules[i]);
- ttc->tunnel_rules[i] = NULL;
- }
- }
-}
-
-struct mlx5e_etype_proto {
- u16 etype;
- u8 proto;
-};
-
-static struct mlx5e_etype_proto ttc_rules[] = {
- [MLX5E_TT_IPV4_TCP] = {
- .etype = ETH_P_IP,
- .proto = IPPROTO_TCP,
- },
- [MLX5E_TT_IPV6_TCP] = {
- .etype = ETH_P_IPV6,
- .proto = IPPROTO_TCP,
- },
- [MLX5E_TT_IPV4_UDP] = {
- .etype = ETH_P_IP,
- .proto = IPPROTO_UDP,
- },
- [MLX5E_TT_IPV6_UDP] = {
- .etype = ETH_P_IPV6,
- .proto = IPPROTO_UDP,
- },
- [MLX5E_TT_IPV4_IPSEC_AH] = {
- .etype = ETH_P_IP,
- .proto = IPPROTO_AH,
- },
- [MLX5E_TT_IPV6_IPSEC_AH] = {
- .etype = ETH_P_IPV6,
- .proto = IPPROTO_AH,
- },
- [MLX5E_TT_IPV4_IPSEC_ESP] = {
- .etype = ETH_P_IP,
- .proto = IPPROTO_ESP,
- },
- [MLX5E_TT_IPV6_IPSEC_ESP] = {
- .etype = ETH_P_IPV6,
- .proto = IPPROTO_ESP,
- },
- [MLX5E_TT_IPV4] = {
- .etype = ETH_P_IP,
- .proto = 0,
- },
- [MLX5E_TT_IPV6] = {
- .etype = ETH_P_IPV6,
- .proto = 0,
- },
- [MLX5E_TT_ANY] = {
- .etype = 0,
- .proto = 0,
- },
-};
-
-static struct mlx5e_etype_proto ttc_tunnel_rules[] = {
- [MLX5E_TT_IPV4_GRE] = {
- .etype = ETH_P_IP,
- .proto = IPPROTO_GRE,
- },
- [MLX5E_TT_IPV6_GRE] = {
- .etype = ETH_P_IPV6,
- .proto = IPPROTO_GRE,
- },
- [MLX5E_TT_IPV4_IPIP] = {
- .etype = ETH_P_IP,
- .proto = IPPROTO_IPIP,
- },
- [MLX5E_TT_IPV6_IPIP] = {
- .etype = ETH_P_IPV6,
- .proto = IPPROTO_IPIP,
- },
- [MLX5E_TT_IPV4_IPV6] = {
- .etype = ETH_P_IP,
- .proto = IPPROTO_IPV6,
- },
- [MLX5E_TT_IPV6_IPV6] = {
- .etype = ETH_P_IPV6,
- .proto = IPPROTO_IPV6,
- },
-
-};
-
-u8 mlx5e_get_proto_by_tunnel_type(enum mlx5e_tunnel_types tt)
-{
- return ttc_tunnel_rules[tt].proto;
-}
-
-static bool mlx5e_tunnel_proto_supported_rx(struct mlx5_core_dev *mdev, u8 proto_type)
-{
- switch (proto_type) {
- case IPPROTO_GRE:
- return MLX5_CAP_ETH(mdev, tunnel_stateless_gre);
- case IPPROTO_IPIP:
- case IPPROTO_IPV6:
- return (MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip) ||
- MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip_rx));
- default:
- return false;
- }
-}
-
-static bool mlx5e_tunnel_any_rx_proto_supported(struct mlx5_core_dev *mdev)
-{
- int tt;
-
- for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) {
- if (mlx5e_tunnel_proto_supported_rx(mdev, ttc_tunnel_rules[tt].proto))
- return true;
- }
- return false;
-}
-
-bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev)
-{
- return (mlx5e_tunnel_any_rx_proto_supported(mdev) &&
- MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ft_field_support.inner_ip_version));
-}
-
-static u8 mlx5e_etype_to_ipv(u16 ethertype)
-{
- if (ethertype == ETH_P_IP)
- return 4;
-
- if (ethertype == ETH_P_IPV6)
- return 6;
-
- return 0;
-}
-
-static struct mlx5_flow_handle *
-mlx5e_generate_ttc_rule(struct mlx5e_priv *priv,
- struct mlx5_flow_table *ft,
- struct mlx5_flow_destination *dest,
- u16 etype,
- u8 proto)
-{
- int match_ipv_outer = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version);
- MLX5_DECLARE_FLOW_ACT(flow_act);
- struct mlx5_flow_handle *rule;
- struct mlx5_flow_spec *spec;
- int err = 0;
- u8 ipv;
-
- spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
- if (!spec)
- return ERR_PTR(-ENOMEM);
-
- if (proto) {
- spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
- MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol);
- MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, proto);
- }
-
- ipv = mlx5e_etype_to_ipv(etype);
- if (match_ipv_outer && ipv) {
- spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
- MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version);
- MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, ipv);
- } else if (etype) {
- spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
- MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype);
- MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype);
- }
-
- rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1);
- if (IS_ERR(rule)) {
- err = PTR_ERR(rule);
- netdev_err(priv->netdev, "%s: add rule failed\n", __func__);
- }
-
- kvfree(spec);
- return err ? ERR_PTR(err) : rule;
-}
-
-static int mlx5e_generate_ttc_table_rules(struct mlx5e_priv *priv,
- struct ttc_params *params,
- struct mlx5e_ttc_table *ttc)
-{
- struct mlx5_flow_destination dest = {};
- struct mlx5_flow_handle **trules;
- struct mlx5e_ttc_rule *rules;
- struct mlx5_flow_table *ft;
- int tt;
- int err;
-
- ft = ttc->ft.t;
- rules = ttc->rules;
-
- dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- for (tt = 0; tt < MLX5E_NUM_TT; tt++) {
- struct mlx5e_ttc_rule *rule = &rules[tt];
-
- if (tt == MLX5E_TT_ANY)
- dest.tir_num = params->any_tt_tirn;
- else
- dest.tir_num = params->indir_tirn[tt];
-
- rule->rule = mlx5e_generate_ttc_rule(priv, ft, &dest,
- ttc_rules[tt].etype,
- ttc_rules[tt].proto);
- if (IS_ERR(rule->rule)) {
- err = PTR_ERR(rule->rule);
- rule->rule = NULL;
- goto del_rules;
- }
- rule->default_dest = dest;
- }
-
- if (!params->inner_ttc || !mlx5e_tunnel_inner_ft_supported(priv->mdev))
- return 0;
-
- trules = ttc->tunnel_rules;
- dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
- dest.ft = params->inner_ttc->ft.t;
- for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) {
- if (!mlx5e_tunnel_proto_supported_rx(priv->mdev,
- ttc_tunnel_rules[tt].proto))
- continue;
- trules[tt] = mlx5e_generate_ttc_rule(priv, ft, &dest,
- ttc_tunnel_rules[tt].etype,
- ttc_tunnel_rules[tt].proto);
- if (IS_ERR(trules[tt])) {
- err = PTR_ERR(trules[tt]);
- trules[tt] = NULL;
- goto del_rules;
- }
- }
-
- return 0;
-
-del_rules:
- mlx5e_cleanup_ttc_rules(ttc);
- return err;
-}
-
-static int mlx5e_create_ttc_table_groups(struct mlx5e_ttc_table *ttc,
- bool use_ipv)
-{
- int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
- struct mlx5e_flow_table *ft = &ttc->ft;
- int ix = 0;
- u32 *in;
- int err;
- u8 *mc;
-
- ft->g = kcalloc(MLX5E_TTC_NUM_GROUPS,
- sizeof(*ft->g), GFP_KERNEL);
- if (!ft->g)
- return -ENOMEM;
- in = kvzalloc(inlen, GFP_KERNEL);
- if (!in) {
- kfree(ft->g);
- ft->g = NULL;
- return -ENOMEM;
- }
-
- /* L4 Group */
- mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
- MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol);
- if (use_ipv)
- MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_version);
- else
- MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
- MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
- MLX5_SET_CFG(in, start_flow_index, ix);
- ix += MLX5E_TTC_GROUP1_SIZE;
- MLX5_SET_CFG(in, end_flow_index, ix - 1);
- ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
- if (IS_ERR(ft->g[ft->num_groups]))
- goto err;
- ft->num_groups++;
-
- /* L3 Group */
- MLX5_SET(fte_match_param, mc, outer_headers.ip_protocol, 0);
- MLX5_SET_CFG(in, start_flow_index, ix);
- ix += MLX5E_TTC_GROUP2_SIZE;
- MLX5_SET_CFG(in, end_flow_index, ix - 1);
- ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
- if (IS_ERR(ft->g[ft->num_groups]))
- goto err;
- ft->num_groups++;
-
- /* Any Group */
- memset(in, 0, inlen);
- MLX5_SET_CFG(in, start_flow_index, ix);
- ix += MLX5E_TTC_GROUP3_SIZE;
- MLX5_SET_CFG(in, end_flow_index, ix - 1);
- ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
- if (IS_ERR(ft->g[ft->num_groups]))
- goto err;
- ft->num_groups++;
-
- kvfree(in);
- return 0;
-
-err:
- err = PTR_ERR(ft->g[ft->num_groups]);
- ft->g[ft->num_groups] = NULL;
- kvfree(in);
-
- return err;
-}
-
-static struct mlx5_flow_handle *
-mlx5e_generate_inner_ttc_rule(struct mlx5e_priv *priv,
- struct mlx5_flow_table *ft,
- struct mlx5_flow_destination *dest,
- u16 etype, u8 proto)
-{
- MLX5_DECLARE_FLOW_ACT(flow_act);
- struct mlx5_flow_handle *rule;
- struct mlx5_flow_spec *spec;
- int err = 0;
- u8 ipv;
-
- spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
- if (!spec)
- return ERR_PTR(-ENOMEM);
-
- ipv = mlx5e_etype_to_ipv(etype);
- if (etype && ipv) {
- spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS;
- MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_version);
- MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_version, ipv);
- }
-
- if (proto) {
- spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS;
- MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_protocol);
- MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_protocol, proto);
- }
-
- rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1);
- if (IS_ERR(rule)) {
- err = PTR_ERR(rule);
- netdev_err(priv->netdev, "%s: add rule failed\n", __func__);
- }
-
- kvfree(spec);
- return err ? ERR_PTR(err) : rule;
-}
-
-static int mlx5e_generate_inner_ttc_table_rules(struct mlx5e_priv *priv,
- struct ttc_params *params,
- struct mlx5e_ttc_table *ttc)
-{
- struct mlx5_flow_destination dest = {};
- struct mlx5e_ttc_rule *rules;
- struct mlx5_flow_table *ft;
- int err;
- int tt;
-
- ft = ttc->ft.t;
- rules = ttc->rules;
- dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
-
- for (tt = 0; tt < MLX5E_NUM_TT; tt++) {
- struct mlx5e_ttc_rule *rule = &rules[tt];
-
- if (tt == MLX5E_TT_ANY)
- dest.tir_num = params->any_tt_tirn;
- else
- dest.tir_num = params->indir_tirn[tt];
-
- rule->rule = mlx5e_generate_inner_ttc_rule(priv, ft, &dest,
- ttc_rules[tt].etype,
- ttc_rules[tt].proto);
- if (IS_ERR(rule->rule)) {
- err = PTR_ERR(rule->rule);
- rule->rule = NULL;
- goto del_rules;
- }
- rule->default_dest = dest;
- }
-
- return 0;
-
-del_rules:
-
- mlx5e_cleanup_ttc_rules(ttc);
- return err;
-}
-
-static int mlx5e_create_inner_ttc_table_groups(struct mlx5e_ttc_table *ttc)
-{
- int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
- struct mlx5e_flow_table *ft = &ttc->ft;
- int ix = 0;
- u32 *in;
- int err;
- u8 *mc;
-
- ft->g = kcalloc(MLX5E_INNER_TTC_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL);
- if (!ft->g)
- return -ENOMEM;
- in = kvzalloc(inlen, GFP_KERNEL);
- if (!in) {
- kfree(ft->g);
- ft->g = NULL;
- return -ENOMEM;
- }
-
- /* L4 Group */
- mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
- MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_protocol);
- MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_version);
- MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_INNER_HEADERS);
- MLX5_SET_CFG(in, start_flow_index, ix);
- ix += MLX5E_INNER_TTC_GROUP1_SIZE;
- MLX5_SET_CFG(in, end_flow_index, ix - 1);
- ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
- if (IS_ERR(ft->g[ft->num_groups]))
- goto err;
- ft->num_groups++;
-
- /* L3 Group */
- MLX5_SET(fte_match_param, mc, inner_headers.ip_protocol, 0);
- MLX5_SET_CFG(in, start_flow_index, ix);
- ix += MLX5E_INNER_TTC_GROUP2_SIZE;
- MLX5_SET_CFG(in, end_flow_index, ix - 1);
- ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
- if (IS_ERR(ft->g[ft->num_groups]))
- goto err;
- ft->num_groups++;
-
- /* Any Group */
- memset(in, 0, inlen);
- MLX5_SET_CFG(in, start_flow_index, ix);
- ix += MLX5E_INNER_TTC_GROUP3_SIZE;
- MLX5_SET_CFG(in, end_flow_index, ix - 1);
- ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
- if (IS_ERR(ft->g[ft->num_groups]))
- goto err;
- ft->num_groups++;
-
- kvfree(in);
- return 0;
-
-err:
- err = PTR_ERR(ft->g[ft->num_groups]);
- ft->g[ft->num_groups] = NULL;
- kvfree(in);
-
- return err;
-}
-
-void mlx5e_set_ttc_basic_params(struct mlx5e_priv *priv,
- struct ttc_params *ttc_params)
-{
- ttc_params->any_tt_tirn = priv->rx_res->channels[0].direct_tir.tirn;
- ttc_params->inner_ttc = &priv->fs.inner_ttc;
-}
-
-static void mlx5e_set_inner_ttc_ft_params(struct ttc_params *ttc_params)
+static void mlx5e_set_inner_ttc_params(struct mlx5e_priv *priv,
+ struct ttc_params *ttc_params)
{
struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
+ int tt;
- ft_attr->max_fte = MLX5E_INNER_TTC_TABLE_SIZE;
+ memset(ttc_params, 0, sizeof(*ttc_params));
+ ttc_params->ns = mlx5_get_flow_namespace(priv->mdev,
+ MLX5_FLOW_NAMESPACE_KERNEL);
ft_attr->level = MLX5E_INNER_TTC_FT_LEVEL;
ft_attr->prio = MLX5E_NIC_PRIO;
+
+ for (tt = 0; tt < MLX5_NUM_TT; tt++) {
+ ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ ttc_params->dests[tt].tir_num =
+ tt == MLX5_TT_ANY ?
+ mlx5e_rx_res_get_tirn_direct(priv->rx_res, 0) :
+ mlx5e_rx_res_get_tirn_rss_inner(priv->rx_res,
+ tt);
+ }
}
-void mlx5e_set_ttc_ft_params(struct ttc_params *ttc_params)
+void mlx5e_set_ttc_params(struct mlx5e_priv *priv,
+ struct ttc_params *ttc_params, bool tunnel)
{
struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
+ int tt;
- ft_attr->max_fte = MLX5E_TTC_TABLE_SIZE;
+ memset(ttc_params, 0, sizeof(*ttc_params));
+ ttc_params->ns = mlx5_get_flow_namespace(priv->mdev,
+ MLX5_FLOW_NAMESPACE_KERNEL);
ft_attr->level = MLX5E_TTC_FT_LEVEL;
ft_attr->prio = MLX5E_NIC_PRIO;
-}
-
-static int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params,
- struct mlx5e_ttc_table *ttc)
-{
- struct mlx5e_flow_table *ft = &ttc->ft;
- int err;
- ft->t = mlx5_create_flow_table(priv->fs.ns, ¶ms->ft_attr);
- if (IS_ERR(ft->t)) {
- err = PTR_ERR(ft->t);
- ft->t = NULL;
- return err;
+ for (tt = 0; tt < MLX5_NUM_TT; tt++) {
+ ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ ttc_params->dests[tt].tir_num =
+ tt == MLX5_TT_ANY ?
+ mlx5e_rx_res_get_tirn_direct(priv->rx_res, 0) :
+ mlx5e_rx_res_get_tirn_rss(priv->rx_res, tt);
}
- err = mlx5e_create_inner_ttc_table_groups(ttc);
- if (err)
- goto err;
-
- err = mlx5e_generate_inner_ttc_table_rules(priv, params, ttc);
- if (err)
- goto err;
-
- return 0;
-
-err:
- mlx5e_destroy_flow_table(ft);
- return err;
-}
-
-static void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv,
- struct mlx5e_ttc_table *ttc)
-{
- mlx5e_cleanup_ttc_rules(ttc);
- mlx5e_destroy_flow_table(&ttc->ft);
-}
-
-void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv,
- struct mlx5e_ttc_table *ttc)
-{
- mlx5e_cleanup_ttc_rules(ttc);
- mlx5e_destroy_flow_table(&ttc->ft);
-}
-
-int mlx5e_create_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params,
- struct mlx5e_ttc_table *ttc)
-{
- bool match_ipv_outer = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version);
- struct mlx5e_flow_table *ft = &ttc->ft;
- int err;
+ ttc_params->inner_ttc = tunnel;
+ if (!tunnel || !mlx5_tunnel_inner_ft_supported(priv->mdev))
+ return;
- ft->t = mlx5_create_flow_table(priv->fs.ns, ¶ms->ft_attr);
- if (IS_ERR(ft->t)) {
- err = PTR_ERR(ft->t);
- ft->t = NULL;
- return err;
+ for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) {
+ ttc_params->tunnel_dests[tt].type =
+ MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ ttc_params->tunnel_dests[tt].ft =
+ mlx5_get_ttc_flow_table(priv->fs.inner_ttc);
}
-
- err = mlx5e_create_ttc_table_groups(ttc, match_ipv_outer);
- if (err)
- goto err;
-
- err = mlx5e_generate_ttc_table_rules(priv, params, ttc);
- if (err)
- goto err;
-
- return 0;
-err:
- mlx5e_destroy_flow_table(ft);
- return err;
-}
-
-int mlx5e_ttc_fwd_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type,
- struct mlx5_flow_destination *new_dest)
-{
- return mlx5_modify_rule_destination(priv->fs.ttc.rules[type].rule, new_dest, NULL);
-}
-
-struct mlx5_flow_destination
-mlx5e_ttc_get_default_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type)
-{
- struct mlx5_flow_destination *dest = &priv->fs.ttc.rules[type].default_dest;
-
- WARN_ONCE(dest->type != MLX5_FLOW_DESTINATION_TYPE_TIR,
- "TTC[%d] default dest is not setup yet", type);
-
- return *dest;
-}
-
-int mlx5e_ttc_fwd_default_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type)
-{
- struct mlx5_flow_destination dest = mlx5e_ttc_get_default_dest(priv, type);
-
- return mlx5e_ttc_fwd_dest(priv, type, &dest);
}
static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv,
outer_headers.dmac_47_16);
dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
- dest.ft = priv->fs.ttc.ft.t;
+ dest.ft = mlx5_get_ttc_flow_table(priv->fs.ttc);
switch (type) {
case MLX5E_FULLMATCH:
kvfree(priv->fs.vlan);
}
-int mlx5e_create_flow_steering(struct mlx5e_priv *priv)
+static void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv)
+{
+ if (!mlx5_tunnel_inner_ft_supported(priv->mdev))
+ return;
+ mlx5_destroy_ttc_table(priv->fs.inner_ttc);
+}
+
+void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv)
+{
+ mlx5_destroy_ttc_table(priv->fs.ttc);
+}
+
+static int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv)
{
struct ttc_params ttc_params = {};
- int tt, err;
+
+ if (!mlx5_tunnel_inner_ft_supported(priv->mdev))
+ return 0;
+
+ mlx5e_set_inner_ttc_params(priv, &ttc_params);
+ priv->fs.inner_ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params);
+ if (IS_ERR(priv->fs.inner_ttc))
+ return PTR_ERR(priv->fs.inner_ttc);
+ return 0;
+}
+
+int mlx5e_create_ttc_table(struct mlx5e_priv *priv)
+{
+ struct ttc_params ttc_params = {};
+
+ mlx5e_set_ttc_params(priv, &ttc_params, true);
+ priv->fs.ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params);
+ if (IS_ERR(priv->fs.ttc))
+ return PTR_ERR(priv->fs.ttc);
+ return 0;
+}
+
+int mlx5e_create_flow_steering(struct mlx5e_priv *priv)
+{
+ int err;
priv->fs.ns = mlx5_get_flow_namespace(priv->mdev,
MLX5_FLOW_NAMESPACE_KERNEL);
priv->netdev->hw_features &= ~NETIF_F_NTUPLE;
}
- mlx5e_set_ttc_basic_params(priv, &ttc_params);
-
- if (mlx5e_tunnel_inner_ft_supported(priv->mdev)) {
- mlx5e_set_inner_ttc_ft_params(&ttc_params);
- for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
- ttc_params.indir_tirn[tt] = priv->rx_res->rss[tt].inner_indir_tir.tirn;
-
- err = mlx5e_create_inner_ttc_table(priv, &ttc_params, &priv->fs.inner_ttc);
- if (err) {
- netdev_err(priv->netdev, "Failed to create inner ttc table, err=%d\n",
- err);
- goto err_destroy_arfs_tables;
- }
+ err = mlx5e_create_inner_ttc_table(priv);
+ if (err) {
+ netdev_err(priv->netdev,
+ "Failed to create inner ttc table, err=%d\n",
+ err);
+ goto err_destroy_arfs_tables;
}
- mlx5e_set_ttc_ft_params(&ttc_params);
- for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
- ttc_params.indir_tirn[tt] = priv->rx_res->rss[tt].indir_tir.tirn;
-
- err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc);
+ err = mlx5e_create_ttc_table(priv);
if (err) {
netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n",
err);
err_destroy_l2_table:
mlx5e_destroy_l2_table(priv);
err_destroy_ttc_table:
- mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
+ mlx5e_destroy_ttc_table(priv);
err_destroy_inner_ttc_table:
- if (mlx5e_tunnel_inner_ft_supported(priv->mdev))
- mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc);
+ mlx5e_destroy_inner_ttc_table(priv);
err_destroy_arfs_tables:
mlx5e_arfs_destroy_tables(priv);
mlx5e_ptp_free_rx_fs(priv);
mlx5e_destroy_vlan_table(priv);
mlx5e_destroy_l2_table(priv);
- mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
- if (mlx5e_tunnel_inner_ft_supported(priv->mdev))
- mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc);
+ mlx5e_destroy_ttc_table(priv);
+ mlx5e_destroy_inner_ttc_table(priv);
mlx5e_arfs_destroy_tables(priv);
mlx5e_ethtool_cleanup_steering(priv);
}
dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
if (group == MLX5E_RQ_GROUP_XSK)
- dst->tir_num = priv->rx_res->channels[ix].xsk_tir.tirn;
+ dst->tir_num = mlx5e_rx_res_get_tirn_xsk(priv->rx_res, ix);
else
- dst->tir_num = priv->rx_res->channels[ix].direct_tir.tirn;
+ dst->tir_num = mlx5e_rx_res_get_tirn_direct(priv->rx_res, ix);
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
}
INIT_LIST_HEAD(&priv->fs.ethtool.rules);
}
-static enum mlx5e_traffic_types flow_type_to_traffic_type(u32 flow_type)
+static int flow_type_to_traffic_type(u32 flow_type)
{
switch (flow_type) {
case TCP_V4_FLOW:
- return MLX5E_TT_IPV4_TCP;
+ return MLX5_TT_IPV4_TCP;
case TCP_V6_FLOW:
- return MLX5E_TT_IPV6_TCP;
+ return MLX5_TT_IPV6_TCP;
case UDP_V4_FLOW:
- return MLX5E_TT_IPV4_UDP;
+ return MLX5_TT_IPV4_UDP;
case UDP_V6_FLOW:
- return MLX5E_TT_IPV6_UDP;
+ return MLX5_TT_IPV6_UDP;
case AH_V4_FLOW:
- return MLX5E_TT_IPV4_IPSEC_AH;
+ return MLX5_TT_IPV4_IPSEC_AH;
case AH_V6_FLOW:
- return MLX5E_TT_IPV6_IPSEC_AH;
+ return MLX5_TT_IPV6_IPSEC_AH;
case ESP_V4_FLOW:
- return MLX5E_TT_IPV4_IPSEC_ESP;
+ return MLX5_TT_IPV4_IPSEC_ESP;
case ESP_V6_FLOW:
- return MLX5E_TT_IPV6_IPSEC_ESP;
+ return MLX5_TT_IPV6_IPSEC_ESP;
case IPV4_FLOW:
- return MLX5E_TT_IPV4;
+ return MLX5_TT_IPV4;
case IPV6_FLOW:
- return MLX5E_TT_IPV6;
+ return MLX5_TT_IPV6;
default:
- return MLX5E_NUM_INDIR_TIRS;
+ return -EINVAL;
}
}
static int mlx5e_set_rss_hash_opt(struct mlx5e_priv *priv,
struct ethtool_rxnfc *nfc)
{
- enum mlx5e_traffic_types tt;
u8 rx_hash_field = 0;
+ int err;
+ int tt;
tt = flow_type_to_traffic_type(nfc->flow_type);
- if (tt == MLX5E_NUM_INDIR_TIRS)
- return -EINVAL;
+ if (tt < 0)
+ return tt;
/* RSS does not support anything other than hashing to queues
* on src IP, dest IP, TCP/UDP src port and TCP/UDP dest
rx_hash_field |= MLX5_HASH_FIELD_SEL_L4_DPORT;
mutex_lock(&priv->state_lock);
-
- if (rx_hash_field == priv->rx_res->rss_params.rx_hash_fields[tt])
- goto out;
-
- priv->rx_res->rss_params.rx_hash_fields[tt] = rx_hash_field;
- mlx5e_modify_tirs_hash(priv);
-
-out:
+ err = mlx5e_rx_res_rss_set_hash_fields(priv->rx_res, tt, rx_hash_field);
mutex_unlock(&priv->state_lock);
- return 0;
+
+ return err;
}
static int mlx5e_get_rss_hash_opt(struct mlx5e_priv *priv,
struct ethtool_rxnfc *nfc)
{
- enum mlx5e_traffic_types tt;
u32 hash_field = 0;
+ int tt;
tt = flow_type_to_traffic_type(nfc->flow_type);
- if (tt == MLX5E_NUM_INDIR_TIRS)
- return -EINVAL;
+ if (tt < 0)
+ return tt;
- hash_field = priv->rx_res->rss_params.rx_hash_fields[tt];
+ hash_field = mlx5e_rx_res_rss_get_hash_fields(priv->rx_res, tt);
nfc->data = 0;
if (hash_field & MLX5_HASH_FIELD_SEL_SRC_IP)
(__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas));
MLX5_SET(cqc, cqc, cq_period_mode, param->cq_period_mode);
- MLX5_SET(cqc, cqc, c_eqn, eqn);
+ MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index);
MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
MLX5_ADAPTER_PAGE_SHIFT);
chs->num = 0;
}
-int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv)
-{
- int err;
-
- err = mlx5e_rqt_init_direct(&priv->rx_res->indir_rqt, priv->mdev, true,
- priv->drop_rq.rqn);
- if (err)
- mlx5_core_warn(priv->mdev, "create indirect rqts failed, %d\n", err);
- return err;
-}
-
-int mlx5e_create_direct_rqts(struct mlx5e_priv *priv)
-{
- int err;
- int ix;
-
- for (ix = 0; ix < priv->max_nch; ix++) {
- err = mlx5e_rqt_init_direct(&priv->rx_res->channels[ix].direct_rqt,
- priv->mdev, false, priv->drop_rq.rqn);
- if (unlikely(err))
- goto err_destroy_rqts;
- }
-
- return 0;
-
-err_destroy_rqts:
- mlx5_core_warn(priv->mdev, "create direct rqts failed, %d\n", err);
- while (--ix >= 0)
- mlx5e_rqt_destroy(&priv->rx_res->channels[ix].direct_rqt);
-
- return err;
-}
-
-static int mlx5e_create_xsk_rqts(struct mlx5e_priv *priv)
-{
- int err;
- int ix;
-
- for (ix = 0; ix < priv->max_nch; ix++) {
- err = mlx5e_rqt_init_direct(&priv->rx_res->channels[ix].xsk_rqt,
- priv->mdev, false, priv->drop_rq.rqn);
- if (unlikely(err))
- goto err_destroy_rqts;
- }
-
- return 0;
-
-err_destroy_rqts:
- mlx5_core_warn(priv->mdev, "create xsk rqts failed, %d\n", err);
- while (--ix >= 0)
- mlx5e_rqt_destroy(&priv->rx_res->channels[ix].xsk_rqt);
-
- return err;
-}
-
-void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv)
-{
- unsigned int ix;
-
- for (ix = 0; ix < priv->max_nch; ix++)
- mlx5e_rqt_destroy(&priv->rx_res->channels[ix].direct_rqt);
-}
-
-static void mlx5e_destroy_xsk_rqts(struct mlx5e_priv *priv)
-{
- unsigned int ix;
-
- for (ix = 0; ix < priv->max_nch; ix++)
- mlx5e_rqt_destroy(&priv->rx_res->channels[ix].xsk_rqt);
-}
-
-static void mlx5e_redirect_rqts_to_channels(struct mlx5e_priv *priv,
- struct mlx5e_channels *chs)
-{
- struct mlx5e_rx_res *res = priv->rx_res;
- unsigned int ix;
- u32 *rqns;
-
- rqns = kvmalloc_array(chs->num, sizeof(*rqns), GFP_KERNEL);
- if (rqns) {
- for (ix = 0; ix < chs->num; ix++)
- rqns[ix] = chs->c[ix]->rq.rqn;
-
- mlx5e_rqt_redirect_indir(&res->indir_rqt, rqns, chs->num,
- res->rss_params.hash.hfunc,
- &res->rss_params.indir);
- kvfree(rqns);
- }
-
- for (ix = 0; ix < priv->max_nch; ix++) {
- u32 rqn = priv->drop_rq.rqn;
-
- if (ix < chs->num)
- rqn = chs->c[ix]->rq.rqn;
-
- mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, rqn);
- }
-
- if (priv->profile->rx_ptp_support) {
- u32 rqn;
-
- if (mlx5e_ptp_get_rqn(priv->channels.ptp, &rqn))
- rqn = priv->drop_rq.rqn;
-
- mlx5e_rqt_redirect_direct(&res->ptp.rqt, rqn);
- }
-}
-
-static void mlx5e_redirect_rqts_to_drop(struct mlx5e_priv *priv)
-{
- struct mlx5e_rx_res *res = priv->rx_res;
- unsigned int ix;
-
- mlx5e_rqt_redirect_direct(&res->indir_rqt, priv->drop_rq.rqn);
-
- for (ix = 0; ix < priv->max_nch; ix++)
- mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, priv->drop_rq.rqn);
-
- if (priv->profile->rx_ptp_support)
- mlx5e_rqt_redirect_direct(&res->ptp.rqt, priv->drop_rq.rqn);
-}
-
-int mlx5e_modify_tirs_hash(struct mlx5e_priv *priv)
-{
- struct mlx5e_rss_params_hash *rss_hash = &priv->rx_res->rss_params.hash;
- struct mlx5e_rss_params_traffic_type rss_tt;
- struct mlx5e_rx_res *res = priv->rx_res;
- struct mlx5e_tir_builder *builder;
- enum mlx5e_traffic_types tt;
-
- builder = mlx5e_tir_builder_alloc(true);
- if (!builder)
- return -ENOMEM;
-
- for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
- rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt);
- mlx5e_tir_builder_build_rss(builder, rss_hash, &rss_tt, false);
- mlx5e_tir_modify(&res->rss[tt].indir_tir, builder);
- mlx5e_tir_builder_clear(builder);
- }
-
- /* Verify inner tirs resources allocated */
- if (!res->rss[0].inner_indir_tir.tirn)
- goto out;
-
- for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
- rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt);
- mlx5e_tir_builder_build_rss(builder, rss_hash, &rss_tt, true);
- mlx5e_tir_modify(&res->rss[tt].indir_tir, builder);
- mlx5e_tir_builder_clear(builder);
- }
-
-out:
- mlx5e_tir_builder_free(builder);
- return 0;
-}
-
static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv)
{
struct mlx5e_rx_res *res = priv->rx_res;
- struct mlx5e_tir_builder *builder;
struct mlx5e_lro_param lro_param;
- enum mlx5e_traffic_types tt;
- int err;
- int ix;
-
- builder = mlx5e_tir_builder_alloc(true);
- if (!builder)
- return -ENOMEM;
lro_param = mlx5e_get_lro_param(&priv->channels.params);
- mlx5e_tir_builder_build_lro(builder, &lro_param);
-
- for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
- err = mlx5e_tir_modify(&res->rss[tt].indir_tir, builder);
- if (err)
- goto err_free_builder;
-
- /* Verify inner tirs resources allocated */
- if (!res->rss[0].inner_indir_tir.tirn)
- continue;
-
- err = mlx5e_tir_modify(&res->rss[tt].inner_indir_tir, builder);
- if (err)
- goto err_free_builder;
- }
-
- for (ix = 0; ix < priv->max_nch; ix++) {
- err = mlx5e_tir_modify(&res->channels[ix].direct_tir, builder);
- if (err)
- goto err_free_builder;
- }
-err_free_builder:
- mlx5e_tir_builder_free(builder);
- return err;
+ return mlx5e_rx_res_lro_set_param(res, &lro_param);
}
static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_modify_tirs_lro);
/* This function may be called on attach, before priv->rx_res is created. */
if (!netif_is_rxfh_configured(priv->netdev) && priv->rx_res)
- mlx5e_build_default_indir_rqt(priv->rx_res->rss_params.indir.table,
- MLX5E_INDIR_RQT_SIZE, count);
+ mlx5e_rx_res_rss_set_indir_uniform(priv->rx_res, count);
return 0;
}
mlx5e_wait_channels_min_rx_wqes(&priv->channels);
- if (priv->rx_res) {
- mlx5e_redirect_rqts_to_channels(priv, &priv->channels);
- mlx5e_xsk_redirect_rqts_to_channels(priv, &priv->channels);
- }
+ if (priv->rx_res)
+ mlx5e_rx_res_channels_activate(priv->rx_res, &priv->channels);
}
void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv)
{
- if (priv->rx_res) {
- mlx5e_xsk_redirect_rqts_to_drop(priv, &priv->channels);
- mlx5e_redirect_rqts_to_drop(priv);
- }
+ if (priv->rx_res)
+ mlx5e_rx_res_channels_deactivate(priv->rx_res);
if (mlx5e_is_vport_rep(priv))
mlx5e_remove_sqs_fwd_rules(priv);
mlx5e_destroy_tises(priv);
}
-int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc)
-{
- struct mlx5e_rss_params_hash *rss_hash = &priv->rx_res->rss_params.hash;
- bool inner_ft_support = priv->channels.params.tunneled_offload_en;
- struct mlx5e_rss_params_traffic_type rss_tt;
- struct mlx5e_rx_res *res = priv->rx_res;
- enum mlx5e_traffic_types tt, max_tt;
- struct mlx5e_tir_builder *builder;
- struct mlx5e_lro_param lro_param;
- u32 indir_rqtn;
- int err = 0;
-
- builder = mlx5e_tir_builder_alloc(false);
- if (!builder)
- return -ENOMEM;
-
- lro_param = mlx5e_get_lro_param(&priv->channels.params);
- indir_rqtn = mlx5e_rqt_get_rqtn(&res->indir_rqt);
-
- for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
- mlx5e_tir_builder_build_rqt(builder, priv->mdev->mlx5e_res.hw_objs.td.tdn,
- indir_rqtn, inner_ft_support);
- mlx5e_tir_builder_build_lro(builder, &lro_param);
- rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt);
- mlx5e_tir_builder_build_rss(builder, rss_hash, &rss_tt, false);
-
- err = mlx5e_tir_init(&res->rss[tt].indir_tir, builder, priv->mdev, true);
- if (err) {
- mlx5_core_warn(priv->mdev, "create indirect tirs failed, %d\n", err);
- goto err_destroy_tirs;
- }
-
- mlx5e_tir_builder_clear(builder);
- }
-
- if (!inner_ttc || !mlx5e_tunnel_inner_ft_supported(priv->mdev))
- goto out;
-
- for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
- mlx5e_tir_builder_build_rqt(builder, priv->mdev->mlx5e_res.hw_objs.td.tdn,
- indir_rqtn, inner_ft_support);
- mlx5e_tir_builder_build_lro(builder, &lro_param);
- rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt);
- mlx5e_tir_builder_build_rss(builder, rss_hash, &rss_tt, true);
-
- err = mlx5e_tir_init(&res->rss[tt].inner_indir_tir, builder, priv->mdev, true);
- if (err) {
- mlx5_core_warn(priv->mdev, "create inner indirect tirs failed, %d\n", err);
- goto err_destroy_inner_tirs;
- }
-
- mlx5e_tir_builder_clear(builder);
- }
-
- goto out;
-
-err_destroy_inner_tirs:
- max_tt = tt;
- for (tt = 0; tt < max_tt; tt++)
- mlx5e_tir_destroy(&res->rss[tt].inner_indir_tir);
-
- tt = MLX5E_NUM_INDIR_TIRS;
-err_destroy_tirs:
- max_tt = tt;
- for (tt = 0; tt < max_tt; tt++)
- mlx5e_tir_destroy(&res->rss[tt].indir_tir);
-
-out:
- mlx5e_tir_builder_free(builder);
-
- return err;
-}
-
-static int mlx5e_create_direct_tir(struct mlx5e_priv *priv, struct mlx5e_tir *tir,
- struct mlx5e_tir_builder *builder, struct mlx5e_rqt *rqt)
-{
- bool inner_ft_support = priv->channels.params.tunneled_offload_en;
- struct mlx5e_lro_param lro_param;
- int err = 0;
-
- lro_param = mlx5e_get_lro_param(&priv->channels.params);
-
- mlx5e_tir_builder_build_rqt(builder, priv->mdev->mlx5e_res.hw_objs.td.tdn,
- mlx5e_rqt_get_rqtn(rqt), inner_ft_support);
- mlx5e_tir_builder_build_lro(builder, &lro_param);
- mlx5e_tir_builder_build_direct(builder);
-
- err = mlx5e_tir_init(tir, builder, priv->mdev, true);
- if (unlikely(err))
- mlx5_core_warn(priv->mdev, "create tirs failed, %d\n", err);
-
- mlx5e_tir_builder_clear(builder);
-
- return err;
-}
-
-int mlx5e_create_direct_tirs(struct mlx5e_priv *priv)
-{
- struct mlx5e_rx_res *res = priv->rx_res;
- struct mlx5e_tir_builder *builder;
- int err = 0;
- int ix;
-
- builder = mlx5e_tir_builder_alloc(false);
- if (!builder)
- return -ENOMEM;
-
- for (ix = 0; ix < priv->max_nch; ix++) {
- err = mlx5e_create_direct_tir(priv, &res->channels[ix].direct_tir,
- builder, &res->channels[ix].direct_rqt);
- if (err)
- goto err_destroy_tirs;
- }
-
- goto out;
-
-err_destroy_tirs:
- while (--ix >= 0)
- mlx5e_tir_destroy(&res->channels[ix].direct_tir);
-
-out:
- mlx5e_tir_builder_free(builder);
-
- return err;
-}
-
-static int mlx5e_create_xsk_tirs(struct mlx5e_priv *priv)
-{
- struct mlx5e_rx_res *res = priv->rx_res;
- struct mlx5e_tir_builder *builder;
- int err;
- int ix;
-
- builder = mlx5e_tir_builder_alloc(false);
- if (!builder)
- return -ENOMEM;
-
- for (ix = 0; ix < priv->max_nch; ix++) {
- err = mlx5e_create_direct_tir(priv, &res->channels[ix].xsk_tir,
- builder, &res->channels[ix].xsk_rqt);
- if (err)
- goto err_destroy_tirs;
- }
-
- goto out;
-
-err_destroy_tirs:
- while (--ix >= 0)
- mlx5e_tir_destroy(&res->channels[ix].xsk_tir);
-
-out:
- mlx5e_tir_builder_free(builder);
-
- return err;
-}
-
-void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv)
-{
- struct mlx5e_rx_res *res = priv->rx_res;
- enum mlx5e_traffic_types tt;
-
- for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
- mlx5e_tir_destroy(&res->rss[tt].indir_tir);
-
- /* Verify inner tirs resources allocated */
- if (!res->rss[0].inner_indir_tir.tirn)
- return;
-
- for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
- mlx5e_tir_destroy(&res->rss[tt].inner_indir_tir);
-}
-
-void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv)
-{
- unsigned int ix;
-
- for (ix = 0; ix < priv->max_nch; ix++)
- mlx5e_tir_destroy(&priv->rx_res->channels[ix].direct_tir);
-}
-
-static void mlx5e_destroy_xsk_tirs(struct mlx5e_priv *priv)
-{
- unsigned int ix;
-
- for (ix = 0; ix < priv->max_nch; ix++)
- mlx5e_tir_destroy(&priv->rx_res->channels[ix].xsk_tir);
-}
-
static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool enable)
{
int err = 0;
static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd)
{
- int err = 0;
+ int err;
int i;
for (i = 0; i < chs->num; i++) {
if (err)
return err;
}
+ if (chs->ptp && test_bit(MLX5E_PTP_STATE_RX, chs->ptp->state))
+ return mlx5e_modify_rq_vsd(&chs->ptp->rq, vsd);
return 0;
}
return 0;
}
+static netdev_features_t mlx5e_fix_uplink_rep_features(struct net_device *netdev,
+ netdev_features_t features)
+{
+ features &= ~NETIF_F_HW_TLS_RX;
+ if (netdev->features & NETIF_F_HW_TLS_RX)
+ netdev_warn(netdev, "Disabling hw_tls_rx, not supported in switchdev mode\n");
+
+ features &= ~NETIF_F_HW_TLS_TX;
+ if (netdev->features & NETIF_F_HW_TLS_TX)
+ netdev_warn(netdev, "Disabling hw_tls_tx, not supported in switchdev mode\n");
+
+ features &= ~NETIF_F_NTUPLE;
+ if (netdev->features & NETIF_F_NTUPLE)
+ netdev_warn(netdev, "Disabling ntuple, not supported in switchdev mode\n");
+
+ return features;
+}
+
static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
netdev_features_t features)
{
netdev_warn(netdev, "Disabling rxhash, not supported when CQE compress is active\n");
}
- if (mlx5e_is_uplink_rep(priv)) {
- features &= ~NETIF_F_HW_TLS_RX;
- if (netdev->features & NETIF_F_HW_TLS_RX)
- netdev_warn(netdev, "Disabling hw_tls_rx, not supported in switchdev mode\n");
-
- features &= ~NETIF_F_HW_TLS_TX;
- if (netdev->features & NETIF_F_HW_TLS_TX)
- netdev_warn(netdev, "Disabling hw_tls_tx, not supported in switchdev mode\n");
- }
+ if (mlx5e_is_uplink_rep(priv))
+ features = mlx5e_fix_uplink_rep_features(netdev, features);
mutex_unlock(&priv->state_lock);
.ndo_get_devlink_port = mlx5e_get_devlink_port,
};
-void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len,
- int num_channels)
-{
- int i;
-
- for (i = 0; i < len; i++)
- indirection_rqt[i] = i % num_channels;
-}
-
static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout)
{
int i;
return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]);
}
-void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params,
- u16 num_channels)
-{
- enum mlx5e_traffic_types tt;
-
- rss_params->hash.hfunc = ETH_RSS_HASH_TOP;
- netdev_rss_key_fill(rss_params->hash.toeplitz_hash_key,
- sizeof(rss_params->hash.toeplitz_hash_key));
- mlx5e_build_default_indir_rqt(rss_params->indir.table,
- MLX5E_INDIR_RQT_SIZE, num_channels);
- for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
- rss_params->rx_hash_fields[tt] =
- mlx5e_rss_get_default_tt_config(tt).rx_hash_fields;
-}
-
void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu)
{
struct mlx5e_params *params = &priv->channels.params;
/* TX inline */
mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode);
- params->tunneled_offload_en = mlx5e_tunnel_inner_ft_supported(mdev);
+ params->tunneled_offload_en = mlx5_tunnel_inner_ft_supported(mdev);
/* AF_XDP */
params->xsk = xsk;
{
int tt;
- for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) {
- if (mlx5e_tunnel_proto_supported_tx(mdev, mlx5e_get_proto_by_tunnel_type(tt)))
+ for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) {
+ if (mlx5e_tunnel_proto_supported_tx(mdev, mlx5_get_proto_by_tunnel_type(tt)))
return true;
}
return (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev));
if (MLX5_CAP_ETH(mdev, scatter_fcs))
netdev->hw_features |= NETIF_F_RXFCS;
+ if (mlx5_qos_is_supported(mdev))
+ netdev->hw_features |= NETIF_F_HW_TC;
+
netdev->features = netdev->hw_features;
/* Defaults */
netdev->hw_features |= NETIF_F_NTUPLE;
#endif
}
- if (mlx5_qos_is_supported(mdev))
- netdev->features |= NETIF_F_HW_TC;
netdev->features |= NETIF_F_HIGHDMA;
netdev->features |= NETIF_F_HW_VLAN_STAG_FILTER;
static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
- struct mlx5e_tir_builder *tir_builder;
+ enum mlx5e_rx_res_features features;
+ struct mlx5e_lro_param lro_param;
int err;
- priv->rx_res = kvzalloc(sizeof(*priv->rx_res), GFP_KERNEL);
+ priv->rx_res = mlx5e_rx_res_alloc();
if (!priv->rx_res)
return -ENOMEM;
- mlx5e_build_rss_params(&priv->rx_res->rss_params, priv->channels.params.num_channels);
-
mlx5e_create_q_counters(priv);
err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
goto err_destroy_q_counters;
}
- err = mlx5e_create_indirect_rqt(priv);
+ features = MLX5E_RX_RES_FEATURE_XSK | MLX5E_RX_RES_FEATURE_PTP;
+ if (priv->channels.params.tunneled_offload_en)
+ features |= MLX5E_RX_RES_FEATURE_INNER_FT;
+ lro_param = mlx5e_get_lro_param(&priv->channels.params);
+ err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, features,
+ priv->max_nch, priv->drop_rq.rqn, &lro_param,
+ priv->channels.params.num_channels);
if (err)
goto err_close_drop_rq;
- err = mlx5e_create_direct_rqts(priv);
- if (err)
- goto err_destroy_indirect_rqts;
-
- err = mlx5e_create_indirect_tirs(priv, true);
- if (err)
- goto err_destroy_direct_rqts;
-
- err = mlx5e_create_direct_tirs(priv);
- if (err)
- goto err_destroy_indirect_tirs;
-
- err = mlx5e_create_xsk_rqts(priv);
- if (unlikely(err))
- goto err_destroy_direct_tirs;
-
- err = mlx5e_create_xsk_tirs(priv);
- if (unlikely(err))
- goto err_destroy_xsk_rqts;
-
- err = mlx5e_rqt_init_direct(&priv->rx_res->ptp.rqt, priv->mdev, false,
- priv->drop_rq.rqn);
- if (err)
- goto err_destroy_xsk_tirs;
-
- tir_builder = mlx5e_tir_builder_alloc(false);
- if (!tir_builder) {
- err = -ENOMEM;
- goto err_destroy_ptp_rqt;
- }
- err = mlx5e_create_direct_tir(priv, &priv->rx_res->ptp.tir, tir_builder,
- &priv->rx_res->ptp.rqt);
- mlx5e_tir_builder_free(tir_builder);
- if (err)
- goto err_destroy_ptp_rqt;
-
err = mlx5e_create_flow_steering(priv);
if (err) {
mlx5_core_warn(mdev, "create flow steering failed, %d\n", err);
- goto err_destroy_ptp_direct_tir;
+ goto err_destroy_rx_res;
}
err = mlx5e_tc_nic_init(priv);
mlx5e_tc_nic_cleanup(priv);
err_destroy_flow_steering:
mlx5e_destroy_flow_steering(priv);
-err_destroy_ptp_direct_tir:
- mlx5e_tir_destroy(&priv->rx_res->ptp.tir);
-err_destroy_ptp_rqt:
- mlx5e_rqt_destroy(&priv->rx_res->ptp.rqt);
-err_destroy_xsk_tirs:
- mlx5e_destroy_xsk_tirs(priv);
-err_destroy_xsk_rqts:
- mlx5e_destroy_xsk_rqts(priv);
-err_destroy_direct_tirs:
- mlx5e_destroy_direct_tirs(priv);
-err_destroy_indirect_tirs:
- mlx5e_destroy_indirect_tirs(priv);
-err_destroy_direct_rqts:
- mlx5e_destroy_direct_rqts(priv);
-err_destroy_indirect_rqts:
- mlx5e_rqt_destroy(&priv->rx_res->indir_rqt);
+err_destroy_rx_res:
+ mlx5e_rx_res_destroy(priv->rx_res);
err_close_drop_rq:
mlx5e_close_drop_rq(&priv->drop_rq);
err_destroy_q_counters:
mlx5e_destroy_q_counters(priv);
- kvfree(priv->rx_res);
+ mlx5e_rx_res_free(priv->rx_res);
priv->rx_res = NULL;
return err;
}
mlx5e_accel_cleanup_rx(priv);
mlx5e_tc_nic_cleanup(priv);
mlx5e_destroy_flow_steering(priv);
- mlx5e_tir_destroy(&priv->rx_res->ptp.tir);
- mlx5e_rqt_destroy(&priv->rx_res->ptp.rqt);
- mlx5e_destroy_xsk_tirs(priv);
- mlx5e_destroy_xsk_rqts(priv);
- mlx5e_destroy_direct_tirs(priv);
- mlx5e_destroy_indirect_tirs(priv);
- mlx5e_destroy_direct_rqts(priv);
- mlx5e_rqt_destroy(&priv->rx_res->indir_rqt);
+ mlx5e_rx_res_destroy(priv->rx_res);
mlx5e_close_drop_rq(&priv->drop_rq);
mlx5e_destroy_q_counters(priv);
- kvfree(priv->rx_res);
+ mlx5e_rx_res_free(priv->rx_res);
priv->rx_res = NULL;
}
#include "en/devlink.h"
#include "fs_core.h"
#include "lib/mlx5.h"
+#include "lib/devcom.h"
#define CREATE_TRACE_POINTS
#include "diag/en_rep_tracepoint.h"
#include "en_accel/ipsec.h"
rpriv = mlx5e_rep_to_rep_priv(rep);
list_for_each_entry_safe(rep_sq, tmp, &rpriv->vport_sqs_list, list) {
mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule);
+ if (rep_sq->send_to_vport_rule_peer)
+ mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule_peer);
list_del(&rep_sq->list);
kfree(rep_sq);
}
struct mlx5_eswitch_rep *rep,
u32 *sqns_array, int sqns_num)
{
+ struct mlx5_eswitch *peer_esw = NULL;
struct mlx5_flow_handle *flow_rule;
struct mlx5e_rep_priv *rpriv;
struct mlx5e_rep_sq *rep_sq;
return 0;
rpriv = mlx5e_rep_to_rep_priv(rep);
+ if (mlx5_devcom_is_paired(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS))
+ peer_esw = mlx5_devcom_get_peer_data(esw->dev->priv.devcom,
+ MLX5_DEVCOM_ESW_OFFLOADS);
+
for (i = 0; i < sqns_num; i++) {
rep_sq = kzalloc(sizeof(*rep_sq), GFP_KERNEL);
if (!rep_sq) {
}
/* Add re-inject rule to the PF/representor sqs */
- flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw, rep,
+ flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw, esw, rep,
sqns_array[i]);
if (IS_ERR(flow_rule)) {
err = PTR_ERR(flow_rule);
goto out_err;
}
rep_sq->send_to_vport_rule = flow_rule;
+ rep_sq->sqn = sqns_array[i];
+
+ if (peer_esw) {
+ flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw,
+ rep, sqns_array[i]);
+ if (IS_ERR(flow_rule)) {
+ err = PTR_ERR(flow_rule);
+ mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule);
+ kfree(rep_sq);
+ goto out_err;
+ }
+ rep_sq->send_to_vport_rule_peer = flow_rule;
+ }
+
list_add(&rep_sq->list, &rpriv->vport_sqs_list);
}
+
+ if (peer_esw)
+ mlx5_devcom_release_peer_data(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+
return 0;
out_err:
mlx5e_sqs2vport_stop(esw, rep);
+
+ if (peer_esw)
+ mlx5_devcom_release_peer_data(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+
return err;
}
{
struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct mlx5_eswitch_rep *rep = rpriv->rep;
- struct mlx5e_rx_res *res = priv->rx_res;
struct ttc_params ttc_params = {};
- int tt, err;
+ int err;
priv->fs.ns = mlx5_get_flow_namespace(priv->mdev,
MLX5_FLOW_NAMESPACE_KERNEL);
/* The inner_ttc in the ttc params is intentionally not set */
- ttc_params.any_tt_tirn = res->channels[0].direct_tir.tirn;
- mlx5e_set_ttc_ft_params(&ttc_params);
+ mlx5e_set_ttc_params(priv, &ttc_params, false);
if (rep->vport != MLX5_VPORT_UPLINK)
/* To give uplik rep TTC a lower level for chaining from root ft */
ttc_params.ft_attr.level = MLX5E_TTC_FT_LEVEL + 1;
- for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
- ttc_params.indir_tirn[tt] = res->rss[tt].indir_tir.tirn;
-
- err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc);
- if (err) {
- netdev_err(priv->netdev, "Failed to create rep ttc table, err=%d\n", err);
+ priv->fs.ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params);
+ if (IS_ERR(priv->fs.ttc)) {
+ err = PTR_ERR(priv->fs.ttc);
+ netdev_err(priv->netdev, "Failed to create rep ttc table, err=%d\n",
+ err);
return err;
}
return 0;
/* non uplik reps will skip any bypass tables and go directly to
* their own ttc
*/
- rpriv->root_ft = priv->fs.ttc.ft.t;
+ rpriv->root_ft = mlx5_get_ttc_flow_table(priv->fs.ttc);
return 0;
}
static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_lro_param lro_param;
int err;
- priv->rx_res = kvzalloc(sizeof(*priv->rx_res), GFP_KERNEL);
+ priv->rx_res = mlx5e_rx_res_alloc();
if (!priv->rx_res)
return -ENOMEM;
- mlx5e_build_rss_params(&priv->rx_res->rss_params, priv->channels.params.num_channels);
-
mlx5e_init_l2_addr(priv);
err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
return err;
}
- err = mlx5e_create_indirect_rqt(priv);
+ lro_param = mlx5e_get_lro_param(&priv->channels.params);
+ err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0,
+ priv->max_nch, priv->drop_rq.rqn, &lro_param,
+ priv->channels.params.num_channels);
if (err)
goto err_close_drop_rq;
- err = mlx5e_create_direct_rqts(priv);
- if (err)
- goto err_destroy_indirect_rqts;
-
- err = mlx5e_create_indirect_tirs(priv, false);
- if (err)
- goto err_destroy_direct_rqts;
-
- err = mlx5e_create_direct_tirs(priv);
- if (err)
- goto err_destroy_indirect_tirs;
-
err = mlx5e_create_rep_ttc_table(priv);
if (err)
- goto err_destroy_direct_tirs;
+ goto err_destroy_rx_res;
err = mlx5e_create_rep_root_ft(priv);
if (err)
err_destroy_root_ft:
mlx5e_destroy_rep_root_ft(priv);
err_destroy_ttc_table:
- mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
-err_destroy_direct_tirs:
- mlx5e_destroy_direct_tirs(priv);
-err_destroy_indirect_tirs:
- mlx5e_destroy_indirect_tirs(priv);
-err_destroy_direct_rqts:
- mlx5e_destroy_direct_rqts(priv);
-err_destroy_indirect_rqts:
- mlx5e_rqt_destroy(&priv->rx_res->indir_rqt);
+ mlx5_destroy_ttc_table(priv->fs.ttc);
+err_destroy_rx_res:
+ mlx5e_rx_res_destroy(priv->rx_res);
err_close_drop_rq:
mlx5e_close_drop_rq(&priv->drop_rq);
- kvfree(priv->rx_res);
+ mlx5e_rx_res_free(priv->rx_res);
priv->rx_res = NULL;
return err;
}
mlx5e_ethtool_cleanup_steering(priv);
rep_vport_rx_rule_destroy(priv);
mlx5e_destroy_rep_root_ft(priv);
- mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
- mlx5e_destroy_direct_tirs(priv);
- mlx5e_destroy_indirect_tirs(priv);
- mlx5e_destroy_direct_rqts(priv);
- mlx5e_rqt_destroy(&priv->rx_res->indir_rqt);
+ mlx5_destroy_ttc_table(priv->fs.ttc);
+ mlx5e_rx_res_destroy(priv->rx_res);
mlx5e_close_drop_rq(&priv->drop_rq);
- kvfree(priv->rx_res);
+ mlx5e_rx_res_free(priv->rx_res);
priv->rx_res = NULL;
}
return rpriv->netdev;
}
+static void mlx5e_vport_rep_event_unpair(struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5e_rep_sq *rep_sq;
+
+ rpriv = mlx5e_rep_to_rep_priv(rep);
+ list_for_each_entry(rep_sq, &rpriv->vport_sqs_list, list) {
+ if (!rep_sq->send_to_vport_rule_peer)
+ continue;
+ mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule_peer);
+ rep_sq->send_to_vport_rule_peer = NULL;
+ }
+}
+
+static int mlx5e_vport_rep_event_pair(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch_rep *rep,
+ struct mlx5_eswitch *peer_esw)
+{
+ struct mlx5_flow_handle *flow_rule;
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5e_rep_sq *rep_sq;
+
+ rpriv = mlx5e_rep_to_rep_priv(rep);
+ list_for_each_entry(rep_sq, &rpriv->vport_sqs_list, list) {
+ if (rep_sq->send_to_vport_rule_peer)
+ continue;
+ flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw, rep, rep_sq->sqn);
+ if (IS_ERR(flow_rule))
+ goto err_out;
+ rep_sq->send_to_vport_rule_peer = flow_rule;
+ }
+
+ return 0;
+err_out:
+ mlx5e_vport_rep_event_unpair(rep);
+ return PTR_ERR(flow_rule);
+}
+
+static int mlx5e_vport_rep_event(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch_rep *rep,
+ enum mlx5_switchdev_event event,
+ void *data)
+{
+ int err = 0;
+
+ if (event == MLX5_SWITCHDEV_EVENT_PAIR)
+ err = mlx5e_vport_rep_event_pair(esw, rep, data);
+ else if (event == MLX5_SWITCHDEV_EVENT_UNPAIR)
+ mlx5e_vport_rep_event_unpair(rep);
+
+ return err;
+}
+
static const struct mlx5_eswitch_rep_ops rep_ops = {
.load = mlx5e_vport_rep_load,
.unload = mlx5e_vport_rep_unload,
- .get_proto_dev = mlx5e_vport_rep_get_proto_dev
+ .get_proto_dev = mlx5e_vport_rep_get_proto_dev,
+ .event = mlx5e_vport_rep_event,
};
static int mlx5e_rep_probe(struct auxiliary_device *adev,
struct mlx5e_rep_sq {
struct mlx5_flow_handle *send_to_vport_rule;
+ struct mlx5_flow_handle *send_to_vport_rule_peer;
+ u32 sqn;
struct list_head list;
};
#include <net/flow_offload.h>
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
-#include <net/tc_act/tc_gact.h>
-#include <net/tc_act/tc_skbedit.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/device.h>
#include <linux/rhashtable.h>
#include <linux/refcount.h>
#include <linux/completion.h>
-#include <net/tc_act/tc_mirred.h>
-#include <net/tc_act/tc_vlan.h>
-#include <net/tc_act/tc_tunnel_key.h>
#include <net/tc_act/tc_pedit.h>
#include <net/tc_act/tc_csum.h>
-#include <net/tc_act/tc_mpls.h>
#include <net/psample.h>
#include <net/arp.h>
#include <net/ipv6_stubs.h>
int num_channels;
struct mlx5e_rqt indir_rqt;
struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS];
- struct mlx5e_ttc_table ttc;
+ struct mlx5_ttc_table *ttc;
};
struct mlx5e_hairpin_entry {
static
struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex)
{
+ struct mlx5_core_dev *mdev;
struct net_device *netdev;
struct mlx5e_priv *priv;
- netdev = __dev_get_by_index(net, ifindex);
+ netdev = dev_get_by_index(net, ifindex);
+ if (!netdev)
+ return ERR_PTR(-ENODEV);
+
priv = netdev_priv(netdev);
- return priv->mdev;
+ mdev = priv->mdev;
+ dev_put(netdev);
+
+ /* Mirred tc action holds a refcount on the ifindex net_device (see
+ * net/sched/act_mirred.c:tcf_mirred_get_dev). So, it's okay to continue using mdev
+ * after dev_put(netdev), while we're in the context of adding a tc flow.
+ *
+ * The mdev pointer corresponds to the peer/out net_device of a hairpin. It is then
+ * stored in a hairpin object, which exists until all flows, that refer to it, get
+ * removed.
+ *
+ * On the other hand, after a hairpin object has been created, the peer net_device may
+ * be removed/unbound while there are still some hairpin flows that are using it. This
+ * case is handled by mlx5e_tc_hairpin_update_dead_peer, which is hooked to
+ * NETDEV_UNREGISTER event of the peer net_device.
+ */
+ return mdev;
}
static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp)
if (!indir)
return -ENOMEM;
- mlx5e_build_default_indir_rqt(indir->table, MLX5E_INDIR_RQT_SIZE, hp->num_channels);
+ mlx5e_rss_params_indir_init_uniform(indir, hp->num_channels);
err = mlx5e_rqt_init_indir(&hp->indir_rqt, mdev, hp->pair->rqn, hp->num_channels,
- priv->rx_res->rss_params.hash.hfunc, indir);
+ mlx5e_rx_res_get_current_hash(priv->rx_res).hfunc,
+ indir);
kvfree(indir);
return err;
static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
{
struct mlx5e_priv *priv = hp->func_priv;
- struct mlx5e_rss_params_hash *rss_hash;
- enum mlx5e_traffic_types tt, max_tt;
+ struct mlx5e_rss_params_hash rss_hash;
+ enum mlx5_traffic_types tt, max_tt;
struct mlx5e_tir_builder *builder;
int err = 0;
if (!builder)
return -ENOMEM;
- rss_hash = &priv->rx_res->rss_params.hash;
+ rss_hash = mlx5e_rx_res_get_current_hash(priv->rx_res);
for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
struct mlx5e_rss_params_traffic_type rss_tt;
mlx5e_tir_builder_build_rqt(builder, hp->tdn,
mlx5e_rqt_get_rqtn(&hp->indir_rqt),
false);
- mlx5e_tir_builder_build_rss(builder, rss_hash, &rss_tt, false);
+ mlx5e_tir_builder_build_rss(builder, &rss_hash, &rss_tt, false);
err = mlx5e_tir_init(&hp->indir_tir[tt], builder, hp->func_mdev, false);
if (err) {
memset(ttc_params, 0, sizeof(*ttc_params));
- ttc_params->any_tt_tirn = mlx5e_tir_get_tirn(&hp->direct_tir);
-
- for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
- ttc_params->indir_tirn[tt] = mlx5e_tir_get_tirn(&hp->indir_tir[tt]);
+ ttc_params->ns = mlx5_get_flow_namespace(hp->func_mdev,
+ MLX5_FLOW_NAMESPACE_KERNEL);
+ for (tt = 0; tt < MLX5_NUM_TT; tt++) {
+ ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ ttc_params->dests[tt].tir_num =
+ tt == MLX5_TT_ANY ?
+ mlx5e_tir_get_tirn(&hp->direct_tir) :
+ mlx5e_tir_get_tirn(&hp->indir_tir[tt]);
+ }
- ft_attr->max_fte = MLX5E_TTC_TABLE_SIZE;
ft_attr->level = MLX5E_TC_TTC_FT_LEVEL;
ft_attr->prio = MLX5E_TC_PRIO;
}
goto err_create_indirect_tirs;
mlx5e_hairpin_set_ttc_params(hp, &ttc_params);
- err = mlx5e_create_ttc_table(priv, &ttc_params, &hp->ttc);
- if (err)
+ hp->ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params);
+ if (IS_ERR(hp->ttc)) {
+ err = PTR_ERR(hp->ttc);
goto err_create_ttc_table;
+ }
netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n",
- hp->num_channels, hp->ttc.ft.t->id);
+ hp->num_channels,
+ mlx5_get_ttc_flow_table(priv->fs.ttc)->id);
return 0;
static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp)
{
- struct mlx5e_priv *priv = hp->func_priv;
-
- mlx5e_destroy_ttc_table(priv, &hp->ttc);
+ mlx5_destroy_ttc_table(hp->ttc);
mlx5e_hairpin_destroy_indirect_tirs(hp);
mlx5e_rqt_destroy(&hp->indir_rqt);
}
func_mdev = priv->mdev;
peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
+ if (IS_ERR(peer_mdev)) {
+ err = PTR_ERR(peer_mdev);
+ goto create_pair_err;
+ }
pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params);
if (IS_ERR(pair)) {
int err;
peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
+ if (IS_ERR(peer_mdev)) {
+ NL_SET_ERR_MSG_MOD(extack, "invalid ifindex of mirred device");
+ return PTR_ERR(peer_mdev);
+ }
+
if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) {
NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported");
return -EOPNOTSUPP;
attach_flow:
if (hpe->hp->num_channels > 1) {
flow_flag_set(flow, HAIRPIN_RSS);
- flow->attr->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t;
+ flow->attr->nic_attr->hairpin_ft =
+ mlx5_get_ttc_flow_table(hpe->hp->ttc);
} else {
flow->attr->nic_attr->hairpin_tirn = mlx5e_tir_get_tirn(&hpe->hp->direct_tir);
}
static int
mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow_parse_attr *parse_attr,
struct mlx5e_tc_flow *flow,
struct netlink_ext_ack *extack)
{
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
struct mlx5_flow_attr *attr = flow->attr;
struct mlx5_core_dev *dev = priv->mdev;
- struct mlx5_fc *counter = NULL;
+ struct mlx5_fc *counter;
int err;
+ parse_attr = attr->parse_attr;
+
if (flow_flag_test(flow, HAIRPIN)) {
err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
if (err)
bool vf_tun = false, encap_valid = true;
struct net_device *encap_dev = NULL;
struct mlx5_esw_flow_attr *esw_attr;
- struct mlx5_fc *counter = NULL;
struct mlx5e_rep_priv *rpriv;
struct mlx5e_priv *out_priv;
+ struct mlx5_fc *counter;
u32 max_prio, max_chain;
int err = 0;
int out_index;
static int parse_tc_nic_actions(struct mlx5e_priv *priv,
struct flow_action *flow_action,
- struct mlx5e_tc_flow_parse_attr *parse_attr,
struct mlx5e_tc_flow *flow,
struct netlink_ext_ack *extack)
{
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
struct mlx5_flow_attr *attr = flow->attr;
struct pedit_headers_action hdrs[2] = {};
const struct flow_action_entry *act;
return -EOPNOTSUPP;
nic_attr = attr->nic_attr;
-
nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
+ parse_attr = attr->parse_attr;
flow_action_for_each(i, act, flow_action) {
switch (act->id) {
MLX5_FLOW_CONTEXT_ACTION_COUNT;
break;
case FLOW_ACTION_DROP:
- action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
- if (MLX5_CAP_FLOWTABLE(priv->mdev,
- flow_table_properties_nic_receive.flow_counter))
- action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
+ MLX5_FLOW_CONTEXT_ACTION_COUNT;
break;
case FLOW_ACTION_MANGLE:
case FLOW_ACTION_ADD:
"device is not on same HW, can't offload");
netdev_warn(priv->netdev, "device %s not on same HW, can't offload\n",
peer_dev->name);
- return -EINVAL;
+ return -EOPNOTSUPP;
}
}
break;
if (mark & ~MLX5E_TC_FLOW_ID_MASK) {
NL_SET_ERR_MSG_MOD(extack,
"Bad flow mark - only 16 bit is supported");
- return -EINVAL;
+ return -EOPNOTSUPP;
}
nic_attr->flow_tag = mark;
static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
struct flow_action *flow_action,
struct mlx5e_tc_flow *flow,
- struct netlink_ext_ack *extack,
- struct net_device *filter_dev)
+ struct netlink_ext_ack *extack)
{
struct pedit_headers_action hdrs[2] = {};
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
"mpls pop supported only as first action");
return -EOPNOTSUPP;
}
- if (!netif_is_bareudp(filter_dev)) {
+ if (!netif_is_bareudp(parse_attr->filter_dev)) {
NL_SET_ERR_MSG_MOD(extack,
"mpls pop supported only on bareudp devices");
return -EOPNOTSUPP;
"devices %s %s not on same switch HW, can't offload forwarding\n",
priv->netdev->name,
out_dev->name);
- return -EINVAL;
+ return -EOPNOTSUPP;
}
}
break;
if (err)
goto err_free;
- err = parse_tc_fdb_actions(priv, &rule->action, flow, extack, filter_dev);
+ err = parse_tc_fdb_actions(priv, &rule->action, flow, extack);
if (err)
goto err_free;
if (err)
goto err_free;
- err = parse_tc_nic_actions(priv, &rule->action, parse_attr, flow, extack);
+ err = parse_tc_nic_actions(priv, &rule->action, flow, extack);
if (err)
goto err_free;
- err = mlx5e_tc_add_nic_flow(priv, parse_attr, flow, extack);
+ err = mlx5e_tc_add_nic_flow(priv, flow, extack);
if (err)
goto err_free;
struct mlx5_core_dev *dev = priv->mdev;
struct mapping_ctx *chains_mapping;
struct mlx5_chains_attr attr = {};
+ u64 mapping_id;
int err;
mlx5e_mod_hdr_tbl_init(&tc->mod_hdr);
lockdep_set_class(&tc->ht.mutex, &tc_ht_lock_key);
- chains_mapping = mapping_create(sizeof(struct mlx5_mapped_obj),
- MLX5E_TC_TABLE_CHAIN_TAG_MASK, true);
+ mapping_id = mlx5_query_nic_system_image_guid(dev);
+
+ chains_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_CHAIN,
+ sizeof(struct mlx5_mapped_obj),
+ MLX5E_TC_TABLE_CHAIN_TAG_MASK, true);
+
if (IS_ERR(chains_mapping)) {
err = PTR_ERR(chains_mapping);
goto err_mapping;
struct mapping_ctx *mapping;
struct mlx5_eswitch *esw;
struct mlx5e_priv *priv;
+ u64 mapping_id;
int err = 0;
uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht);
uplink_priv->esw_psample = mlx5_esw_sample_init(netdev_priv(priv->netdev));
#endif
- mapping = mapping_create(sizeof(struct tunnel_match_key),
- TUNNEL_INFO_BITS_MASK, true);
+ mapping_id = mlx5_query_nic_system_image_guid(esw->dev);
+
+ mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL,
+ sizeof(struct tunnel_match_key),
+ TUNNEL_INFO_BITS_MASK, true);
+
if (IS_ERR(mapping)) {
err = PTR_ERR(mapping);
goto err_tun_mapping;
uplink_priv->tunnel_mapping = mapping;
/* 0xFFF is reserved for stack devices slow path table mark */
- mapping = mapping_create(sz_enc_opts, ENC_OPTS_BITS_MASK - 1, true);
+ mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL_ENC_OPTS,
+ sz_enc_opts, ENC_OPTS_BITS_MASK - 1, true);
if (IS_ERR(mapping)) {
err = PTR_ERR(mapping);
goto err_enc_opts_mapping;
vport->egress.offloads.fwd_rule = NULL;
}
+static void esw_acl_egress_ofld_bounce_rule_destroy(struct mlx5_vport *vport)
+{
+ if (!vport->egress.offloads.bounce_rule)
+ return;
+
+ mlx5_del_flow_rules(vport->egress.offloads.bounce_rule);
+ vport->egress.offloads.bounce_rule = NULL;
+}
+
static int esw_acl_egress_ofld_fwd2vport_create(struct mlx5_eswitch *esw,
struct mlx5_vport *vport,
struct mlx5_flow_destination *fwd_dest)
{
esw_acl_egress_vlan_destroy(vport);
esw_acl_egress_ofld_fwd2vport_destroy(vport);
+ esw_acl_egress_ofld_bounce_rule_destroy(vport);
}
static int esw_acl_egress_ofld_groups_create(struct mlx5_eswitch *esw,
mlx5_destroy_flow_group(vport->egress.offloads.fwd_grp);
vport->egress.offloads.fwd_grp = NULL;
}
+
+ if (!IS_ERR_OR_NULL(vport->egress.offloads.bounce_grp)) {
+ mlx5_destroy_flow_group(vport->egress.offloads.bounce_grp);
+ vport->egress.offloads.bounce_grp = NULL;
+ }
+
esw_acl_egress_vlan_grp_destroy(vport);
}
esw->mode = mode;
- mlx5_lag_update(esw->dev);
-
if (mode == MLX5_ESWITCH_LEGACY) {
err = esw_legacy_enable(esw);
} else {
if (!mlx5_esw_allowed(esw))
return 0;
+ mlx5_lag_disable_change(esw->dev);
down_write(&esw->mode_lock);
if (esw->mode == MLX5_ESWITCH_NONE) {
ret = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY, num_vfs);
esw->esw_funcs.num_vfs = num_vfs;
}
up_write(&esw->mode_lock);
+ mlx5_lag_enable_change(esw->dev);
return ret;
}
old_mode = esw->mode;
esw->mode = MLX5_ESWITCH_NONE;
- mlx5_lag_update(esw->dev);
-
if (old_mode == MLX5_ESWITCH_OFFLOADS)
mlx5_rescan_drivers(esw->dev);
if (!mlx5_esw_allowed(esw))
return;
+ mlx5_lag_disable_change(esw->dev);
down_write(&esw->mode_lock);
mlx5_eswitch_disable_locked(esw, clear_vf);
esw->esw_funcs.num_vfs = 0;
up_write(&esw->mode_lock);
+ mlx5_lag_enable_change(esw->dev);
}
static int mlx5_query_hca_cap_host_pf(struct mlx5_core_dev *dev, void *out)
ida_init(&esw->offloads.vport_metadata_ida);
xa_init_flags(&esw->offloads.vhca_map, XA_FLAGS_ALLOC);
mutex_init(&esw->state_lock);
+ lockdep_register_key(&esw->mode_lock_key);
init_rwsem(&esw->mode_lock);
+ lockdep_set_class(&esw->mode_lock, &esw->mode_lock_key);
esw->enabled_vports = 0;
esw->mode = MLX5_ESWITCH_NONE;
esw->dev->priv.eswitch = NULL;
destroy_workqueue(esw->work_queue);
+ lockdep_unregister_key(&esw->mode_lock_key);
mutex_destroy(&esw->state_lock);
WARN_ON(!xa_empty(&esw->offloads.vhca_map));
xa_destroy(&esw->offloads.vhca_map);
mlx5_esw_is_sf_vport(esw, vport_num);
}
-int mlx5_devlink_port_function_hw_addr_get(struct devlink *devlink,
- struct devlink_port *port,
+int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port,
u8 *hw_addr, int *hw_addr_len,
struct netlink_ext_ack *extack)
{
int err = -EOPNOTSUPP;
u16 vport_num;
- esw = mlx5_devlink_eswitch_get(devlink);
+ esw = mlx5_devlink_eswitch_get(port->devlink);
if (IS_ERR(esw))
return PTR_ERR(esw);
return err;
}
-int mlx5_devlink_port_function_hw_addr_set(struct devlink *devlink,
- struct devlink_port *port,
+int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port,
const u8 *hw_addr, int hw_addr_len,
struct netlink_ext_ack *extack)
{
int err = -EOPNOTSUPP;
u16 vport_num;
- esw = mlx5_devlink_eswitch_get(devlink);
+ esw = mlx5_devlink_eswitch_get(port->devlink);
if (IS_ERR(esw)) {
NL_SET_ERR_MSG_MOD(extack, "Eswitch doesn't support set hw_addr");
return PTR_ERR(esw);
*/
void mlx5_esw_unlock(struct mlx5_eswitch *esw)
{
+ if (!mlx5_esw_allowed(esw))
+ return;
up_write(&esw->mode_lock);
}
+/**
+ * mlx5_esw_lock() - Take write lock on esw mode lock
+ * @esw: eswitch device.
+ */
+void mlx5_esw_lock(struct mlx5_eswitch *esw)
+{
+ if (!mlx5_esw_allowed(esw))
+ return;
+ down_write(&esw->mode_lock);
+}
+
/**
* mlx5_eswitch_get_total_vports - Get total vports of the eswitch
*
return mlx5_esw_allowed(esw) ? esw->total_vports : 0;
}
EXPORT_SYMBOL_GPL(mlx5_eswitch_get_total_vports);
+
+/**
+ * mlx5_eswitch_get_core_dev - Get the mdev device
+ * @esw : eswitch device.
+ *
+ * Return the mellanox core device which manages the eswitch.
+ */
+struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw)
+{
+ return mlx5_esw_allowed(esw) ? esw->dev : NULL;
+}
+EXPORT_SYMBOL(mlx5_eswitch_get_core_dev);
#define esw_chains(esw) \
((esw)->fdb_table.offloads.esw_chains_priv)
+enum {
+ MAPPING_TYPE_CHAIN,
+ MAPPING_TYPE_TUNNEL,
+ MAPPING_TYPE_TUNNEL_ENC_OPTS,
+ MAPPING_TYPE_LABELS,
+ MAPPING_TYPE_ZONE,
+};
+
struct vport_ingress {
struct mlx5_flow_table *acl;
struct mlx5_flow_handle *allow_rule;
struct {
struct mlx5_flow_group *fwd_grp;
struct mlx5_flow_handle *fwd_rule;
+ struct mlx5_flow_handle *bounce_rule;
+ struct mlx5_flow_group *bounce_grp;
} offloads;
};
};
u32 large_group_num;
} params;
struct blocking_notifier_head n_head;
+ struct lock_class_key mode_lock_key;
};
void esw_offloads_disable(struct mlx5_eswitch *esw);
struct netlink_ext_ack *extack);
int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink,
enum devlink_eswitch_encap_mode *encap);
-int mlx5_devlink_port_function_hw_addr_get(struct devlink *devlink,
- struct devlink_port *port,
+int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port,
u8 *hw_addr, int *hw_addr_len,
struct netlink_ext_ack *extack);
-int mlx5_devlink_port_function_hw_addr_set(struct devlink *devlink,
- struct devlink_port *port,
+int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port,
const u8 *hw_addr, int hw_addr_len,
struct netlink_ext_ack *extack);
};
struct mlx5_vport_tbl_attr {
- u16 chain;
+ u32 chain;
u16 prio;
u16 vport;
const struct esw_vport_tbl_namespace *vport_ns;
void mlx5_esw_put(struct mlx5_core_dev *dev);
int mlx5_esw_try_lock(struct mlx5_eswitch *esw);
void mlx5_esw_unlock(struct mlx5_eswitch *esw);
+void mlx5_esw_lock(struct mlx5_eswitch *esw);
void esw_vport_change_handle_locked(struct mlx5_vport *vport);
bool mlx5_esw_offloads_controller_valid(const struct mlx5_eswitch *esw, u32 controller);
+int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw,
+ struct mlx5_eswitch *slave_esw);
+void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw,
+ struct mlx5_eswitch *slave_esw);
+int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw);
+
#else /* CONFIG_MLX5_ESWITCH */
/* eswitch API stubs */
static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
return ERR_PTR(-EOPNOTSUPP);
}
+static inline void mlx5_esw_unlock(struct mlx5_eswitch *esw) { return; }
+static inline void mlx5_esw_lock(struct mlx5_eswitch *esw) { return; }
+
static inline struct mlx5_flow_handle *
esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag)
{
{
return vport_num;
}
+
+static inline int
+mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw,
+ struct mlx5_eswitch *slave_esw)
+{
+ return 0;
+}
+
+static inline void
+mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw,
+ struct mlx5_eswitch *slave_esw) {}
+
+static inline int
+mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw)
+{
+ return 0;
+}
#endif /* CONFIG_MLX5_ESWITCH */
#endif /* __MLX5_ESWITCH_H__ */
{
dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
dest[dest_idx].vport.num = esw_attr->dests[attr_idx].rep->vport;
- dest[dest_idx].vport.vhca_id =
- MLX5_CAP_GEN(esw_attr->dests[attr_idx].mdev, vhca_id);
- if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+ if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) {
+ dest[dest_idx].vport.vhca_id =
+ MLX5_CAP_GEN(esw_attr->dests[attr_idx].mdev, vhca_id);
dest[dest_idx].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
+ }
if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP) {
if (pkt_reformat) {
flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
struct mlx5_flow_handle *
mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
+ struct mlx5_eswitch *from_esw,
struct mlx5_eswitch_rep *rep,
u32 sqn)
{
misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn);
/* source vport is the esw manager */
- MLX5_SET(fte_match_set_misc, misc, source_port, rep->esw->manager_vport);
+ MLX5_SET(fte_match_set_misc, misc, source_port, from_esw->manager_vport);
if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch))
MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
- MLX5_CAP_GEN(rep->esw->dev, vhca_id));
+ MLX5_CAP_GEN(from_esw->dev, vhca_id));
misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn);
dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ if (rep->vport == MLX5_VPORT_UPLINK)
+ spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
+
flow_rule = mlx5_add_flow_rules(on_esw->fdb_table.offloads.slow_fdb,
spec, &flow_act, &dest, 1);
if (IS_ERR(flow_rule))
goto ns_err;
}
- table_size = esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ +
+ /* To be strictly correct:
+ * MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ)
+ * should be:
+ * esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ +
+ * peer_esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ
+ * but as the peer device might not be in switchdev mode it's not
+ * possible. We use the fact that by default FW sets max vfs and max sfs
+ * to the same value on both devices. If it needs to be changed in the future note
+ * the peer miss group should also be created based on the number of
+ * total vports of the peer (currently is also uses esw->total_vports).
+ */
+ table_size = MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ) +
MLX5_ESW_MISS_FLOWS + esw->total_vports + esw->esw_funcs.num_vfs;
/* create the slow path fdb with encap set, so further table instances
source_eswitch_owner_vhca_id_valid, 1);
}
- ix = esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ;
+ /* See comment above table_size calculation */
+ ix = MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ);
MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix - 1);
mlx5_esw_offloads_devlink_port_unregister(esw, vport_num);
}
+static int esw_set_uplink_slave_ingress_root(struct mlx5_core_dev *master,
+ struct mlx5_core_dev *slave)
+{
+ u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {};
+ struct mlx5_eswitch *esw;
+ struct mlx5_flow_root_namespace *root;
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_vport *vport;
+ int err;
+
+ MLX5_SET(set_flow_table_root_in, in, opcode,
+ MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
+ MLX5_SET(set_flow_table_root_in, in, table_type, FS_FT_ESW_INGRESS_ACL);
+ MLX5_SET(set_flow_table_root_in, in, other_vport, 1);
+ MLX5_SET(set_flow_table_root_in, in, vport_number, MLX5_VPORT_UPLINK);
+
+ if (master) {
+ esw = master->priv.eswitch;
+ vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
+ MLX5_SET(set_flow_table_root_in, in, table_of_other_vport, 1);
+ MLX5_SET(set_flow_table_root_in, in, table_vport_number,
+ MLX5_VPORT_UPLINK);
+
+ ns = mlx5_get_flow_vport_acl_namespace(master,
+ MLX5_FLOW_NAMESPACE_ESW_INGRESS,
+ vport->index);
+ root = find_root(&ns->node);
+ mutex_lock(&root->chain_lock);
+
+ MLX5_SET(set_flow_table_root_in, in,
+ table_eswitch_owner_vhca_id_valid, 1);
+ MLX5_SET(set_flow_table_root_in, in,
+ table_eswitch_owner_vhca_id,
+ MLX5_CAP_GEN(master, vhca_id));
+ MLX5_SET(set_flow_table_root_in, in, table_id,
+ root->root_ft->id);
+ } else {
+ esw = slave->priv.eswitch;
+ vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
+ ns = mlx5_get_flow_vport_acl_namespace(slave,
+ MLX5_FLOW_NAMESPACE_ESW_INGRESS,
+ vport->index);
+ root = find_root(&ns->node);
+ mutex_lock(&root->chain_lock);
+ MLX5_SET(set_flow_table_root_in, in, table_id, root->root_ft->id);
+ }
+
+ err = mlx5_cmd_exec(slave, in, sizeof(in), out, sizeof(out));
+ mutex_unlock(&root->chain_lock);
+
+ return err;
+}
+
+static int esw_set_slave_root_fdb(struct mlx5_core_dev *master,
+ struct mlx5_core_dev *slave)
+{
+ u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {};
+ struct mlx5_flow_root_namespace *root;
+ struct mlx5_flow_namespace *ns;
+ int err;
+
+ MLX5_SET(set_flow_table_root_in, in, opcode,
+ MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
+ MLX5_SET(set_flow_table_root_in, in, table_type,
+ FS_FT_FDB);
+
+ if (master) {
+ ns = mlx5_get_flow_namespace(master,
+ MLX5_FLOW_NAMESPACE_FDB);
+ root = find_root(&ns->node);
+ mutex_lock(&root->chain_lock);
+ MLX5_SET(set_flow_table_root_in, in,
+ table_eswitch_owner_vhca_id_valid, 1);
+ MLX5_SET(set_flow_table_root_in, in,
+ table_eswitch_owner_vhca_id,
+ MLX5_CAP_GEN(master, vhca_id));
+ MLX5_SET(set_flow_table_root_in, in, table_id,
+ root->root_ft->id);
+ } else {
+ ns = mlx5_get_flow_namespace(slave,
+ MLX5_FLOW_NAMESPACE_FDB);
+ root = find_root(&ns->node);
+ mutex_lock(&root->chain_lock);
+ MLX5_SET(set_flow_table_root_in, in, table_id,
+ root->root_ft->id);
+ }
+
+ err = mlx5_cmd_exec(slave, in, sizeof(in), out, sizeof(out));
+ mutex_unlock(&root->chain_lock);
+
+ return err;
+}
+
+static int __esw_set_master_egress_rule(struct mlx5_core_dev *master,
+ struct mlx5_core_dev *slave,
+ struct mlx5_vport *vport,
+ struct mlx5_flow_table *acl)
+{
+ struct mlx5_flow_handle *flow_rule = NULL;
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+ void *misc;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+ MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_UPLINK);
+ MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
+ MLX5_CAP_GEN(slave, vhca_id));
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc,
+ source_eswitch_owner_vhca_id);
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ dest.vport.num = slave->priv.eswitch->manager_vport;
+ dest.vport.vhca_id = MLX5_CAP_GEN(slave, vhca_id);
+ dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
+
+ flow_rule = mlx5_add_flow_rules(acl, spec, &flow_act,
+ &dest, 1);
+ if (IS_ERR(flow_rule))
+ err = PTR_ERR(flow_rule);
+ else
+ vport->egress.offloads.bounce_rule = flow_rule;
+
+ kvfree(spec);
+ return err;
+}
+
+static int esw_set_master_egress_rule(struct mlx5_core_dev *master,
+ struct mlx5_core_dev *slave)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_eswitch *esw = master->priv.eswitch;
+ struct mlx5_flow_table_attr ft_attr = {
+ .max_fte = 1, .prio = 0, .level = 0,
+ };
+ struct mlx5_flow_namespace *egress_ns;
+ struct mlx5_flow_table *acl;
+ struct mlx5_flow_group *g;
+ struct mlx5_vport *vport;
+ void *match_criteria;
+ u32 *flow_group_in;
+ int err;
+
+ vport = mlx5_eswitch_get_vport(esw, esw->manager_vport);
+ if (IS_ERR(vport))
+ return PTR_ERR(vport);
+
+ egress_ns = mlx5_get_flow_vport_acl_namespace(master,
+ MLX5_FLOW_NAMESPACE_ESW_EGRESS,
+ vport->index);
+ if (!egress_ns)
+ return -EINVAL;
+
+ if (vport->egress.acl)
+ return -EINVAL;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ acl = mlx5_create_vport_flow_table(egress_ns, &ft_attr, vport->vport);
+ if (IS_ERR(acl)) {
+ err = PTR_ERR(acl);
+ goto out;
+ }
+
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
+ match_criteria);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+ misc_parameters.source_port);
+ MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+ misc_parameters.source_eswitch_owner_vhca_id);
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS);
+
+ MLX5_SET(create_flow_group_in, flow_group_in,
+ source_eswitch_owner_vhca_id_valid, 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
+
+ g = mlx5_create_flow_group(acl, flow_group_in);
+ if (IS_ERR(g)) {
+ err = PTR_ERR(g);
+ goto err_group;
+ }
+
+ err = __esw_set_master_egress_rule(master, slave, vport, acl);
+ if (err)
+ goto err_rule;
+
+ vport->egress.acl = acl;
+ vport->egress.offloads.bounce_grp = g;
+
+ kvfree(flow_group_in);
+
+ return 0;
+
+err_rule:
+ mlx5_destroy_flow_group(g);
+err_group:
+ mlx5_destroy_flow_table(acl);
+out:
+ kvfree(flow_group_in);
+ return err;
+}
+
+static void esw_unset_master_egress_rule(struct mlx5_core_dev *dev)
+{
+ struct mlx5_vport *vport;
+
+ vport = mlx5_eswitch_get_vport(dev->priv.eswitch,
+ dev->priv.eswitch->manager_vport);
+
+ esw_acl_egress_ofld_cleanup(vport);
+}
+
+int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw,
+ struct mlx5_eswitch *slave_esw)
+{
+ int err;
+
+ err = esw_set_uplink_slave_ingress_root(master_esw->dev,
+ slave_esw->dev);
+ if (err)
+ return -EINVAL;
+
+ err = esw_set_slave_root_fdb(master_esw->dev,
+ slave_esw->dev);
+ if (err)
+ goto err_fdb;
+
+ err = esw_set_master_egress_rule(master_esw->dev,
+ slave_esw->dev);
+ if (err)
+ goto err_acl;
+
+ return err;
+
+err_acl:
+ esw_set_slave_root_fdb(NULL, slave_esw->dev);
+
+err_fdb:
+ esw_set_uplink_slave_ingress_root(NULL, slave_esw->dev);
+
+ return err;
+}
+
+void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw,
+ struct mlx5_eswitch *slave_esw)
+{
+ esw_unset_master_egress_rule(master_esw->dev);
+ esw_set_slave_root_fdb(NULL, slave_esw->dev);
+ esw_set_uplink_slave_ingress_root(NULL, slave_esw->dev);
+}
+
#define ESW_OFFLOADS_DEVCOM_PAIR (0)
#define ESW_OFFLOADS_DEVCOM_UNPAIR (1)
-static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw,
- struct mlx5_eswitch *peer_esw)
+static void mlx5_esw_offloads_rep_event_unpair(struct mlx5_eswitch *esw)
{
+ const struct mlx5_eswitch_rep_ops *ops;
+ struct mlx5_eswitch_rep *rep;
+ unsigned long i;
+ u8 rep_type;
- return esw_add_fdb_peer_miss_rules(esw, peer_esw->dev);
+ mlx5_esw_for_each_rep(esw, i, rep) {
+ rep_type = NUM_REP_TYPES;
+ while (rep_type--) {
+ ops = esw->offloads.rep_ops[rep_type];
+ if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED &&
+ ops->event)
+ ops->event(esw, rep, MLX5_SWITCHDEV_EVENT_UNPAIR, NULL);
+ }
+ }
}
static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw)
#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
mlx5e_tc_clean_fdb_peer_flows(esw);
#endif
+ mlx5_esw_offloads_rep_event_unpair(esw);
esw_del_fdb_peer_miss_rules(esw);
}
+static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch *peer_esw)
+{
+ const struct mlx5_eswitch_rep_ops *ops;
+ struct mlx5_eswitch_rep *rep;
+ unsigned long i;
+ u8 rep_type;
+ int err;
+
+ err = esw_add_fdb_peer_miss_rules(esw, peer_esw->dev);
+ if (err)
+ return err;
+
+ mlx5_esw_for_each_rep(esw, i, rep) {
+ for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) {
+ ops = esw->offloads.rep_ops[rep_type];
+ if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED &&
+ ops->event) {
+ err = ops->event(esw, rep, MLX5_SWITCHDEV_EVENT_PAIR, peer_esw);
+ if (err)
+ goto err_out;
+ }
+ }
+ }
+
+ return 0;
+
+err_out:
+ mlx5_esw_offloads_unpair(esw);
+ return err;
+}
+
static int mlx5_esw_offloads_set_ns_peer(struct mlx5_eswitch *esw,
struct mlx5_eswitch *peer_esw,
bool pair)
switch (event) {
case ESW_OFFLOADS_DEVCOM_PAIR:
+ if (mlx5_get_next_phys_dev(esw->dev) != peer_esw->dev)
+ break;
+
if (mlx5_eswitch_vport_match_metadata_enabled(esw) !=
mlx5_eswitch_vport_match_metadata_enabled(peer_esw))
break;
esw_vport_destroy_offloads_acl_tables(esw, vport);
}
+int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw)
+{
+ struct mlx5_eswitch_rep *rep;
+ unsigned long i;
+ int ret;
+
+ if (!esw || esw->mode != MLX5_ESWITCH_OFFLOADS)
+ return 0;
+
+ rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
+ if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED)
+ return 0;
+
+ ret = mlx5_esw_offloads_rep_load(esw, MLX5_VPORT_UPLINK);
+ if (ret)
+ return ret;
+
+ mlx5_esw_for_each_rep(esw, i, rep) {
+ if (atomic_read(&rep->rep_data[REP_ETH].state) == REP_LOADED)
+ mlx5_esw_offloads_rep_load(esw, rep->vport);
+ }
+
+ return 0;
+}
+
static int esw_offloads_steering_init(struct mlx5_eswitch *esw)
{
struct mlx5_esw_indir_table *indir;
struct mapping_ctx *reg_c0_obj_pool;
struct mlx5_vport *vport;
unsigned long i;
+ u64 mapping_id;
int err;
if (MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat) &&
if (err)
goto err_vport_metadata;
- reg_c0_obj_pool = mapping_create(sizeof(struct mlx5_mapped_obj),
- ESW_REG_C0_USER_DATA_METADATA_MASK,
- true);
+ mapping_id = mlx5_query_nic_system_image_guid(esw->dev);
+
+ reg_c0_obj_pool = mapping_create_for_id(mapping_id, MAPPING_TYPE_CHAIN,
+ sizeof(struct mlx5_mapped_obj),
+ ESW_REG_C0_USER_DATA_METADATA_MASK,
+ true);
+
if (IS_ERR(reg_c0_obj_pool)) {
err = PTR_ERR(reg_c0_obj_pool);
goto err_pool;
if (esw_mode_from_devlink(mode, &mlx5_mode))
return -EINVAL;
+ mlx5_lag_disable_change(esw->dev);
err = mlx5_esw_try_lock(esw);
if (err < 0) {
NL_SET_ERR_MSG_MOD(extack, "Can't change mode, E-Switch is busy");
- return err;
+ goto enable_lag;
}
cur_mlx5_mode = err;
err = 0;
unlock:
mlx5_esw_unlock(esw);
+enable_lag:
+ mlx5_lag_enable_change(esw->dev);
return err;
}
switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
- if (mode == DEVLINK_ESWITCH_INLINE_MODE_NONE)
+ if (mode == DEVLINK_ESWITCH_INLINE_MODE_NONE) {
+ err = 0;
goto out;
+ }
+
fallthrough;
case MLX5_CAP_INLINE_MODE_L2:
NL_SET_ERR_MSG_MOD(extack, "Inline mode can't be set");
cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
MLX5_SET(cqc, cqc, log_cq_size, ilog2(cq_size));
- MLX5_SET(cqc, cqc, c_eqn, eqn);
+ MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
MLX5_SET(cqc, cqc, uar_page, fdev->conn_res.uar->index);
MLX5_SET(cqc, cqc, log_page_size, conn->cq.wq_ctrl.buf.page_shift -
MLX5_ADAPTER_PAGE_SHIFT);
return 0;
}
+static int mlx5_cmd_set_slave_root_fdb(struct mlx5_core_dev *master,
+ struct mlx5_core_dev *slave,
+ bool ft_id_valid,
+ u32 ft_id)
+{
+ u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {};
+ struct mlx5_flow_root_namespace *root;
+ struct mlx5_flow_namespace *ns;
+
+ MLX5_SET(set_flow_table_root_in, in, opcode,
+ MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
+ MLX5_SET(set_flow_table_root_in, in, table_type,
+ FS_FT_FDB);
+ if (ft_id_valid) {
+ MLX5_SET(set_flow_table_root_in, in,
+ table_eswitch_owner_vhca_id_valid, 1);
+ MLX5_SET(set_flow_table_root_in, in,
+ table_eswitch_owner_vhca_id,
+ MLX5_CAP_GEN(master, vhca_id));
+ MLX5_SET(set_flow_table_root_in, in, table_id,
+ ft_id);
+ } else {
+ ns = mlx5_get_flow_namespace(slave,
+ MLX5_FLOW_NAMESPACE_FDB);
+ root = find_root(&ns->node);
+ MLX5_SET(set_flow_table_root_in, in, table_id,
+ root->root_ft->id);
+ }
+
+ return mlx5_cmd_exec(slave, in, sizeof(in), out, sizeof(out));
+}
+
static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_table *ft, u32 underlay_qpn,
bool disconnect)
{
u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {};
struct mlx5_core_dev *dev = ns->dev;
+ int err;
if ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) &&
underlay_qpn == 0)
return 0;
+ if (ft->type == FS_FT_FDB &&
+ mlx5_lag_is_shared_fdb(dev) &&
+ !mlx5_lag_is_master(dev))
+ return 0;
+
MLX5_SET(set_flow_table_root_in, in, opcode,
MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
MLX5_SET(set_flow_table_root_in, in, table_type, ft->type);
MLX5_SET(set_flow_table_root_in, in, other_vport,
!!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT));
- return mlx5_cmd_exec_in(dev, set_flow_table_root, in);
+ err = mlx5_cmd_exec_in(dev, set_flow_table_root, in);
+ if (!err &&
+ ft->type == FS_FT_FDB &&
+ mlx5_lag_is_shared_fdb(dev) &&
+ mlx5_lag_is_master(dev)) {
+ err = mlx5_cmd_set_slave_root_fdb(dev,
+ mlx5_lag_get_peer_mdev(dev),
+ !disconnect, (!disconnect) ?
+ ft->id : 0);
+ if (err && !disconnect) {
+ MLX5_SET(set_flow_table_root_in, in, op_mod, 0);
+ MLX5_SET(set_flow_table_root_in, in, table_id,
+ ns->root_ft->id);
+ mlx5_cmd_exec_in(dev, set_flow_table_root, in);
+ }
+ }
+
+ return err;
}
static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns,
return true;
}
-static struct mlx5_flow_root_namespace *find_root(struct fs_node *node)
+struct mlx5_flow_root_namespace *find_root(struct fs_node *node)
{
struct fs_node *root;
struct mlx5_flow_namespace *ns;
static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft,
struct fs_prio *prio)
{
- struct mlx5_flow_table *next_ft;
+ struct mlx5_flow_table *next_ft, *first_ft;
int err = 0;
/* Connect_prev_fts and update_root_ft_create are mutually exclusive */
- if (list_empty(&prio->node.children)) {
+ first_ft = list_first_entry_or_null(&prio->node.children,
+ struct mlx5_flow_table, node.list);
+ if (!first_ft || first_ft->level > ft->level) {
err = connect_prev_fts(dev, ft, prio);
if (err)
return err;
- next_ft = find_next_chained_ft(prio);
+ next_ft = first_ft ? first_ft : find_next_chained_ft(prio);
err = connect_fwd_rules(dev, ft, next_ft);
if (err)
return err;
node.list) == ft))
return 0;
- next_ft = find_next_chained_ft(prio);
+ next_ft = find_next_ft(ft);
err = connect_fwd_rules(dev, next_ft, ft);
if (err)
return err;
int mlx5_fs_ingress_acls_init(struct mlx5_core_dev *dev, int total_vports);
void mlx5_fs_ingress_acls_cleanup(struct mlx5_core_dev *dev);
+struct mlx5_flow_root_namespace *find_root(struct fs_node *node);
+
#define fs_get_obj(v, _node) {v = container_of((_node), typeof(*v), node); }
#define fs_list_for_each_entry(pos, root) \
}
fw_reporter_ctx.err_synd = health->synd;
fw_reporter_ctx.miss_counter = health->miss_counter;
- devlink_health_report(health->fw_fatal_reporter,
- "FW fatal error reported", &fw_reporter_ctx);
+ if (devlink_health_report(health->fw_fatal_reporter,
+ "FW fatal error reported", &fw_reporter_ctx) == -ECANCELED) {
+ /* If recovery wasn't performed, due to grace period,
+ * unload the driver. This ensures that the driver
+ * closes all its resources and it is not subjected to
+ * requests from the kernel.
+ */
+ mlx5_core_err(dev, "Driver is in error state. Unloading\n");
+ mlx5_unload_one(dev);
+ }
}
static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = {
static int mlx5i_create_flow_steering(struct mlx5e_priv *priv)
{
- struct ttc_params ttc_params = {};
- int tt, err;
+ int err;
priv->fs.ns = mlx5_get_flow_namespace(priv->mdev,
MLX5_FLOW_NAMESPACE_KERNEL);
priv->netdev->hw_features &= ~NETIF_F_NTUPLE;
}
- mlx5e_set_ttc_basic_params(priv, &ttc_params);
- mlx5e_set_ttc_ft_params(&ttc_params);
- for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
- ttc_params.indir_tirn[tt] = priv->rx_res->rss[tt].indir_tir.tirn;
-
- err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc);
+ err = mlx5e_create_ttc_table(priv);
if (err) {
netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n",
err);
static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv)
{
- mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
+ mlx5e_destroy_ttc_table(priv);
mlx5e_arfs_destroy_tables(priv);
}
static int mlx5i_init_rx(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_lro_param lro_param;
int err;
- priv->rx_res = kvzalloc(sizeof(*priv->rx_res), GFP_KERNEL);
+ priv->rx_res = mlx5e_rx_res_alloc();
if (!priv->rx_res)
return -ENOMEM;
- mlx5e_build_rss_params(&priv->rx_res->rss_params, priv->channels.params.num_channels);
-
mlx5e_create_q_counters(priv);
err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
goto err_destroy_q_counters;
}
- err = mlx5e_create_indirect_rqt(priv);
+ lro_param = mlx5e_get_lro_param(&priv->channels.params);
+ err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0,
+ priv->max_nch, priv->drop_rq.rqn, &lro_param,
+ priv->channels.params.num_channels);
if (err)
goto err_close_drop_rq;
- err = mlx5e_create_direct_rqts(priv);
- if (err)
- goto err_destroy_indirect_rqts;
-
- err = mlx5e_create_indirect_tirs(priv, false);
- if (err)
- goto err_destroy_direct_rqts;
-
- err = mlx5e_create_direct_tirs(priv);
- if (err)
- goto err_destroy_indirect_tirs;
-
err = mlx5i_create_flow_steering(priv);
if (err)
- goto err_destroy_direct_tirs;
+ goto err_destroy_rx_res;
return 0;
-err_destroy_direct_tirs:
- mlx5e_destroy_direct_tirs(priv);
-err_destroy_indirect_tirs:
- mlx5e_destroy_indirect_tirs(priv);
-err_destroy_direct_rqts:
- mlx5e_destroy_direct_rqts(priv);
-err_destroy_indirect_rqts:
- mlx5e_rqt_destroy(&priv->rx_res->indir_rqt);
+err_destroy_rx_res:
+ mlx5e_rx_res_destroy(priv->rx_res);
err_close_drop_rq:
mlx5e_close_drop_rq(&priv->drop_rq);
err_destroy_q_counters:
mlx5e_destroy_q_counters(priv);
- kvfree(priv->rx_res);
+ mlx5e_rx_res_free(priv->rx_res);
priv->rx_res = NULL;
return err;
}
static void mlx5i_cleanup_rx(struct mlx5e_priv *priv)
{
mlx5i_destroy_flow_steering(priv);
- mlx5e_destroy_direct_tirs(priv);
- mlx5e_destroy_indirect_tirs(priv);
- mlx5e_destroy_direct_rqts(priv);
- mlx5e_rqt_destroy(&priv->rx_res->indir_rqt);
+ mlx5e_rx_res_destroy(priv->rx_res);
mlx5e_close_drop_rq(&priv->drop_rq);
mlx5e_destroy_q_counters(priv);
- kvfree(priv->rx_res);
+ mlx5e_rx_res_free(priv->rx_res);
priv->rx_res = NULL;
}
#include <linux/netdevice.h>
#include <linux/mlx5/driver.h>
+#include <linux/mlx5/eswitch.h>
#include <linux/mlx5/vport.h>
+#include "lib/devcom.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "lag.h"
static DEFINE_SPINLOCK(lag_lock);
static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
- u8 remap_port2)
+ u8 remap_port2, bool shared_fdb)
{
u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);
+ MLX5_SET(lagc, lag_ctx, fdb_selection_mode, shared_fdb);
return mlx5_cmd_exec_in(dev, create_lag, in);
}
}
static int mlx5_create_lag(struct mlx5_lag *ldev,
- struct lag_tracker *tracker)
+ struct lag_tracker *tracker,
+ bool shared_fdb)
{
struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+ struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
+ u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
int err;
mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1],
&ldev->v2p_map[MLX5_LAG_P2]);
- mlx5_core_info(dev0, "lag map port 1:%d port 2:%d",
- ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2]);
+ mlx5_core_info(dev0, "lag map port 1:%d port 2:%d shared_fdb:%d",
+ ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2],
+ shared_fdb);
err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1],
- ldev->v2p_map[MLX5_LAG_P2]);
- if (err)
+ ldev->v2p_map[MLX5_LAG_P2], shared_fdb);
+ if (err) {
mlx5_core_err(dev0,
"Failed to create LAG (%d)\n",
err);
+ return err;
+ }
+
+ if (shared_fdb) {
+ err = mlx5_eswitch_offloads_config_single_fdb(dev0->priv.eswitch,
+ dev1->priv.eswitch);
+ if (err)
+ mlx5_core_err(dev0, "Can't enable single FDB mode\n");
+ else
+ mlx5_core_info(dev0, "Operation mode is single FDB\n");
+ }
+
+ if (err) {
+ MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
+ if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
+ mlx5_core_err(dev0,
+ "Failed to deactivate RoCE LAG; driver restart required\n");
+ }
+
return err;
}
int mlx5_activate_lag(struct mlx5_lag *ldev,
struct lag_tracker *tracker,
- u8 flags)
+ u8 flags,
+ bool shared_fdb)
{
bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE);
struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
int err;
- err = mlx5_create_lag(ldev, tracker);
+ err = mlx5_create_lag(ldev, tracker, shared_fdb);
if (err) {
if (roce_lag) {
mlx5_core_err(dev0,
}
ldev->flags |= flags;
+ ldev->shared_fdb = shared_fdb;
return 0;
}
ldev->flags &= ~MLX5_LAG_MODE_FLAGS;
+ if (ldev->shared_fdb) {
+ mlx5_eswitch_offloads_destroy_single_fdb(ldev->pf[MLX5_LAG_P1].dev->priv.eswitch,
+ ldev->pf[MLX5_LAG_P2].dev->priv.eswitch);
+ ldev->shared_fdb = false;
+ }
+
MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
if (err) {
if (!ldev->pf[i].dev)
continue;
+ if (ldev->pf[i].dev->priv.flags &
+ MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
+ continue;
+
ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
mlx5_rescan_drivers_locked(ldev->pf[i].dev);
}
{
struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
+ bool shared_fdb = ldev->shared_fdb;
bool roce_lag;
int err;
roce_lag = __mlx5_lag_is_roce(ldev);
- if (roce_lag) {
+ if (shared_fdb) {
+ mlx5_lag_remove_devices(ldev);
+ } else if (roce_lag) {
if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
mlx5_rescan_drivers_locked(dev0);
if (err)
return;
- if (roce_lag)
+ if (shared_fdb || roce_lag)
mlx5_lag_add_devices(ldev);
+
+ if (shared_fdb) {
+ if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
+ mlx5_eswitch_reload_reps(dev0->priv.eswitch);
+ if (!(dev1->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
+ mlx5_eswitch_reload_reps(dev1->priv.eswitch);
+ }
+}
+
+static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
+{
+ struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+ struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
+
+ if (is_mdev_switchdev_mode(dev0) &&
+ is_mdev_switchdev_mode(dev1) &&
+ mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) &&
+ mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) &&
+ mlx5_devcom_is_paired(dev0->priv.devcom,
+ MLX5_DEVCOM_ESW_OFFLOADS) &&
+ MLX5_CAP_GEN(dev1, lag_native_fdb_selection) &&
+ MLX5_CAP_ESW(dev1, root_ft_on_other_esw) &&
+ MLX5_CAP_ESW(dev0, esw_shared_ingress_acl))
+ return true;
+
+ return false;
}
static void mlx5_do_bond(struct mlx5_lag *ldev)
bool do_bond, roce_lag;
int err;
- if (!mlx5_lag_is_ready(ldev))
- return;
-
- tracker = ldev->tracker;
+ if (!mlx5_lag_is_ready(ldev)) {
+ do_bond = false;
+ } else {
+ tracker = ldev->tracker;
- do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
+ do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
+ }
if (do_bond && !__mlx5_lag_is_active(ldev)) {
+ bool shared_fdb = mlx5_shared_fdb_supported(ldev);
+
roce_lag = !mlx5_sriov_is_enabled(dev0) &&
!mlx5_sriov_is_enabled(dev1);
dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE;
#endif
- if (roce_lag)
+ if (shared_fdb || roce_lag)
mlx5_lag_remove_devices(ldev);
err = mlx5_activate_lag(ldev, &tracker,
roce_lag ? MLX5_LAG_FLAG_ROCE :
- MLX5_LAG_FLAG_SRIOV);
+ MLX5_LAG_FLAG_SRIOV,
+ shared_fdb);
if (err) {
- if (roce_lag)
+ if (shared_fdb || roce_lag)
mlx5_lag_add_devices(ldev);
return;
- }
-
- if (roce_lag) {
+ } else if (roce_lag) {
dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
mlx5_rescan_drivers_locked(dev0);
mlx5_nic_vport_enable_roce(dev1);
+ } else if (shared_fdb) {
+ dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
+ mlx5_rescan_drivers_locked(dev0);
+
+ err = mlx5_eswitch_reload_reps(dev0->priv.eswitch);
+ if (!err)
+ err = mlx5_eswitch_reload_reps(dev1->priv.eswitch);
+
+ if (err) {
+ dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
+ mlx5_rescan_drivers_locked(dev0);
+ mlx5_deactivate_lag(ldev);
+ mlx5_lag_add_devices(ldev);
+ mlx5_eswitch_reload_reps(dev0->priv.eswitch);
+ mlx5_eswitch_reload_reps(dev1->priv.eswitch);
+ mlx5_core_err(dev0, "Failed to enable lag\n");
+ return;
+ }
}
} else if (do_bond && __mlx5_lag_is_active(ldev)) {
mlx5_modify_lag(ldev, &tracker);
queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
}
+static void mlx5_lag_lock_eswitches(struct mlx5_core_dev *dev0,
+ struct mlx5_core_dev *dev1)
+{
+ if (dev0)
+ mlx5_esw_lock(dev0->priv.eswitch);
+ if (dev1)
+ mlx5_esw_lock(dev1->priv.eswitch);
+}
+
+static void mlx5_lag_unlock_eswitches(struct mlx5_core_dev *dev0,
+ struct mlx5_core_dev *dev1)
+{
+ if (dev1)
+ mlx5_esw_unlock(dev1->priv.eswitch);
+ if (dev0)
+ mlx5_esw_unlock(dev0->priv.eswitch);
+}
+
static void mlx5_do_bond_work(struct work_struct *work)
{
struct delayed_work *delayed_work = to_delayed_work(work);
struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
bond_work);
+ struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+ struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
int status;
status = mlx5_dev_list_trylock();
if (!status) {
- /* 1 sec delay. */
mlx5_queue_bond_work(ldev, HZ);
return;
}
+ if (ldev->mode_changes_in_progress) {
+ mlx5_dev_list_unlock();
+ mlx5_queue_bond_work(ldev, HZ);
+ return;
+ }
+
+ mlx5_lag_lock_eswitches(dev0, dev1);
mlx5_do_bond(ldev);
+ mlx5_lag_unlock_eswitches(dev0, dev1);
mlx5_dev_list_unlock();
}
}
/* Must be called with intf_mutex held */
-static void __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
+static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev = NULL;
struct mlx5_core_dev *tmp_dev;
if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
!MLX5_CAP_GEN(dev, lag_master) ||
MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS)
- return;
+ return 0;
tmp_dev = mlx5_get_next_phys_dev(dev);
if (tmp_dev)
ldev = mlx5_lag_dev_alloc(dev);
if (!ldev) {
mlx5_core_err(dev, "Failed to alloc lag dev\n");
- return;
+ return 0;
}
} else {
+ if (ldev->mode_changes_in_progress)
+ return -EAGAIN;
mlx5_ldev_get(ldev);
}
mlx5_ldev_add_mdev(ldev, dev);
- return;
+ return 0;
}
void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
if (!ldev)
return;
+recheck:
mlx5_dev_list_lock();
+ if (ldev->mode_changes_in_progress) {
+ mlx5_dev_list_unlock();
+ msleep(100);
+ goto recheck;
+ }
mlx5_ldev_remove_mdev(ldev, dev);
mlx5_dev_list_unlock();
mlx5_ldev_put(ldev);
void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
{
+ int err;
+
+recheck:
mlx5_dev_list_lock();
- __mlx5_lag_dev_add_mdev(dev);
+ err = __mlx5_lag_dev_add_mdev(dev);
+ if (err) {
+ mlx5_dev_list_unlock();
+ msleep(100);
+ goto recheck;
+ }
mlx5_dev_list_unlock();
}
if (!ldev)
return;
- if (__mlx5_lag_is_active(ldev))
- mlx5_disable_lag(ldev);
-
mlx5_ldev_remove_netdev(ldev, netdev);
ldev->flags &= ~MLX5_LAG_FLAG_READY;
+
+ if (__mlx5_lag_is_active(ldev))
+ mlx5_queue_bond_work(ldev, 0);
}
/* Must be called with intf_mutex held */
if (i >= MLX5_MAX_PORTS)
ldev->flags |= MLX5_LAG_FLAG_READY;
+ mlx5_queue_bond_work(ldev, 0);
}
bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
}
EXPORT_SYMBOL(mlx5_lag_is_active);
+bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+ bool res;
+
+ spin_lock(&lag_lock);
+ ldev = mlx5_lag_dev(dev);
+ res = ldev && __mlx5_lag_is_active(ldev) &&
+ dev == ldev->pf[MLX5_LAG_P1].dev;
+ spin_unlock(&lag_lock);
+
+ return res;
+}
+EXPORT_SYMBOL(mlx5_lag_is_master);
+
bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev;
}
EXPORT_SYMBOL(mlx5_lag_is_sriov);
-void mlx5_lag_update(struct mlx5_core_dev *dev)
+bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+ bool res;
+
+ spin_lock(&lag_lock);
+ ldev = mlx5_lag_dev(dev);
+ res = ldev && __mlx5_lag_is_sriov(ldev) && ldev->shared_fdb;
+ spin_unlock(&lag_lock);
+
+ return res;
+}
+EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);
+
+void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
{
+ struct mlx5_core_dev *dev0;
+ struct mlx5_core_dev *dev1;
struct mlx5_lag *ldev;
mlx5_dev_list_lock();
+
ldev = mlx5_lag_dev(dev);
- if (!ldev)
- goto unlock;
+ dev0 = ldev->pf[MLX5_LAG_P1].dev;
+ dev1 = ldev->pf[MLX5_LAG_P2].dev;
- mlx5_do_bond(ldev);
+ ldev->mode_changes_in_progress++;
+ if (__mlx5_lag_is_active(ldev)) {
+ mlx5_lag_lock_eswitches(dev0, dev1);
+ mlx5_disable_lag(ldev);
+ mlx5_lag_unlock_eswitches(dev0, dev1);
+ }
+ mlx5_dev_list_unlock();
+}
-unlock:
+void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
+{
+ struct mlx5_lag *ldev;
+
+ mlx5_dev_list_lock();
+ ldev = mlx5_lag_dev(dev);
+ ldev->mode_changes_in_progress--;
mlx5_dev_list_unlock();
+ mlx5_queue_bond_work(ldev, 0);
}
struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
}
EXPORT_SYMBOL(mlx5_lag_get_slave_port);
+struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_dev *peer_dev = NULL;
+ struct mlx5_lag *ldev;
+
+ spin_lock(&lag_lock);
+ ldev = mlx5_lag_dev(dev);
+ if (!ldev)
+ goto unlock;
+
+ peer_dev = ldev->pf[MLX5_LAG_P1].dev == dev ?
+ ldev->pf[MLX5_LAG_P2].dev :
+ ldev->pf[MLX5_LAG_P1].dev;
+
+unlock:
+ spin_unlock(&lag_lock);
+ return peer_dev;
+}
+EXPORT_SYMBOL(mlx5_lag_get_peer_mdev);
+
int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
u64 *values,
int num_counters,
*/
struct mlx5_lag {
u8 flags;
+ int mode_changes_in_progress;
+ bool shared_fdb;
u8 v2p_map[MLX5_MAX_PORTS];
struct kref ref;
struct lag_func pf[MLX5_MAX_PORTS];
struct lag_tracker *tracker);
int mlx5_activate_lag(struct mlx5_lag *ldev,
struct lag_tracker *tracker,
- u8 flags);
+ u8 flags,
+ bool shared_fdb);
int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
struct net_device *ndev);
struct lag_tracker tracker;
tracker = ldev->tracker;
- mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH);
+ mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH, false);
}
mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES.
+
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/mlx5/fs.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+#include "lib/fs_ttc.h"
+
+#define MLX5_TTC_NUM_GROUPS 3
+#define MLX5_TTC_GROUP1_SIZE (BIT(3) + MLX5_NUM_TUNNEL_TT)
+#define MLX5_TTC_GROUP2_SIZE BIT(1)
+#define MLX5_TTC_GROUP3_SIZE BIT(0)
+#define MLX5_TTC_TABLE_SIZE (MLX5_TTC_GROUP1_SIZE +\
+ MLX5_TTC_GROUP2_SIZE +\
+ MLX5_TTC_GROUP3_SIZE)
+
+#define MLX5_INNER_TTC_NUM_GROUPS 3
+#define MLX5_INNER_TTC_GROUP1_SIZE BIT(3)
+#define MLX5_INNER_TTC_GROUP2_SIZE BIT(1)
+#define MLX5_INNER_TTC_GROUP3_SIZE BIT(0)
+#define MLX5_INNER_TTC_TABLE_SIZE (MLX5_INNER_TTC_GROUP1_SIZE +\
+ MLX5_INNER_TTC_GROUP2_SIZE +\
+ MLX5_INNER_TTC_GROUP3_SIZE)
+
+/* L3/L4 traffic type classifier */
+struct mlx5_ttc_table {
+ int num_groups;
+ struct mlx5_flow_table *t;
+ struct mlx5_flow_group **g;
+ struct mlx5_ttc_rule rules[MLX5_NUM_TT];
+ struct mlx5_flow_handle *tunnel_rules[MLX5_NUM_TUNNEL_TT];
+};
+
+struct mlx5_flow_table *mlx5_get_ttc_flow_table(struct mlx5_ttc_table *ttc)
+{
+ return ttc->t;
+}
+
+static void mlx5_cleanup_ttc_rules(struct mlx5_ttc_table *ttc)
+{
+ int i;
+
+ for (i = 0; i < MLX5_NUM_TT; i++) {
+ if (!IS_ERR_OR_NULL(ttc->rules[i].rule)) {
+ mlx5_del_flow_rules(ttc->rules[i].rule);
+ ttc->rules[i].rule = NULL;
+ }
+ }
+
+ for (i = 0; i < MLX5_NUM_TUNNEL_TT; i++) {
+ if (!IS_ERR_OR_NULL(ttc->tunnel_rules[i])) {
+ mlx5_del_flow_rules(ttc->tunnel_rules[i]);
+ ttc->tunnel_rules[i] = NULL;
+ }
+ }
+}
+
+struct mlx5_etype_proto {
+ u16 etype;
+ u8 proto;
+};
+
+static struct mlx5_etype_proto ttc_rules[] = {
+ [MLX5_TT_IPV4_TCP] = {
+ .etype = ETH_P_IP,
+ .proto = IPPROTO_TCP,
+ },
+ [MLX5_TT_IPV6_TCP] = {
+ .etype = ETH_P_IPV6,
+ .proto = IPPROTO_TCP,
+ },
+ [MLX5_TT_IPV4_UDP] = {
+ .etype = ETH_P_IP,
+ .proto = IPPROTO_UDP,
+ },
+ [MLX5_TT_IPV6_UDP] = {
+ .etype = ETH_P_IPV6,
+ .proto = IPPROTO_UDP,
+ },
+ [MLX5_TT_IPV4_IPSEC_AH] = {
+ .etype = ETH_P_IP,
+ .proto = IPPROTO_AH,
+ },
+ [MLX5_TT_IPV6_IPSEC_AH] = {
+ .etype = ETH_P_IPV6,
+ .proto = IPPROTO_AH,
+ },
+ [MLX5_TT_IPV4_IPSEC_ESP] = {
+ .etype = ETH_P_IP,
+ .proto = IPPROTO_ESP,
+ },
+ [MLX5_TT_IPV6_IPSEC_ESP] = {
+ .etype = ETH_P_IPV6,
+ .proto = IPPROTO_ESP,
+ },
+ [MLX5_TT_IPV4] = {
+ .etype = ETH_P_IP,
+ .proto = 0,
+ },
+ [MLX5_TT_IPV6] = {
+ .etype = ETH_P_IPV6,
+ .proto = 0,
+ },
+ [MLX5_TT_ANY] = {
+ .etype = 0,
+ .proto = 0,
+ },
+};
+
+static struct mlx5_etype_proto ttc_tunnel_rules[] = {
+ [MLX5_TT_IPV4_GRE] = {
+ .etype = ETH_P_IP,
+ .proto = IPPROTO_GRE,
+ },
+ [MLX5_TT_IPV6_GRE] = {
+ .etype = ETH_P_IPV6,
+ .proto = IPPROTO_GRE,
+ },
+ [MLX5_TT_IPV4_IPIP] = {
+ .etype = ETH_P_IP,
+ .proto = IPPROTO_IPIP,
+ },
+ [MLX5_TT_IPV6_IPIP] = {
+ .etype = ETH_P_IPV6,
+ .proto = IPPROTO_IPIP,
+ },
+ [MLX5_TT_IPV4_IPV6] = {
+ .etype = ETH_P_IP,
+ .proto = IPPROTO_IPV6,
+ },
+ [MLX5_TT_IPV6_IPV6] = {
+ .etype = ETH_P_IPV6,
+ .proto = IPPROTO_IPV6,
+ },
+
+};
+
+u8 mlx5_get_proto_by_tunnel_type(enum mlx5_tunnel_types tt)
+{
+ return ttc_tunnel_rules[tt].proto;
+}
+
+static bool mlx5_tunnel_proto_supported_rx(struct mlx5_core_dev *mdev,
+ u8 proto_type)
+{
+ switch (proto_type) {
+ case IPPROTO_GRE:
+ return MLX5_CAP_ETH(mdev, tunnel_stateless_gre);
+ case IPPROTO_IPIP:
+ case IPPROTO_IPV6:
+ return (MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip) ||
+ MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip_rx));
+ default:
+ return false;
+ }
+}
+
+static bool mlx5_tunnel_any_rx_proto_supported(struct mlx5_core_dev *mdev)
+{
+ int tt;
+
+ for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) {
+ if (mlx5_tunnel_proto_supported_rx(mdev,
+ ttc_tunnel_rules[tt].proto))
+ return true;
+ }
+ return false;
+}
+
+bool mlx5_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev)
+{
+ return (mlx5_tunnel_any_rx_proto_supported(mdev) &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.inner_ip_version));
+}
+
+static u8 mlx5_etype_to_ipv(u16 ethertype)
+{
+ if (ethertype == ETH_P_IP)
+ return 4;
+
+ if (ethertype == ETH_P_IPV6)
+ return 6;
+
+ return 0;
+}
+
+static struct mlx5_flow_handle *
+mlx5_generate_ttc_rule(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft,
+ struct mlx5_flow_destination *dest, u16 etype, u8 proto)
+{
+ int match_ipv_outer =
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev,
+ ft_field_support.outer_ip_version);
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+ u8 ipv;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return ERR_PTR(-ENOMEM);
+
+ if (proto) {
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, proto);
+ }
+
+ ipv = mlx5_etype_to_ipv(etype);
+ if (match_ipv_outer && ipv) {
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, ipv);
+ } else if (etype) {
+ spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype);
+ }
+
+ rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ mlx5_core_err(dev, "%s: add rule failed\n", __func__);
+ }
+
+ kvfree(spec);
+ return err ? ERR_PTR(err) : rule;
+}
+
+static int mlx5_generate_ttc_table_rules(struct mlx5_core_dev *dev,
+ struct ttc_params *params,
+ struct mlx5_ttc_table *ttc)
+{
+ struct mlx5_flow_handle **trules;
+ struct mlx5_ttc_rule *rules;
+ struct mlx5_flow_table *ft;
+ int tt;
+ int err;
+
+ ft = ttc->t;
+ rules = ttc->rules;
+ for (tt = 0; tt < MLX5_NUM_TT; tt++) {
+ struct mlx5_ttc_rule *rule = &rules[tt];
+
+ rule->rule = mlx5_generate_ttc_rule(dev, ft, ¶ms->dests[tt],
+ ttc_rules[tt].etype,
+ ttc_rules[tt].proto);
+ if (IS_ERR(rule->rule)) {
+ err = PTR_ERR(rule->rule);
+ rule->rule = NULL;
+ goto del_rules;
+ }
+ rule->default_dest = params->dests[tt];
+ }
+
+ if (!params->inner_ttc || !mlx5_tunnel_inner_ft_supported(dev))
+ return 0;
+
+ trules = ttc->tunnel_rules;
+ for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) {
+ if (!mlx5_tunnel_proto_supported_rx(dev,
+ ttc_tunnel_rules[tt].proto))
+ continue;
+ trules[tt] = mlx5_generate_ttc_rule(dev, ft,
+ ¶ms->tunnel_dests[tt],
+ ttc_tunnel_rules[tt].etype,
+ ttc_tunnel_rules[tt].proto);
+ if (IS_ERR(trules[tt])) {
+ err = PTR_ERR(trules[tt]);
+ trules[tt] = NULL;
+ goto del_rules;
+ }
+ }
+
+ return 0;
+
+del_rules:
+ mlx5_cleanup_ttc_rules(ttc);
+ return err;
+}
+
+static int mlx5_create_ttc_table_groups(struct mlx5_ttc_table *ttc,
+ bool use_ipv)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ int ix = 0;
+ u32 *in;
+ int err;
+ u8 *mc;
+
+ ttc->g = kcalloc(MLX5_TTC_NUM_GROUPS, sizeof(*ttc->g), GFP_KERNEL);
+ if (!ttc->g)
+ return -ENOMEM;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ kfree(ttc->g);
+ ttc->g = NULL;
+ return -ENOMEM;
+ }
+
+ /* L4 Group */
+ mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol);
+ if (use_ipv)
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_version);
+ else
+ MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5_TTC_GROUP1_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+ if (IS_ERR(ttc->g[ttc->num_groups]))
+ goto err;
+ ttc->num_groups++;
+
+ /* L3 Group */
+ MLX5_SET(fte_match_param, mc, outer_headers.ip_protocol, 0);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5_TTC_GROUP2_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+ if (IS_ERR(ttc->g[ttc->num_groups]))
+ goto err;
+ ttc->num_groups++;
+
+ /* Any Group */
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5_TTC_GROUP3_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+ if (IS_ERR(ttc->g[ttc->num_groups]))
+ goto err;
+ ttc->num_groups++;
+
+ kvfree(in);
+ return 0;
+
+err:
+ err = PTR_ERR(ttc->g[ttc->num_groups]);
+ ttc->g[ttc->num_groups] = NULL;
+ kvfree(in);
+
+ return err;
+}
+
+static struct mlx5_flow_handle *
+mlx5_generate_inner_ttc_rule(struct mlx5_core_dev *dev,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_destination *dest,
+ u16 etype, u8 proto)
+{
+ MLX5_DECLARE_FLOW_ACT(flow_act);
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ int err = 0;
+ u8 ipv;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return ERR_PTR(-ENOMEM);
+
+ ipv = mlx5_etype_to_ipv(etype);
+ if (etype && ipv) {
+ spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_version);
+ MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_version, ipv);
+ }
+
+ if (proto) {
+ spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS;
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_protocol);
+ MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_protocol, proto);
+ }
+
+ rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ mlx5_core_err(dev, "%s: add inner TTC rule failed\n", __func__);
+ }
+
+ kvfree(spec);
+ return err ? ERR_PTR(err) : rule;
+}
+
+static int mlx5_generate_inner_ttc_table_rules(struct mlx5_core_dev *dev,
+ struct ttc_params *params,
+ struct mlx5_ttc_table *ttc)
+{
+ struct mlx5_ttc_rule *rules;
+ struct mlx5_flow_table *ft;
+ int err;
+ int tt;
+
+ ft = ttc->t;
+ rules = ttc->rules;
+
+ for (tt = 0; tt < MLX5_NUM_TT; tt++) {
+ struct mlx5_ttc_rule *rule = &rules[tt];
+
+ rule->rule = mlx5_generate_inner_ttc_rule(dev, ft,
+ ¶ms->dests[tt],
+ ttc_rules[tt].etype,
+ ttc_rules[tt].proto);
+ if (IS_ERR(rule->rule)) {
+ err = PTR_ERR(rule->rule);
+ rule->rule = NULL;
+ goto del_rules;
+ }
+ rule->default_dest = params->dests[tt];
+ }
+
+ return 0;
+
+del_rules:
+
+ mlx5_cleanup_ttc_rules(ttc);
+ return err;
+}
+
+static int mlx5_create_inner_ttc_table_groups(struct mlx5_ttc_table *ttc)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ int ix = 0;
+ u32 *in;
+ int err;
+ u8 *mc;
+
+ ttc->g = kcalloc(MLX5_INNER_TTC_NUM_GROUPS, sizeof(*ttc->g),
+ GFP_KERNEL);
+ if (!ttc->g)
+ return -ENOMEM;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ kfree(ttc->g);
+ ttc->g = NULL;
+ return -ENOMEM;
+ }
+
+ /* L4 Group */
+ mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+ MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_protocol);
+ MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_version);
+ MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_INNER_HEADERS);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5_INNER_TTC_GROUP1_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+ if (IS_ERR(ttc->g[ttc->num_groups]))
+ goto err;
+ ttc->num_groups++;
+
+ /* L3 Group */
+ MLX5_SET(fte_match_param, mc, inner_headers.ip_protocol, 0);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5_INNER_TTC_GROUP2_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+ if (IS_ERR(ttc->g[ttc->num_groups]))
+ goto err;
+ ttc->num_groups++;
+
+ /* Any Group */
+ memset(in, 0, inlen);
+ MLX5_SET_CFG(in, start_flow_index, ix);
+ ix += MLX5_INNER_TTC_GROUP3_SIZE;
+ MLX5_SET_CFG(in, end_flow_index, ix - 1);
+ ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+ if (IS_ERR(ttc->g[ttc->num_groups]))
+ goto err;
+ ttc->num_groups++;
+
+ kvfree(in);
+ return 0;
+
+err:
+ err = PTR_ERR(ttc->g[ttc->num_groups]);
+ ttc->g[ttc->num_groups] = NULL;
+ kvfree(in);
+
+ return err;
+}
+
+struct mlx5_ttc_table *mlx5_create_inner_ttc_table(struct mlx5_core_dev *dev,
+ struct ttc_params *params)
+{
+ struct mlx5_ttc_table *ttc;
+ int err;
+
+ ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL);
+ if (!ttc)
+ return ERR_PTR(-ENOMEM);
+
+ WARN_ON_ONCE(params->ft_attr.max_fte);
+ params->ft_attr.max_fte = MLX5_INNER_TTC_TABLE_SIZE;
+ ttc->t = mlx5_create_flow_table(params->ns, ¶ms->ft_attr);
+ if (IS_ERR(ttc->t)) {
+ err = PTR_ERR(ttc->t);
+ kvfree(ttc);
+ return ERR_PTR(err);
+ }
+
+ err = mlx5_create_inner_ttc_table_groups(ttc);
+ if (err)
+ goto destroy_ft;
+
+ err = mlx5_generate_inner_ttc_table_rules(dev, params, ttc);
+ if (err)
+ goto destroy_ft;
+
+ return ttc;
+
+destroy_ft:
+ mlx5_destroy_ttc_table(ttc);
+ return ERR_PTR(err);
+}
+
+void mlx5_destroy_ttc_table(struct mlx5_ttc_table *ttc)
+{
+ int i;
+
+ mlx5_cleanup_ttc_rules(ttc);
+ for (i = ttc->num_groups - 1; i >= 0; i--) {
+ if (!IS_ERR_OR_NULL(ttc->g[i]))
+ mlx5_destroy_flow_group(ttc->g[i]);
+ ttc->g[i] = NULL;
+ }
+
+ kfree(ttc->g);
+ mlx5_destroy_flow_table(ttc->t);
+ kvfree(ttc);
+}
+
+struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev,
+ struct ttc_params *params)
+{
+ bool match_ipv_outer =
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev,
+ ft_field_support.outer_ip_version);
+ struct mlx5_ttc_table *ttc;
+ int err;
+
+ ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL);
+ if (!ttc)
+ return ERR_PTR(-ENOMEM);
+
+ WARN_ON_ONCE(params->ft_attr.max_fte);
+ params->ft_attr.max_fte = MLX5_TTC_TABLE_SIZE;
+ ttc->t = mlx5_create_flow_table(params->ns, ¶ms->ft_attr);
+ if (IS_ERR(ttc->t)) {
+ err = PTR_ERR(ttc->t);
+ kvfree(ttc);
+ return ERR_PTR(err);
+ }
+
+ err = mlx5_create_ttc_table_groups(ttc, match_ipv_outer);
+ if (err)
+ goto destroy_ft;
+
+ err = mlx5_generate_ttc_table_rules(dev, params, ttc);
+ if (err)
+ goto destroy_ft;
+
+ return ttc;
+
+destroy_ft:
+ mlx5_destroy_ttc_table(ttc);
+ return ERR_PTR(err);
+}
+
+int mlx5_ttc_fwd_dest(struct mlx5_ttc_table *ttc, enum mlx5_traffic_types type,
+ struct mlx5_flow_destination *new_dest)
+{
+ return mlx5_modify_rule_destination(ttc->rules[type].rule, new_dest,
+ NULL);
+}
+
+struct mlx5_flow_destination
+mlx5_ttc_get_default_dest(struct mlx5_ttc_table *ttc,
+ enum mlx5_traffic_types type)
+{
+ struct mlx5_flow_destination *dest = &ttc->rules[type].default_dest;
+
+ WARN_ONCE(dest->type != MLX5_FLOW_DESTINATION_TYPE_TIR,
+ "TTC[%d] default dest is not setup yet", type);
+
+ return *dest;
+}
+
+int mlx5_ttc_fwd_default_dest(struct mlx5_ttc_table *ttc,
+ enum mlx5_traffic_types type)
+{
+ struct mlx5_flow_destination dest = mlx5_ttc_get_default_dest(ttc, type);
+
+ return mlx5_ttc_fwd_dest(ttc, type, &dest);
+}
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#ifndef __ML5_FS_TTC_H__
+#define __ML5_FS_TTC_H__
+
+#include <linux/mlx5/fs.h>
+
+enum mlx5_traffic_types {
+ MLX5_TT_IPV4_TCP,
+ MLX5_TT_IPV6_TCP,
+ MLX5_TT_IPV4_UDP,
+ MLX5_TT_IPV6_UDP,
+ MLX5_TT_IPV4_IPSEC_AH,
+ MLX5_TT_IPV6_IPSEC_AH,
+ MLX5_TT_IPV4_IPSEC_ESP,
+ MLX5_TT_IPV6_IPSEC_ESP,
+ MLX5_TT_IPV4,
+ MLX5_TT_IPV6,
+ MLX5_TT_ANY,
+ MLX5_NUM_TT,
+ MLX5_NUM_INDIR_TIRS = MLX5_TT_ANY,
+};
+
+enum mlx5_tunnel_types {
+ MLX5_TT_IPV4_GRE,
+ MLX5_TT_IPV6_GRE,
+ MLX5_TT_IPV4_IPIP,
+ MLX5_TT_IPV6_IPIP,
+ MLX5_TT_IPV4_IPV6,
+ MLX5_TT_IPV6_IPV6,
+ MLX5_NUM_TUNNEL_TT,
+};
+
+struct mlx5_ttc_rule {
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_destination default_dest;
+};
+
+struct mlx5_ttc_table;
+
+struct ttc_params {
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_flow_table_attr ft_attr;
+ struct mlx5_flow_destination dests[MLX5_NUM_TT];
+ bool inner_ttc;
+ struct mlx5_flow_destination tunnel_dests[MLX5_NUM_TUNNEL_TT];
+};
+
+struct mlx5_flow_table *mlx5_get_ttc_flow_table(struct mlx5_ttc_table *ttc);
+
+struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev,
+ struct ttc_params *params);
+void mlx5_destroy_ttc_table(struct mlx5_ttc_table *ttc);
+
+struct mlx5_ttc_table *mlx5_create_inner_ttc_table(struct mlx5_core_dev *dev,
+ struct ttc_params *params);
+
+int mlx5_ttc_fwd_dest(struct mlx5_ttc_table *ttc, enum mlx5_traffic_types type,
+ struct mlx5_flow_destination *new_dest);
+struct mlx5_flow_destination
+mlx5_ttc_get_default_dest(struct mlx5_ttc_table *ttc,
+ enum mlx5_traffic_types type);
+int mlx5_ttc_fwd_default_dest(struct mlx5_ttc_table *ttc,
+ enum mlx5_traffic_types type);
+
+bool mlx5_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev);
+u8 mlx5_get_proto_by_tunnel_type(enum mlx5_tunnel_types tt);
+
+#endif /* __MLX5_FS_TTC_H__ */
goto err_ec;
}
+ mlx5_lag_add_mdev(dev);
err = mlx5_sriov_attach(dev);
if (err) {
mlx5_core_err(dev, "sriov init failed %d\n", err);
}
mlx5_sf_dev_table_create(dev);
- mlx5_lag_add_mdev(dev);
return 0;
err_sriov:
+ mlx5_lag_remove_mdev(dev);
mlx5_ec_cleanup(dev);
err_ec:
mlx5_sf_hw_table_destroy(dev);
static void mlx5_unload(struct mlx5_core_dev *dev)
{
- mlx5_lag_remove_mdev(dev);
mlx5_sf_dev_table_destroy(dev);
mlx5_sriov_detach(dev);
+ mlx5_lag_remove_mdev(dev);
mlx5_ec_cleanup(dev);
mlx5_sf_hw_table_destroy(dev);
mlx5_vhca_event_stop(dev);
set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
- err = mlx5_devlink_register(priv_to_devlink(dev), dev->device);
+ err = mlx5_devlink_register(priv_to_devlink(dev));
if (err)
goto err_devlink_reg;
struct devlink *devlink;
int err;
- devlink = mlx5_devlink_alloc();
+ devlink = mlx5_devlink_alloc(&pdev->dev);
if (!devlink) {
dev_err(&pdev->dev, "devlink alloc failed\n");
return -ENOMEM;
void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev, struct net_device *netdev);
void mlx5_lag_add_mdev(struct mlx5_core_dev *dev);
void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev);
+void mlx5_lag_disable_change(struct mlx5_core_dev *dev);
+void mlx5_lag_enable_change(struct mlx5_core_dev *dev);
int mlx5_events_init(struct mlx5_core_dev *dev);
void mlx5_events_cleanup(struct mlx5_core_dev *dev);
struct devlink *devlink;
int err;
- devlink = mlx5_devlink_alloc();
+ devlink = mlx5_devlink_alloc(&adev->dev);
if (!devlink)
return -ENOMEM;
return sf->hw_state == MLX5_VHCA_STATE_ACTIVE || sf->hw_state == MLX5_VHCA_STATE_IN_USE;
}
-int mlx5_devlink_sf_port_fn_state_get(struct devlink *devlink, struct devlink_port *dl_port,
+int mlx5_devlink_sf_port_fn_state_get(struct devlink_port *dl_port,
enum devlink_port_fn_state *state,
enum devlink_port_fn_opstate *opstate,
struct netlink_ext_ack *extack)
{
- struct mlx5_core_dev *dev = devlink_priv(devlink);
+ struct mlx5_core_dev *dev = devlink_priv(dl_port->devlink);
struct mlx5_sf_table *table;
struct mlx5_sf *sf;
int err = 0;
return err;
}
-int mlx5_devlink_sf_port_fn_state_set(struct devlink *devlink, struct devlink_port *dl_port,
+int mlx5_devlink_sf_port_fn_state_set(struct devlink_port *dl_port,
enum devlink_port_fn_state state,
struct netlink_ext_ack *extack)
{
- struct mlx5_core_dev *dev = devlink_priv(devlink);
+ struct mlx5_core_dev *dev = devlink_priv(dl_port->devlink);
struct mlx5_sf_table *table;
struct mlx5_sf *sf;
int err;
unsigned int *new_port_index);
int mlx5_devlink_sf_port_del(struct devlink *devlink, unsigned int port_index,
struct netlink_ext_ack *extack);
-int mlx5_devlink_sf_port_fn_state_get(struct devlink *devlink, struct devlink_port *dl_port,
+int mlx5_devlink_sf_port_fn_state_get(struct devlink_port *dl_port,
enum devlink_port_fn_state *state,
enum devlink_port_fn_opstate *opstate,
struct netlink_ext_ack *extack);
-int mlx5_devlink_sf_port_fn_state_set(struct devlink *devlink, struct devlink_port *dl_port,
+int mlx5_devlink_sf_port_fn_state_set(struct devlink_port *dl_port,
enum devlink_port_fn_state state,
struct netlink_ext_ack *extack);
#else
cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe));
- MLX5_SET(cqc, cqc, c_eqn, eqn);
+ MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
MLX5_SET(cqc, cqc, uar_page, uar->index);
MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
MLX5_ADAPTER_PAGE_SHIFT);
if (!reload) {
alloc_size = sizeof(*mlxsw_core) + mlxsw_driver->priv_size;
- devlink = devlink_alloc(&mlxsw_devlink_ops, alloc_size);
+ devlink = devlink_alloc(&mlxsw_devlink_ops, alloc_size,
+ mlxsw_bus_info->dev);
if (!devlink) {
err = -ENOMEM;
goto err_devlink_alloc;
goto err_emad_init;
if (!reload) {
- err = devlink_register(devlink, mlxsw_bus_info->dev);
+ err = devlink_register(devlink);
if (err)
goto err_devlink_register;
}
config SPARX5_SWITCH
tristate "Sparx5 switch driver"
- depends on BRIDGE || BRIDGE=n
depends on NET_SWITCHDEV
depends on HAS_IOMEM
depends on OF
*/
#define VSTAX 73
-static void ifh_encode_bitfield(void *ifh, u64 value, u32 pos, u32 width)
+#define ifh_encode_bitfield(ifh, value, pos, _width) \
+ ({ \
+ u32 width = (_width); \
+ \
+ /* Max width is 5 bytes - 40 bits. In worst case this will
+ * spread over 6 bytes - 48 bits
+ */ \
+ compiletime_assert(width <= 40, \
+ "Unsupported width, must be <= 40"); \
+ __ifh_encode_bitfield((ifh), (value), (pos), width); \
+ })
+
+static void __ifh_encode_bitfield(void *ifh, u64 value, u32 pos, u32 width)
{
u8 *ifh_hdr = ifh;
/* Calculate the Start IFH byte position of this IFH bit position */
u32 byte = (35 - (pos / 8));
/* Calculate the Start bit position in the Start IFH byte */
u32 bit = (pos % 8);
- u64 encode = GENMASK(bit + width - 1, bit) & (value << bit);
-
- /* Max width is 5 bytes - 40 bits. In worst case this will
- * spread over 6 bytes - 48 bits
- */
- compiletime_assert(width <= 40, "Unsupported width, must be <= 40");
+ u64 encode = GENMASK_ULL(bit + width - 1, bit) & (value << bit);
/* The b0-b7 goes into the start IFH byte */
if (encode & 0xFF)
if (!np && !pdev->dev.platform_data)
return -ENODEV;
- devlink = devlink_alloc(&ocelot_devlink_ops, sizeof(*ocelot));
+ devlink =
+ devlink_alloc(&ocelot_devlink_ops, sizeof(*ocelot), &pdev->dev);
if (!devlink)
return -ENOMEM;
if (err)
goto out_put_ports;
- err = devlink_register(devlink, ocelot->dev);
+ err = devlink_register(devlink);
if (err)
goto out_ocelot_deinit;
SET_NETDEV_DEV(dev, &pdev->dev);
platform_set_drvdata(pdev, dev);
- netdev_boot_setup_check(dev);
-
dev->base_addr = res->start;
dev->irq = platform_get_irq(pdev, 0);
err = sonic_probe1(dev);
printk(version);
#endif
- i = pci_enable_device(pdev);
+ i = pcim_enable_device(pdev);
if (i) return i;
/* natsemi has a non-standard PM control register
ioaddr = ioremap(iostart, iosize);
if (!ioaddr) {
i = -ENOMEM;
- goto err_ioremap;
+ goto err_pci_request_regions;
}
/* Work around the dropped serial bit. */
err_register_netdev:
iounmap(ioaddr);
- err_ioremap:
- pci_release_regions(pdev);
-
err_pci_request_regions:
free_netdev(dev);
return i;
NATSEMI_REMOVE_FILE(pdev, dspcfg_workaround);
unregister_netdev (dev);
- pci_release_regions (pdev);
iounmap(ioaddr);
free_netdev (dev);
}
lp->device = &pdev->dev;
platform_set_drvdata(pdev, dev);
SET_NETDEV_DEV(dev, &pdev->dev);
- netdev_boot_setup_check(dev);
dev->base_addr = resmem->start;
dev->irq = resirq->start;
kfree(vdev->vpaths);
- /* we are safe to free it now */
- free_netdev(dev);
-
vxge_debug_init(vdev->level_trace, "%s: ethernet device unregistered",
buf);
vxge_debug_entryexit(vdev->level_trace, "%s: %s:%d Exiting...", buf,
__func__, __LINE__);
+
+ /* we are safe to free it now */
+ free_netdev(dev);
}
/*
goto err_pci_disable;
}
- devlink = devlink_alloc(&nfp_devlink_ops, sizeof(*pf));
+ devlink = devlink_alloc(&nfp_devlink_ops, sizeof(*pf), &pdev->dev);
if (!devlink) {
err = -ENOMEM;
goto err_rel_regions;
for (r = 0; r < nn->max_r_vecs; r++)
nfp_net_vector_assign_rings(&nn->dp, &nn->r_vecs[r], r);
- err = netif_set_real_num_rx_queues(nn->dp.netdev, nn->dp.num_rx_rings);
+ err = netif_set_real_num_queues(nn->dp.netdev,
+ nn->dp.num_stack_tx_rings,
+ nn->dp.num_rx_rings);
if (err)
return err;
- if (nn->dp.netdev->real_num_tx_queues != nn->dp.num_stack_tx_rings) {
- err = netif_set_real_num_tx_queues(nn->dp.netdev,
- nn->dp.num_stack_tx_rings);
- if (err)
- return err;
- }
-
return nfp_net_set_config_and_enable(nn);
}
/* Init to unknowns */
ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE);
+ ethtool_link_ksettings_add_link_mode(cmd, supported, Pause);
+ ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause);
cmd->base.port = PORT_OTHER;
cmd->base.speed = SPEED_UNKNOWN;
cmd->base.duplex = DUPLEX_UNKNOWN;
if (err)
goto err_unmap;
- err = devlink_register(devlink, &pf->pdev->dev);
+ err = devlink_register(devlink);
if (err)
goto err_app_clean;
{
struct devlink *dl;
- dl = devlink_alloc(&ionic_dl_ops, sizeof(struct ionic));
+ dl = devlink_alloc(&ionic_dl_ops, sizeof(struct ionic), dev);
return devlink_priv(dl);
}
struct devlink_port_attrs attrs = {};
int err;
- err = devlink_register(dl, ionic->dev);
+ err = devlink_register(dl);
if (err) {
dev_warn(ionic->dev, "devlink_register failed: %d\n", err);
return err;
*/
};
-static void ionic_lif_rx_mode(struct ionic_lif *lif, unsigned int rx_mode);
+static void ionic_lif_rx_mode(struct ionic_lif *lif);
static int ionic_lif_addr_add(struct ionic_lif *lif, const u8 *addr);
static int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr);
static void ionic_link_status_check(struct ionic_lif *lif);
cur_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
qcq = container_of(dim, struct ionic_qcq, dim);
new_coal = ionic_coal_usec_to_hw(qcq->q.lif->ionic, cur_moder.usec);
- qcq->intr.dim_coal_hw = new_coal ? new_coal : 1;
+ new_coal = new_coal ? new_coal : 1;
+
+ if (qcq->intr.dim_coal_hw != new_coal) {
+ unsigned int qi = qcq->cq.bound_q->index;
+ struct ionic_lif *lif = qcq->q.lif;
+
+ qcq->intr.dim_coal_hw = new_coal;
+
+ ionic_intr_coal_init(lif->ionic->idev.intr_ctrl,
+ lif->rxqcqs[qi]->intr.index,
+ qcq->intr.dim_coal_hw);
+ }
+
dim->state = DIM_START_MEASURE;
}
switch (w->type) {
case IONIC_DW_TYPE_RX_MODE:
- ionic_lif_rx_mode(lif, w->rx_mode);
+ ionic_lif_rx_mode(lif);
break;
case IONIC_DW_TYPE_RX_ADDR_ADD:
ionic_lif_addr_add(lif, w->addr);
return 0;
}
-static int ionic_lif_addr(struct ionic_lif *lif, const u8 *addr, bool add,
- bool can_sleep)
+static int ionic_lif_addr(struct ionic_lif *lif, const u8 *addr, bool add)
{
- struct ionic_deferred_work *work;
unsigned int nmfilters;
unsigned int nufilters;
lif->nucast--;
}
- if (!can_sleep) {
- work = kzalloc(sizeof(*work), GFP_ATOMIC);
- if (!work)
- return -ENOMEM;
- work->type = add ? IONIC_DW_TYPE_RX_ADDR_ADD :
- IONIC_DW_TYPE_RX_ADDR_DEL;
- memcpy(work->addr, addr, ETH_ALEN);
- netdev_dbg(lif->netdev, "deferred: rx_filter %s %pM\n",
- add ? "add" : "del", addr);
- ionic_lif_deferred_enqueue(&lif->deferred, work);
- } else {
- netdev_dbg(lif->netdev, "rx_filter %s %pM\n",
- add ? "add" : "del", addr);
- if (add)
- return ionic_lif_addr_add(lif, addr);
- else
- return ionic_lif_addr_del(lif, addr);
- }
+ netdev_dbg(lif->netdev, "rx_filter %s %pM\n",
+ add ? "add" : "del", addr);
+ if (add)
+ return ionic_lif_addr_add(lif, addr);
+ else
+ return ionic_lif_addr_del(lif, addr);
return 0;
}
static int ionic_addr_add(struct net_device *netdev, const u8 *addr)
{
- return ionic_lif_addr(netdev_priv(netdev), addr, ADD_ADDR, CAN_SLEEP);
-}
-
-static int ionic_ndo_addr_add(struct net_device *netdev, const u8 *addr)
-{
- return ionic_lif_addr(netdev_priv(netdev), addr, ADD_ADDR, CAN_NOT_SLEEP);
+ return ionic_lif_addr(netdev_priv(netdev), addr, ADD_ADDR);
}
static int ionic_addr_del(struct net_device *netdev, const u8 *addr)
{
- return ionic_lif_addr(netdev_priv(netdev), addr, DEL_ADDR, CAN_SLEEP);
+ return ionic_lif_addr(netdev_priv(netdev), addr, DEL_ADDR);
}
-static int ionic_ndo_addr_del(struct net_device *netdev, const u8 *addr)
+static void ionic_lif_rx_mode(struct ionic_lif *lif)
{
- return ionic_lif_addr(netdev_priv(netdev), addr, DEL_ADDR, CAN_NOT_SLEEP);
-}
-
-static void ionic_lif_rx_mode(struct ionic_lif *lif, unsigned int rx_mode)
-{
- struct ionic_admin_ctx ctx = {
- .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
- .cmd.rx_mode_set = {
- .opcode = IONIC_CMD_RX_MODE_SET,
- .lif_index = cpu_to_le16(lif->index),
- .rx_mode = cpu_to_le16(rx_mode),
- },
- };
+ struct net_device *netdev = lif->netdev;
+ unsigned int nfilters;
+ unsigned int nd_flags;
char buf[128];
- int err;
+ u16 rx_mode;
int i;
#define REMAIN(__x) (sizeof(buf) - (__x))
- i = scnprintf(buf, sizeof(buf), "rx_mode 0x%04x -> 0x%04x:",
- lif->rx_mode, rx_mode);
- if (rx_mode & IONIC_RX_MODE_F_UNICAST)
- i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_UNICAST");
- if (rx_mode & IONIC_RX_MODE_F_MULTICAST)
- i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_MULTICAST");
- if (rx_mode & IONIC_RX_MODE_F_BROADCAST)
- i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_BROADCAST");
- if (rx_mode & IONIC_RX_MODE_F_PROMISC)
- i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_PROMISC");
- if (rx_mode & IONIC_RX_MODE_F_ALLMULTI)
- i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_ALLMULTI");
- netdev_dbg(lif->netdev, "lif%d %s\n", lif->index, buf);
-
- err = ionic_adminq_post_wait(lif, &ctx);
- if (err)
- netdev_warn(lif->netdev, "set rx_mode 0x%04x failed: %d\n",
- rx_mode, err);
- else
- lif->rx_mode = rx_mode;
-}
+ mutex_lock(&lif->config_lock);
-static void ionic_set_rx_mode(struct net_device *netdev, bool can_sleep)
-{
- struct ionic_lif *lif = netdev_priv(netdev);
- struct ionic_deferred_work *work;
- unsigned int nfilters;
- unsigned int rx_mode;
+ /* grab the flags once for local use */
+ nd_flags = netdev->flags;
rx_mode = IONIC_RX_MODE_F_UNICAST;
- rx_mode |= (netdev->flags & IFF_MULTICAST) ? IONIC_RX_MODE_F_MULTICAST : 0;
- rx_mode |= (netdev->flags & IFF_BROADCAST) ? IONIC_RX_MODE_F_BROADCAST : 0;
- rx_mode |= (netdev->flags & IFF_PROMISC) ? IONIC_RX_MODE_F_PROMISC : 0;
- rx_mode |= (netdev->flags & IFF_ALLMULTI) ? IONIC_RX_MODE_F_ALLMULTI : 0;
+ rx_mode |= (nd_flags & IFF_MULTICAST) ? IONIC_RX_MODE_F_MULTICAST : 0;
+ rx_mode |= (nd_flags & IFF_BROADCAST) ? IONIC_RX_MODE_F_BROADCAST : 0;
+ rx_mode |= (nd_flags & IFF_PROMISC) ? IONIC_RX_MODE_F_PROMISC : 0;
+ rx_mode |= (nd_flags & IFF_ALLMULTI) ? IONIC_RX_MODE_F_ALLMULTI : 0;
/* sync unicast addresses
* next check to see if we're in an overflow state
* we remove our overflow flag and check the netdev flags
* to see if we can disable NIC PROMISC
*/
- if (can_sleep)
- __dev_uc_sync(netdev, ionic_addr_add, ionic_addr_del);
- else
- __dev_uc_sync(netdev, ionic_ndo_addr_add, ionic_ndo_addr_del);
+ __dev_uc_sync(netdev, ionic_addr_add, ionic_addr_del);
nfilters = le32_to_cpu(lif->identity->eth.max_ucast_filters);
if (netdev_uc_count(netdev) + 1 > nfilters) {
rx_mode |= IONIC_RX_MODE_F_PROMISC;
lif->uc_overflow = true;
} else if (lif->uc_overflow) {
lif->uc_overflow = false;
- if (!(netdev->flags & IFF_PROMISC))
+ if (!(nd_flags & IFF_PROMISC))
rx_mode &= ~IONIC_RX_MODE_F_PROMISC;
}
/* same for multicast */
- if (can_sleep)
- __dev_mc_sync(netdev, ionic_addr_add, ionic_addr_del);
- else
- __dev_mc_sync(netdev, ionic_ndo_addr_add, ionic_ndo_addr_del);
+ __dev_mc_sync(netdev, ionic_addr_add, ionic_addr_del);
nfilters = le32_to_cpu(lif->identity->eth.max_mcast_filters);
if (netdev_mc_count(netdev) > nfilters) {
rx_mode |= IONIC_RX_MODE_F_ALLMULTI;
lif->mc_overflow = true;
} else if (lif->mc_overflow) {
lif->mc_overflow = false;
- if (!(netdev->flags & IFF_ALLMULTI))
+ if (!(nd_flags & IFF_ALLMULTI))
rx_mode &= ~IONIC_RX_MODE_F_ALLMULTI;
}
+ i = scnprintf(buf, sizeof(buf), "rx_mode 0x%04x -> 0x%04x:",
+ lif->rx_mode, rx_mode);
+ if (rx_mode & IONIC_RX_MODE_F_UNICAST)
+ i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_UNICAST");
+ if (rx_mode & IONIC_RX_MODE_F_MULTICAST)
+ i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_MULTICAST");
+ if (rx_mode & IONIC_RX_MODE_F_BROADCAST)
+ i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_BROADCAST");
+ if (rx_mode & IONIC_RX_MODE_F_PROMISC)
+ i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_PROMISC");
+ if (rx_mode & IONIC_RX_MODE_F_ALLMULTI)
+ i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_ALLMULTI");
+ if (rx_mode & IONIC_RX_MODE_F_RDMA_SNIFFER)
+ i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_RDMA_SNIFFER");
+ netdev_dbg(netdev, "lif%d %s\n", lif->index, buf);
+
if (lif->rx_mode != rx_mode) {
- if (!can_sleep) {
- work = kzalloc(sizeof(*work), GFP_ATOMIC);
- if (!work) {
- netdev_err(lif->netdev, "rxmode change dropped\n");
- return;
- }
- work->type = IONIC_DW_TYPE_RX_MODE;
- work->rx_mode = rx_mode;
- netdev_dbg(lif->netdev, "deferred: rx_mode\n");
- ionic_lif_deferred_enqueue(&lif->deferred, work);
- } else {
- ionic_lif_rx_mode(lif, rx_mode);
+ struct ionic_admin_ctx ctx = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+ .cmd.rx_mode_set = {
+ .opcode = IONIC_CMD_RX_MODE_SET,
+ .lif_index = cpu_to_le16(lif->index),
+ },
+ };
+ int err;
+
+ ctx.cmd.rx_mode_set.rx_mode = cpu_to_le16(rx_mode);
+ err = ionic_adminq_post_wait(lif, &ctx);
+ if (err)
+ netdev_warn(netdev, "set rx_mode 0x%04x failed: %d\n",
+ rx_mode, err);
+ else
+ lif->rx_mode = rx_mode;
+ }
+
+ mutex_unlock(&lif->config_lock);
+}
+
+static void ionic_set_rx_mode(struct net_device *netdev, bool can_sleep)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+ struct ionic_deferred_work *work;
+
+ if (!can_sleep) {
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work) {
+ netdev_err(lif->netdev, "rxmode change dropped\n");
+ return;
}
+ work->type = IONIC_DW_TYPE_RX_MODE;
+ netdev_dbg(lif->netdev, "deferred: rx_mode\n");
+ ionic_lif_deferred_enqueue(&lif->deferred, work);
+ } else {
+ ionic_lif_rx_mode(lif);
}
}
ionic_lif_qcq_deinit(lif, lif->notifyqcq);
ionic_lif_qcq_deinit(lif, lif->adminqcq);
+ mutex_destroy(&lif->config_lock);
mutex_destroy(&lif->queue_lock);
ionic_lif_reset(lif);
}
*/
if (!ether_addr_equal(ctx.comp.lif_getattr.mac,
netdev->dev_addr))
- ionic_lif_addr(lif, netdev->dev_addr, ADD_ADDR, CAN_SLEEP);
+ ionic_lif_addr(lif, netdev->dev_addr, ADD_ADDR);
} else {
/* Update the netdev mac with the device's mac */
memcpy(addr.sa_data, ctx.comp.lif_getattr.mac, netdev->addr_len);
netdev_dbg(lif->netdev, "adding station MAC addr %pM\n",
netdev->dev_addr);
- ionic_lif_addr(lif, netdev->dev_addr, ADD_ADDR, CAN_SLEEP);
+ ionic_lif_addr(lif, netdev->dev_addr, ADD_ADDR);
return 0;
}
lif->hw_index = le16_to_cpu(comp.hw_index);
mutex_init(&lif->queue_lock);
+ mutex_init(&lif->config_lock);
/* now that we have the hw_index we can figure out our doorbell page */
lif->dbid_count = le32_to_cpu(lif->ionic->ident.dev.ndbpgs_per_lif);
struct list_head list;
enum ionic_deferred_work_type type;
union {
- unsigned int rx_mode;
u8 addr[ETH_ALEN];
u8 fw_status;
};
unsigned int index;
unsigned int hw_index;
struct mutex queue_lock; /* lock for queue structures */
+ struct mutex config_lock; /* lock for config actions */
spinlock_t adminq_lock; /* lock for AdminQ operations */
struct ionic_qcq *adminqcq;
struct ionic_qcq *notifyqcq;
unsigned int nrxq_descs;
u32 rx_copybreak;
u64 rxq_features;
- unsigned int rx_mode;
+ u16 rx_mode;
u64 hw_features;
bool registered;
bool mc_overflow;
int ionic_lif_size(struct ionic *ionic);
#if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
-int ionic_lif_hwstamp_replay(struct ionic_lif *lif);
+void ionic_lif_hwstamp_replay(struct ionic_lif *lif);
int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr);
int ionic_lif_hwstamp_get(struct ionic_lif *lif, struct ifreq *ifr);
ktime_t ionic_lif_phc_ktime(struct ionic_lif *lif, u64 counter);
void ionic_lif_alloc_phc(struct ionic_lif *lif);
void ionic_lif_free_phc(struct ionic_lif *lif);
#else
-static inline int ionic_lif_hwstamp_replay(struct ionic_lif *lif)
-{
- return -EOPNOTSUPP;
-}
+static inline void ionic_lif_hwstamp_replay(struct ionic_lif *lif) {}
static inline int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr)
{
struct hwtstamp_config config;
int err;
+ if (!lif->phc || !lif->phc->ptp)
+ return -EOPNOTSUPP;
+
if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
return -EFAULT;
return 0;
}
-int ionic_lif_hwstamp_replay(struct ionic_lif *lif)
+void ionic_lif_hwstamp_replay(struct ionic_lif *lif)
{
int err;
+ if (!lif->phc || !lif->phc->ptp)
+ return;
+
err = ionic_lif_hwstamp_set_ts_config(lif, NULL);
if (err)
netdev_info(lif->netdev, "hwstamp replay failed: %d\n", err);
-
- return err;
}
int ionic_lif_hwstamp_get(struct ionic_lif *lif, struct ifreq *ifr)
}
}
- if (likely(netdev->features & NETIF_F_RXCSUM)) {
- if (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_CALC) {
- skb->ip_summed = CHECKSUM_COMPLETE;
- skb->csum = (__force __wsum)le16_to_cpu(comp->csum);
- stats->csum_complete++;
- }
+ if (likely(netdev->features & NETIF_F_RXCSUM) &&
+ (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_CALC)) {
+ skb->ip_summed = CHECKSUM_COMPLETE;
+ skb->csum = (__force __wsum)le16_to_cpu(comp->csum);
+ stats->csum_complete++;
} else {
stats->csum_none++;
}
q->tail_idx = 0;
}
-static void ionic_dim_update(struct ionic_qcq *qcq)
+static void ionic_dim_update(struct ionic_qcq *qcq, int napi_mode)
{
struct dim_sample dim_sample;
struct ionic_lif *lif;
unsigned int qi;
+ u64 pkts, bytes;
if (!qcq->intr.dim_coal_hw)
return;
lif = qcq->q.lif;
qi = qcq->cq.bound_q->index;
- ionic_intr_coal_init(lif->ionic->idev.intr_ctrl,
- lif->rxqcqs[qi]->intr.index,
- qcq->intr.dim_coal_hw);
+ switch (napi_mode) {
+ case IONIC_LIF_F_TX_DIM_INTR:
+ pkts = lif->txqstats[qi].pkts;
+ bytes = lif->txqstats[qi].bytes;
+ break;
+ case IONIC_LIF_F_RX_DIM_INTR:
+ pkts = lif->rxqstats[qi].pkts;
+ bytes = lif->rxqstats[qi].bytes;
+ break;
+ default:
+ pkts = lif->txqstats[qi].pkts + lif->rxqstats[qi].pkts;
+ bytes = lif->txqstats[qi].bytes + lif->rxqstats[qi].bytes;
+ break;
+ }
dim_update_sample(qcq->cq.bound_intr->rearm_count,
- lif->txqstats[qi].pkts,
- lif->txqstats[qi].bytes,
- &dim_sample);
+ pkts, bytes, &dim_sample);
net_dim(&qcq->dim, dim_sample);
}
ionic_tx_service, NULL, NULL);
if (work_done < budget && napi_complete_done(napi, work_done)) {
- ionic_dim_update(qcq);
+ ionic_dim_update(qcq, IONIC_LIF_F_TX_DIM_INTR);
flags |= IONIC_INTR_CRED_UNMASK;
cq->bound_intr->rearm_count++;
}
ionic_rx_fill(cq->bound_q);
if (work_done < budget && napi_complete_done(napi, work_done)) {
- ionic_dim_update(qcq);
+ ionic_dim_update(qcq, IONIC_LIF_F_RX_DIM_INTR);
flags |= IONIC_INTR_CRED_UNMASK;
cq->bound_intr->rearm_count++;
}
ionic_rx_fill(rxcq->bound_q);
if (rx_work_done < budget && napi_complete_done(napi, rx_work_done)) {
- ionic_dim_update(qcq);
+ ionic_dim_update(qcq, 0);
flags |= IONIC_INTR_CRED_UNMASK;
rxcq->bound_intr->rearm_count++;
}
struct devlink *dl;
int rc;
- dl = devlink_alloc(&qed_dl_ops, sizeof(struct qed_devlink));
+ dl = devlink_alloc(&qed_dl_ops, sizeof(struct qed_devlink),
+ &cdev->pdev->dev);
if (!dl)
return ERR_PTR(-ENOMEM);
qdevlink = devlink_priv(dl);
qdevlink->cdev = cdev;
- rc = devlink_register(dl, &cdev->pdev->dev);
+ rc = devlink_register(dl);
if (rc)
goto err_free;
u32 int_sts, first_drop_reason, details, address, all_drops_reason;
struct qed_ptt *p_ptt = p_hwfn->p_dpc_ptt;
+ int_sts = qed_rd(p_hwfn, p_ptt, DORQ_REG_INT_STS);
+ if (int_sts == 0xdeadbeaf) {
+ DP_NOTICE(p_hwfn->cdev,
+ "DORQ is being reset, skipping int_sts handler\n");
+
+ return 0;
+ }
+
/* int_sts may be zero since all PFs were interrupted for doorbell
* overflow but another one already handled it. Can abort here. If
* This PF also requires overflow recovery we will be interrupted again.
* The masked almost full indication may also be set. Ignoring.
*/
- int_sts = qed_rd(p_hwfn, p_ptt, DORQ_REG_INT_STS);
if (!(int_sts & ~DORQ_REG_INT_STS_DORQ_FIFO_AFULL))
return 0;
static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn)
{
+ if (p_hwfn->cdev->recov_in_prog)
+ return 0;
+
p_hwfn->db_recovery_info.dorq_attn = true;
qed_dorq_attn_overflow(p_hwfn);
static const u32 ip_zero[4] = { 0, 0, 0, 0 };
bool found = false;
- qed_iwarp_print_cm_info(p_hwfn, cm_info);
-
list_for_each_entry(listener,
&p_hwfn->p_rdma_info->iwarp.listen_list,
list_entry) {
if (test_and_clear_bit(QED_SLOWPATH_PERIODIC_DB_REC,
&hwfn->slowpath_task_flags)) {
+ /* skip qed_db_rec_handler during recovery/unload */
+ if (hwfn->cdev->recov_in_prog || !hwfn->slowpath_wq_active)
+ goto out;
+
qed_db_rec_handler(hwfn, ptt);
if (hwfn->periodic_db_rec_count--)
qed_slowpath_delayed_work(hwfn,
QED_PERIODIC_DB_REC_INTERVAL);
}
+out:
qed_ptt_release(hwfn, ptt);
}
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
-#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/types.h>
#define QEDE_SP_HW_ERR 4
#define QEDE_SP_ARFS_CONFIG 5
#define QEDE_SP_AER 7
+#define QEDE_SP_DISABLE 8
#ifdef CONFIG_RFS_ACCEL
int qede_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
int qede_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
{
struct qede_dev *edev = netdev_priv(dev);
- struct qede_vlan *vlan = NULL;
+ struct qede_vlan *vlan;
int rc = 0;
DP_VERBOSE(edev, NETIF_MSG_IFDOWN, "Removing vlan 0x%04x\n", vid);
if (vlan->vid == vid)
break;
- if (!vlan || (vlan->vid != vid)) {
+ if (list_entry_is_head(vlan, &edev->vlan_list, list)) {
DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN),
"Vlan isn't configured\n");
goto out;
struct qede_dev *edev = container_of(work, struct qede_dev,
sp_task.work);
+ /* Disable execution of this deferred work once
+ * qede removal is in progress, this stop any future
+ * scheduling of sp_task.
+ */
+ if (test_bit(QEDE_SP_DISABLE, &edev->sp_flags))
+ return;
+
/* The locking scheme depends on the specific flag:
* In case of QEDE_SP_RECOVERY, acquiring the RTNL lock is required to
* ensure that ongoing flows are ended and new ones are not started.
qede_rdma_dev_remove(edev, (mode == QEDE_REMOVE_RECOVERY));
if (mode != QEDE_REMOVE_RECOVERY) {
+ set_bit(QEDE_SP_DISABLE, &edev->sp_flags);
unregister_netdev(ndev);
cancel_delayed_work_sync(&edev->sp_task);
"driver lock acquired\n");
return 1;
}
- ssleep(1);
+ mdelay(1000);
} while (++i < 10);
netdev_err(qdev->ndev, "Timed out waiting for driver lock...\n");
if ((value & ISP_CONTROL_SR) == 0)
break;
- ssleep(1);
+ mdelay(1000);
} while ((--max_wait_time));
/*
ispControlStatus);
if ((value & ISP_CONTROL_FSR) == 0)
break;
- ssleep(1);
+ mdelay(1000);
} while ((--max_wait_time));
}
if (max_wait_time == 0)
static void qlcnic_create_loopback_buff(unsigned char *data, u8 mac[])
{
- unsigned char random_data[] = {0xa8, 0x06, 0x45, 0x00};
+ static const unsigned char random_data[] = {0xa8, 0x06, 0x45, 0x00};
memset(data, 0x4e, QLCNIC_ILB_PKT_SIZE);
#endif
/* setup various bits in PCI command register */
- ret = pci_enable_device(pci_dev);
+ ret = pcim_enable_device(pci_dev);
if(ret) return ret;
i = dma_set_mask(&pci_dev->dev, DMA_BIT_MASK(32));
ioaddr = pci_iomap(pci_dev, 0, 0);
if (!ioaddr) {
ret = -ENOMEM;
- goto err_out_cleardev;
+ goto err_out;
}
sis_priv = netdev_priv(net_dev);
sis_priv->tx_ring_dma);
err_out_unmap:
pci_iounmap(pci_dev, ioaddr);
-err_out_cleardev:
- pci_release_regions(pci_dev);
err_out:
free_netdev(net_dev);
return ret;
sis_priv->tx_ring_dma);
pci_iounmap(pci_dev, sis_priv->ioaddr);
free_netdev(net_dev);
- pci_release_regions(pci_dev);
}
static int __maybe_unused sis900_suspend(struct device *dev)
tristate "SMC 9194 support"
depends on ISA
select CRC32
+ select NETDEV_LEGACY_INIT
help
This is support for the SMC9xxx based Ethernet cards. Choose this
option if you have a DELL laptop with the docking station, or
MODULE_PARM_DESC(irq, "SMC 99194 IRQ number");
MODULE_PARM_DESC(ifport, "SMC 99194 interface port (0-default, 1-TP, 2-AUI)");
-int __init init_module(void)
+static int __init smc_init_module(void)
{
if (io == 0)
printk(KERN_WARNING
devSMC9194 = smc_init(-1);
return PTR_ERR_OR_ZERO(devSMC9194);
}
+module_init(smc_init_module);
-void __exit cleanup_module(void)
+static void __exit smc_cleanup_module(void)
{
unregister_netdev(devSMC9194);
free_irq(devSMC9194->irq, devSMC9194);
release_region(devSMC9194->base_addr, SMC_IO_EXTENT);
free_netdev(devSMC9194);
}
+module_exit(smc_cleanup_module);
#endif /* MODULE */
val &= ~NSS_COMMON_GMAC_CTL_PHY_IFACE_SEL;
break;
default:
- dev_err(&pdev->dev, "Unsupported PHY mode: \"%s\"\n",
- phy_modes(gmac->phy_mode));
- err = -EINVAL;
- goto err_remove_config_dt;
+ goto err_unsupported_phy;
}
regmap_write(gmac->nss_common, NSS_COMMON_GMAC_CTL(gmac->id), val);
NSS_COMMON_CLK_SRC_CTRL_OFFSET(gmac->id);
break;
default:
- dev_err(&pdev->dev, "Unsupported PHY mode: \"%s\"\n",
- phy_modes(gmac->phy_mode));
- err = -EINVAL;
- goto err_remove_config_dt;
+ goto err_unsupported_phy;
}
regmap_write(gmac->nss_common, NSS_COMMON_CLK_SRC_CTRL, val);
NSS_COMMON_CLK_GATE_GMII_TX_EN(gmac->id);
break;
default:
- /* We don't get here; the switch above will have errored out */
- unreachable();
+ goto err_unsupported_phy;
}
regmap_write(gmac->nss_common, NSS_COMMON_CLK_GATE, val);
return 0;
+err_unsupported_phy:
+ dev_err(&pdev->dev, "Unsupported PHY mode: \"%s\"\n",
+ phy_modes(gmac->phy_mode));
+ err = -EINVAL;
+
err_remove_config_dt:
stmmac_remove_config_dt(pdev, plat_dat);
.config_l3_filter = dwmac4_config_l3_filter,
.config_l4_filter = dwmac4_config_l4_filter,
.est_configure = dwmac5_est_configure,
+ .est_irq_status = dwmac5_est_irq_status,
.fpe_configure = dwmac5_fpe_configure,
.fpe_send_mpacket = dwmac5_fpe_send_mpacket,
.fpe_irq_status = dwmac5_fpe_irq_status,
.config_l3_filter = dwmac4_config_l3_filter,
.config_l4_filter = dwmac4_config_l4_filter,
.est_configure = dwmac5_est_configure,
+ .est_irq_status = dwmac5_est_irq_status,
.fpe_configure = dwmac5_fpe_configure,
.fpe_send_mpacket = dwmac5_fpe_send_mpacket,
.fpe_irq_status = dwmac5_fpe_irq_status,
err = niu_pci_vpd_scan_props(np, here, end);
if (err < 0)
return err;
+ /* ret == 1 is not an error */
if (err == 1)
- return -EINVAL;
+ return 0;
}
return 0;
}
else
dp = pci_device_to_OF_node(np->pdev);
- phy_type = of_get_property(dp, "phy-type", &prop_len);
+ phy_type = of_get_property(dp, "phy-type", NULL);
if (!phy_type) {
netdev_err(dev, "%pOF: OF node lacks phy-type property\n", dp);
return -EINVAL;
return -EINVAL;
}
- model = of_get_property(dp, "model", &prop_len);
+ model = of_get_property(dp, "model", NULL);
if (model)
strcpy(np->vpd.model, model);
- if (of_find_property(dp, "hot-swappable-phy", &prop_len)) {
+ if (of_find_property(dp, "hot-swappable-phy", NULL)) {
np->flags |= (NIU_FLAGS_10G | NIU_FLAGS_FIBER |
NIU_FLAGS_HOTPLUG_PHY);
}
config TI_CPSW_SWITCHDEV
tristate "TI CPSW Switch Support with switchdev"
depends on ARCH_DAVINCI || ARCH_OMAP2PLUS || COMPILE_TEST
- depends on BRIDGE || BRIDGE=n
depends on NET_SWITCHDEV
depends on TI_CPTS || !TI_CPTS
select PAGE_POOL
config TI_K3_AM65_CPSW_SWITCHDEV
bool "TI K3 AM654x/J721E CPSW Switch mode support"
depends on TI_K3_AM65_CPSW_NUSS
- depends on BRIDGE || BRIDGE=n
depends on NET_SWITCHDEV
help
This enables switchdev support for TI K3 CPSWxG Ethernet
#include <linux/clk.h>
#include <linux/etherdevice.h>
-#include <linux/if_bridge.h>
#include <linux/if_vlan.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/sys_soc.h>
#include <linux/dma/ti-cppi5.h>
#include <linux/dma/k3-udma-glue.h>
+#include <net/switchdev.h>
#include "cpsw_ale.h"
#include "cpsw_sl.h"
}
napi_enable(&common->napi_rx);
+ if (common->rx_irq_disabled) {
+ common->rx_irq_disabled = false;
+ enable_irq(common->rx_chns.irq);
+ }
dev_dbg(common->dev, "cpsw_nuss started\n");
return 0;
dev_dbg(common->dev, "%s num_rx:%d %d\n", __func__, num_rx, budget);
- if (num_rx < budget && napi_complete_done(napi_rx, num_rx))
- enable_irq(common->rx_chns.irq);
+ if (num_rx < budget && napi_complete_done(napi_rx, num_rx)) {
+ if (common->rx_irq_disabled) {
+ common->rx_irq_disabled = false;
+ enable_irq(common->rx_chns.irq);
+ }
+ }
return num_rx;
}
else
num_tx = am65_cpsw_nuss_tx_compl_packets(tx_chn->common, tx_chn->id, budget);
- num_tx = min(num_tx, budget);
- if (num_tx < budget) {
- napi_complete(napi_tx);
+ if (num_tx >= budget)
+ return budget;
+
+ if (napi_complete_done(napi_tx, num_tx))
enable_irq(tx_chn->irq);
- }
- return num_tx;
+ return 0;
}
static irqreturn_t am65_cpsw_nuss_rx_irq(int irq, void *dev_id)
{
struct am65_cpsw_common *common = dev_id;
+ common->rx_irq_disabled = true;
disable_irq_nosync(irq);
napi_schedule(&common->napi_rx);
for (i = 1; i <= common->port_num; i++) {
struct am65_cpsw_port *port = am65_common_get_port(common, i);
- struct am65_cpsw_ndev_priv *priv = am65_ndev_to_priv(port->ndev);
+ struct am65_cpsw_ndev_priv *priv;
+
+ if (!port->ndev)
+ continue;
+ priv = am65_ndev_to_priv(port->ndev);
priv->offload_fwd_mark = set_val;
}
}
int i;
common->devlink =
- devlink_alloc(&am65_cpsw_devlink_ops, sizeof(*dl_priv));
+ devlink_alloc(&am65_cpsw_devlink_ops, sizeof(*dl_priv), dev);
if (!common->devlink)
return -ENOMEM;
dl_priv = devlink_priv(common->devlink);
dl_priv->common = common;
- ret = devlink_register(common->devlink, dev);
+ ret = devlink_register(common->devlink);
if (ret) {
dev_err(dev, "devlink reg fail ret:%d\n", ret);
goto dl_free;
struct am65_cpsw_rx_chn rx_chns;
struct napi_struct napi_rx;
+ bool rx_irq_disabled;
+
u32 nuss_ver;
u32 cpsw_ver;
unsigned long bus_freq;
skb->protocol = eth_type_trans(skb, ndev);
/* mark skb for recycling */
- skb_mark_for_recycle(skb, page, pool);
+ skb_mark_for_recycle(skb);
netif_receive_skb(skb);
ndev->stats.rx_bytes += len;
struct cpdma_chan *txch;
int ret, q_idx;
- if (skb_padto(skb, CPSW_MIN_PACKET_SIZE)) {
+ if (skb_put_padto(skb, CPSW_MIN_PACKET_SIZE)) {
cpsw_err(priv, tx_err, "packet pad failed\n");
ndev->stats.tx_dropped++;
return NET_XMIT_DROP;
#include <linux/module.h>
#include <linux/irqreturn.h>
#include <linux/interrupt.h>
-#include <linux/if_bridge.h>
#include <linux/if_ether.h>
#include <linux/etherdevice.h>
#include <linux/net_tstamp.h>
#include <linux/kmemleak.h>
#include <linux/sys_soc.h>
+#include <net/switchdev.h>
#include <net/page_pool.h>
#include <net/pkt_cls.h>
#include <net/devlink.h>
skb->protocol = eth_type_trans(skb, ndev);
/* mark skb for recycling */
- skb_mark_for_recycle(skb, page, pool);
+ skb_mark_for_recycle(skb);
netif_receive_skb(skb);
ndev->stats.rx_bytes += len;
struct cpsw_devlink *dl_priv;
int ret = 0;
- cpsw->devlink = devlink_alloc(&cpsw_devlink_ops, sizeof(*dl_priv));
+ cpsw->devlink = devlink_alloc(&cpsw_devlink_ops, sizeof(*dl_priv), dev);
if (!cpsw->devlink)
return -ENOMEM;
dl_priv = devlink_priv(cpsw->devlink);
dl_priv->cpsw = cpsw;
- ret = devlink_register(cpsw->devlink, dev);
+ ret = devlink_register(cpsw->devlink);
if (ret) {
dev_err(dev, "DL reg fail ret:%d\n", ret);
goto dl_free;
goto fail_tx;
}
- ret_code = skb_padto(skb, EMAC_DEF_MIN_ETHPKTSIZE);
+ ret_code = skb_put_padto(skb, EMAC_DEF_MIN_ETHPKTSIZE);
if (unlikely(ret_code < 0)) {
if (netif_msg_tx_err(priv) && net_ratelimit())
dev_err(emac_dev, "DaVinci EMAC: packet pad failed");
gsi_irq_type_update(gsi, gsi->type_enabled_bitmap & ~BIT(type_id));
}
-/* Turn off all GSI interrupts initially; there is no gsi_irq_teardown() */
-static void gsi_irq_setup(struct gsi *gsi)
-{
- /* Disable all interrupt types */
- gsi_irq_type_update(gsi, 0);
-
- /* Clear all type-specific interrupt masks */
- iowrite32(0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET);
- iowrite32(0, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET);
- iowrite32(0, gsi->virt + GSI_CNTXT_GLOB_IRQ_EN_OFFSET);
- iowrite32(0, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_MSK_OFFSET);
-
- /* The inter-EE interrupts are not supported for IPA v3.0-v3.1 */
- if (gsi->version > IPA_VERSION_3_1) {
- u32 offset;
-
- /* These registers are in the non-adjusted address range */
- offset = GSI_INTER_EE_SRC_CH_IRQ_MSK_OFFSET;
- iowrite32(0, gsi->virt_raw + offset);
- offset = GSI_INTER_EE_SRC_EV_CH_IRQ_MSK_OFFSET;
- iowrite32(0, gsi->virt_raw + offset);
- }
-
- iowrite32(0, gsi->virt + GSI_CNTXT_GSI_IRQ_EN_OFFSET);
-}
-
-/* Get # supported channel and event rings; there is no gsi_ring_teardown() */
-static int gsi_ring_setup(struct gsi *gsi)
-{
- struct device *dev = gsi->dev;
- u32 count;
- u32 val;
-
- if (gsi->version < IPA_VERSION_3_5_1) {
- /* No HW_PARAM_2 register prior to IPA v3.5.1, assume the max */
- gsi->channel_count = GSI_CHANNEL_COUNT_MAX;
- gsi->evt_ring_count = GSI_EVT_RING_COUNT_MAX;
-
- return 0;
- }
-
- val = ioread32(gsi->virt + GSI_GSI_HW_PARAM_2_OFFSET);
-
- count = u32_get_bits(val, NUM_CH_PER_EE_FMASK);
- if (!count) {
- dev_err(dev, "GSI reports zero channels supported\n");
- return -EINVAL;
- }
- if (count > GSI_CHANNEL_COUNT_MAX) {
- dev_warn(dev, "limiting to %u channels; hardware supports %u\n",
- GSI_CHANNEL_COUNT_MAX, count);
- count = GSI_CHANNEL_COUNT_MAX;
- }
- gsi->channel_count = count;
-
- count = u32_get_bits(val, NUM_EV_PER_EE_FMASK);
- if (!count) {
- dev_err(dev, "GSI reports zero event rings supported\n");
- return -EINVAL;
- }
- if (count > GSI_EVT_RING_COUNT_MAX) {
- dev_warn(dev,
- "limiting to %u event rings; hardware supports %u\n",
- GSI_EVT_RING_COUNT_MAX, count);
- count = GSI_EVT_RING_COUNT_MAX;
- }
- gsi->evt_ring_count = count;
-
- return 0;
-}
-
/* Event ring commands are performed one at a time. Their completion
* is signaled by the event ring control GSI interrupt type, which is
* only enabled when we issue an event ring command. Only the event
/* All done! */
}
-static int __gsi_channel_start(struct gsi_channel *channel, bool start)
+static int __gsi_channel_start(struct gsi_channel *channel, bool resume)
{
struct gsi *gsi = channel->gsi;
int ret;
- if (!start)
+ /* Prior to IPA v4.0 suspend/resume is not implemented by GSI */
+ if (resume && gsi->version < IPA_VERSION_4_0)
return 0;
mutex_lock(&gsi->mutex);
napi_enable(&channel->napi);
gsi_irq_ieob_enable_one(gsi, channel->evt_ring_id);
- ret = __gsi_channel_start(channel, true);
+ ret = __gsi_channel_start(channel, false);
if (ret) {
gsi_irq_ieob_disable_one(gsi, channel->evt_ring_id);
napi_disable(&channel->napi);
return ret;
}
-static int __gsi_channel_stop(struct gsi_channel *channel, bool stop)
+static int __gsi_channel_stop(struct gsi_channel *channel, bool suspend)
{
struct gsi *gsi = channel->gsi;
int ret;
/* Wait for any underway transactions to complete before stopping. */
gsi_channel_trans_quiesce(channel);
- if (!stop)
+ /* Prior to IPA v4.0 suspend/resume is not implemented by GSI */
+ if (suspend && gsi->version < IPA_VERSION_4_0)
return 0;
mutex_lock(&gsi->mutex);
struct gsi_channel *channel = &gsi->channel[channel_id];
int ret;
- ret = __gsi_channel_stop(channel, true);
+ ret = __gsi_channel_stop(channel, false);
if (ret)
return ret;
mutex_unlock(&gsi->mutex);
}
-/* Stop a STARTED channel for suspend (using stop if requested) */
-int gsi_channel_suspend(struct gsi *gsi, u32 channel_id, bool stop)
+/* Stop a started channel for suspend */
+int gsi_channel_suspend(struct gsi *gsi, u32 channel_id)
{
struct gsi_channel *channel = &gsi->channel[channel_id];
int ret;
- ret = __gsi_channel_stop(channel, stop);
+ ret = __gsi_channel_stop(channel, true);
if (ret)
return ret;
return 0;
}
-/* Resume a suspended channel (starting will be requested if STOPPED) */
-int gsi_channel_resume(struct gsi *gsi, u32 channel_id, bool start)
+/* Resume a suspended channel (starting if stopped) */
+int gsi_channel_resume(struct gsi *gsi, u32 channel_id)
{
struct gsi_channel *channel = &gsi->channel[channel_id];
- return __gsi_channel_start(channel, start);
+ return __gsi_channel_start(channel, true);
+}
+
+/* Prevent all GSI interrupts while suspended */
+void gsi_suspend(struct gsi *gsi)
+{
+ disable_irq(gsi->irq);
+}
+
+/* Allow all GSI interrupts again when resuming */
+void gsi_resume(struct gsi *gsi)
+{
+ enable_irq(gsi->irq);
}
/**
return IRQ_HANDLED;
}
+/* Init function for GSI IRQ lookup; there is no gsi_irq_exit() */
static int gsi_irq_init(struct gsi *gsi, struct platform_device *pdev)
{
- struct device *dev = &pdev->dev;
- unsigned int irq;
int ret;
ret = platform_get_irq_byname(pdev, "gsi");
if (ret <= 0)
return ret ? : -EINVAL;
- irq = ret;
-
- ret = request_irq(irq, gsi_isr, 0, "gsi", gsi);
- if (ret) {
- dev_err(dev, "error %d requesting \"gsi\" IRQ\n", ret);
- return ret;
- }
- gsi->irq = irq;
+ gsi->irq = ret;
return 0;
}
-static void gsi_irq_exit(struct gsi *gsi)
-{
- free_irq(gsi->irq, gsi);
-}
-
/* Return the transaction associated with a transfer completion event */
static struct gsi_trans *gsi_event_trans(struct gsi_channel *channel,
struct gsi_event *event)
gsi_irq_disable(gsi);
}
+/* Turn off all GSI interrupts initially */
+static int gsi_irq_setup(struct gsi *gsi)
+{
+ int ret;
+
+ /* Writing 1 indicates IRQ interrupts; 0 would be MSI */
+ iowrite32(1, gsi->virt + GSI_CNTXT_INTSET_OFFSET);
+
+ /* Disable all interrupt types */
+ gsi_irq_type_update(gsi, 0);
+
+ /* Clear all type-specific interrupt masks */
+ iowrite32(0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET);
+ iowrite32(0, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET);
+ iowrite32(0, gsi->virt + GSI_CNTXT_GLOB_IRQ_EN_OFFSET);
+ iowrite32(0, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_MSK_OFFSET);
+
+ /* The inter-EE interrupts are not supported for IPA v3.0-v3.1 */
+ if (gsi->version > IPA_VERSION_3_1) {
+ u32 offset;
+
+ /* These registers are in the non-adjusted address range */
+ offset = GSI_INTER_EE_SRC_CH_IRQ_MSK_OFFSET;
+ iowrite32(0, gsi->virt_raw + offset);
+ offset = GSI_INTER_EE_SRC_EV_CH_IRQ_MSK_OFFSET;
+ iowrite32(0, gsi->virt_raw + offset);
+ }
+
+ iowrite32(0, gsi->virt + GSI_CNTXT_GSI_IRQ_EN_OFFSET);
+
+ ret = request_irq(gsi->irq, gsi_isr, 0, "gsi", gsi);
+ if (ret)
+ dev_err(gsi->dev, "error %d requesting \"gsi\" IRQ\n", ret);
+
+ return ret;
+}
+
+static void gsi_irq_teardown(struct gsi *gsi)
+{
+ free_irq(gsi->irq, gsi);
+}
+
+/* Get # supported channel and event rings; there is no gsi_ring_teardown() */
+static int gsi_ring_setup(struct gsi *gsi)
+{
+ struct device *dev = gsi->dev;
+ u32 count;
+ u32 val;
+
+ if (gsi->version < IPA_VERSION_3_5_1) {
+ /* No HW_PARAM_2 register prior to IPA v3.5.1, assume the max */
+ gsi->channel_count = GSI_CHANNEL_COUNT_MAX;
+ gsi->evt_ring_count = GSI_EVT_RING_COUNT_MAX;
+
+ return 0;
+ }
+
+ val = ioread32(gsi->virt + GSI_GSI_HW_PARAM_2_OFFSET);
+
+ count = u32_get_bits(val, NUM_CH_PER_EE_FMASK);
+ if (!count) {
+ dev_err(dev, "GSI reports zero channels supported\n");
+ return -EINVAL;
+ }
+ if (count > GSI_CHANNEL_COUNT_MAX) {
+ dev_warn(dev, "limiting to %u channels; hardware supports %u\n",
+ GSI_CHANNEL_COUNT_MAX, count);
+ count = GSI_CHANNEL_COUNT_MAX;
+ }
+ gsi->channel_count = count;
+
+ count = u32_get_bits(val, NUM_EV_PER_EE_FMASK);
+ if (!count) {
+ dev_err(dev, "GSI reports zero event rings supported\n");
+ return -EINVAL;
+ }
+ if (count > GSI_EVT_RING_COUNT_MAX) {
+ dev_warn(dev,
+ "limiting to %u event rings; hardware supports %u\n",
+ GSI_EVT_RING_COUNT_MAX, count);
+ count = GSI_EVT_RING_COUNT_MAX;
+ }
+ gsi->evt_ring_count = count;
+
+ return 0;
+}
+
/* Setup function for GSI. GSI firmware must be loaded and initialized */
int gsi_setup(struct gsi *gsi)
{
return -EIO;
}
- gsi_irq_setup(gsi); /* No matching teardown required */
+ ret = gsi_irq_setup(gsi);
+ if (ret)
+ return ret;
ret = gsi_ring_setup(gsi); /* No matching teardown required */
if (ret)
- return ret;
+ goto err_irq_teardown;
/* Initialize the error log */
iowrite32(0, gsi->virt + GSI_ERROR_LOG_OFFSET);
- /* Writing 1 indicates IRQ interrupts; 0 would be MSI */
- iowrite32(1, gsi->virt + GSI_CNTXT_INTSET_OFFSET);
+ ret = gsi_channel_setup(gsi);
+ if (ret)
+ goto err_irq_teardown;
- return gsi_channel_setup(gsi);
+ return 0;
+
+err_irq_teardown:
+ gsi_irq_teardown(gsi);
+
+ return ret;
}
/* Inverse of gsi_setup() */
void gsi_teardown(struct gsi *gsi)
{
gsi_channel_teardown(gsi);
+ gsi_irq_teardown(gsi);
}
/* Initialize a channel's event ring */
init_completion(&gsi->completion);
- ret = gsi_irq_init(gsi, pdev);
+ ret = gsi_irq_init(gsi, pdev); /* No matching exit required */
if (ret)
goto err_iounmap;
ret = gsi_channel_init(gsi, count, data);
if (ret)
- goto err_irq_exit;
+ goto err_iounmap;
mutex_init(&gsi->mutex);
return 0;
-err_irq_exit:
- gsi_irq_exit(gsi);
err_iounmap:
iounmap(gsi->virt_raw);
{
mutex_destroy(&gsi->mutex);
gsi_channel_exit(gsi);
- gsi_irq_exit(gsi);
iounmap(gsi->virt_raw);
}
*/
void gsi_channel_reset(struct gsi *gsi, u32 channel_id, bool doorbell);
-int gsi_channel_suspend(struct gsi *gsi, u32 channel_id, bool stop);
-int gsi_channel_resume(struct gsi *gsi, u32 channel_id, bool start);
+/**
+ * gsi_suspend() - Prepare the GSI subsystem for suspend
+ * @gsi: GSI pointer
+ */
+void gsi_suspend(struct gsi *gsi);
+
+/**
+ * gsi_resume() - Resume the GSI subsystem following suspend
+ * @gsi: GSI pointer
+ */
+void gsi_resume(struct gsi *gsi);
+
+/**
+ * gsi_channel_suspend() - Suspend a GSI channel
+ * @gsi: GSI pointer
+ * @channel_id: Channel to suspend
+ *
+ * For IPA v4.0+, suspend is implemented by stopping the channel.
+ */
+int gsi_channel_suspend(struct gsi *gsi, u32 channel_id);
+
+/**
+ * gsi_channel_resume() - Resume a suspended GSI channel
+ * @gsi: GSI pointer
+ * @channel_id: Channel to resume
+ *
+ * For IPA v4.0+, the stopped channel is started again.
+ */
+int gsi_channel_resume(struct gsi *gsi, u32 channel_id);
/**
* gsi_init() - Initialize the GSI subsystem
struct ipa_smp2p;
struct ipa_interrupt;
-/**
- * enum ipa_flag - IPA state flags
- * @IPA_FLAG_RESUMED: Whether resume from suspend has been signaled
- * @IPA_FLAG_COUNT: Number of defined IPA flags
- */
-enum ipa_flag {
- IPA_FLAG_RESUMED,
- IPA_FLAG_COUNT, /* Last; not a flag */
-};
-
/**
* struct ipa - IPA information
* @gsi: Embedded GSI structure
- * @flags: Boolean state flags
* @version: IPA hardware version
* @pdev: Platform device
* @completion: Used to signal pipeline clear transfer complete
*/
struct ipa {
struct gsi gsi;
- DECLARE_BITMAP(flags, IPA_FLAG_COUNT);
enum ipa_version version;
struct platform_device *pdev;
struct completion completion;
#include <linux/clk.h>
#include <linux/device.h>
#include <linux/interconnect.h>
+#include <linux/pm.h>
+#include <linux/bitops.h>
#include "ipa.h"
#include "ipa_clock.h"
+#include "ipa_endpoint.h"
#include "ipa_modem.h"
#include "ipa_data.h"
u32 peak_bandwidth;
};
+/**
+ * enum ipa_power_flag - IPA power flags
+ * @IPA_POWER_FLAG_RESUMED: Whether resume from suspend has been signaled
+ * @IPA_POWER_FLAG_COUNT: Number of defined power flags
+ */
+enum ipa_power_flag {
+ IPA_POWER_FLAG_RESUMED,
+ IPA_POWER_FLAG_COUNT, /* Last; not a flag */
+};
+
/**
* struct ipa_clock - IPA clocking information
* @count: Clocking reference count
* @mutex: Protects clock enable/disable
* @core: IPA core clock
+ * @flags: Boolean state flags
* @interconnect_count: Number of elements in interconnect[]
* @interconnect: Interconnect array
*/
refcount_t count;
struct mutex mutex; /* protects clock enable/disable */
struct clk *core;
+ DECLARE_BITMAP(flags, IPA_POWER_FLAG_COUNT);
u32 interconnect_count;
struct ipa_interconnect *interconnect;
};
ret = icc_set_bw(interconnect->path,
interconnect->average_bandwidth,
interconnect->peak_bandwidth);
- if (ret)
+ if (ret) {
+ dev_err(&ipa->pdev->dev,
+ "error %d enabling %s interconnect\n",
+ ret, icc_get_name(interconnect->path));
goto out_unwind;
+ }
interconnect++;
}
}
/* To disable an interconnect, we just its bandwidth to 0 */
-static void ipa_interconnect_disable(struct ipa *ipa)
+static int ipa_interconnect_disable(struct ipa *ipa)
{
struct ipa_interconnect *interconnect;
struct ipa_clock *clock = ipa->clock;
+ struct device *dev = &ipa->pdev->dev;
int result = 0;
u32 count;
int ret;
while (count--) {
interconnect--;
ret = icc_set_bw(interconnect->path, 0, 0);
- if (ret && !result)
- result = ret;
+ if (ret) {
+ dev_err(dev, "error %d disabling %s interconnect\n",
+ ret, icc_get_name(interconnect->path));
+ /* Try to disable all; record only the first error */
+ if (!result)
+ result = ret;
+ }
}
- if (result)
- dev_err(&ipa->pdev->dev,
- "error %d disabling IPA interconnects\n", ret);
+ return result;
}
/* Turn on IPA clocks, including interconnects */
return ret;
ret = clk_prepare_enable(ipa->clock->core);
- if (ret)
- ipa_interconnect_disable(ipa);
+ if (ret) {
+ dev_err(&ipa->pdev->dev, "error %d enabling core clock\n", ret);
+ (void)ipa_interconnect_disable(ipa);
+ }
return ret;
}
static void ipa_clock_disable(struct ipa *ipa)
{
clk_disable_unprepare(ipa->clock->core);
- ipa_interconnect_disable(ipa);
+ (void)ipa_interconnect_disable(ipa);
}
/* Get an IPA clock reference, but only if the reference count is
goto out_mutex_unlock;
ret = ipa_clock_enable(ipa);
- if (ret) {
- dev_err(&ipa->pdev->dev, "error %d enabling IPA clock\n", ret);
- goto out_mutex_unlock;
- }
-
- refcount_set(&clock->count, 1);
-
+ if (!ret)
+ refcount_set(&clock->count, 1);
out_mutex_unlock:
mutex_unlock(&clock->mutex);
}
return ipa->clock ? (u32)clk_get_rate(ipa->clock->core) : 0;
}
+/**
+ * ipa_suspend_handler() - Handle the suspend IPA interrupt
+ * @ipa: IPA pointer
+ * @irq_id: IPA interrupt type (unused)
+ *
+ * If an RX endpoint is suspended, and the IPA has a packet destined for
+ * that endpoint, the IPA generates a SUSPEND interrupt to inform the AP
+ * that it should resume the endpoint. If we get one of these interrupts
+ * we just wake up the system.
+ */
+static void ipa_suspend_handler(struct ipa *ipa, enum ipa_irq_id irq_id)
+{
+ /* Just report the event, and let system resume handle the rest.
+ * More than one endpoint could signal this; if so, ignore
+ * all but the first.
+ */
+ if (!test_and_set_bit(IPA_POWER_FLAG_RESUMED, ipa->clock->flags))
+ pm_wakeup_dev_event(&ipa->pdev->dev, 0, true);
+
+ /* Acknowledge/clear the suspend interrupt on all endpoints */
+ ipa_interrupt_suspend_clear_all(ipa->interrupt);
+}
+
+void ipa_power_setup(struct ipa *ipa)
+{
+ ipa_interrupt_add(ipa->interrupt, IPA_IRQ_TX_SUSPEND,
+ ipa_suspend_handler);
+}
+
+void ipa_power_teardown(struct ipa *ipa)
+{
+ ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_TX_SUSPEND);
+}
+
/* Initialize IPA clocking */
struct ipa_clock *
ipa_clock_init(struct device *dev, const struct ipa_clock_data *data)
kfree(clock);
clk_put(clk);
}
+
+/**
+ * ipa_suspend() - Power management system suspend callback
+ * @dev: IPA device structure
+ *
+ * Return: Always returns zero
+ *
+ * Called by the PM framework when a system suspend operation is invoked.
+ * Suspends endpoints and releases the clock reference held to keep
+ * the IPA clock running until this point.
+ */
+static int ipa_suspend(struct device *dev)
+{
+ struct ipa *ipa = dev_get_drvdata(dev);
+
+ /* Endpoints aren't usable until setup is complete */
+ if (ipa->setup_complete) {
+ __clear_bit(IPA_POWER_FLAG_RESUMED, ipa->clock->flags);
+ ipa_endpoint_suspend(ipa);
+ gsi_suspend(&ipa->gsi);
+ }
+
+ ipa_clock_put(ipa);
+
+ return 0;
+}
+
+/**
+ * ipa_resume() - Power management system resume callback
+ * @dev: IPA device structure
+ *
+ * Return: Always returns 0
+ *
+ * Called by the PM framework when a system resume operation is invoked.
+ * Takes an IPA clock reference to keep the clock running until suspend,
+ * and resumes endpoints.
+ */
+static int ipa_resume(struct device *dev)
+{
+ struct ipa *ipa = dev_get_drvdata(dev);
+
+ /* This clock reference will keep the IPA out of suspend
+ * until we get a power management suspend request.
+ */
+ ipa_clock_get(ipa);
+
+ /* Endpoints aren't usable until setup is complete */
+ if (ipa->setup_complete) {
+ gsi_resume(&ipa->gsi);
+ ipa_endpoint_resume(ipa);
+ }
+
+ return 0;
+}
+
+const struct dev_pm_ops ipa_pm_ops = {
+ .suspend = ipa_suspend,
+ .resume = ipa_resume,
+};
struct ipa;
struct ipa_clock_data;
+/* IPA device power management function block */
+extern const struct dev_pm_ops ipa_pm_ops;
+
/**
* ipa_clock_rate() - Return the current IPA core clock rate
* @ipa: IPA structure
*/
u32 ipa_clock_rate(struct ipa *ipa);
+/**
+ * ipa_power_setup() - Set up IPA power management
+ * @ipa: IPA pointer
+ */
+void ipa_power_setup(struct ipa *ipa);
+
+/**
+ * ipa_power_teardown() - Inverse of ipa_power_setup()
+ * @ipa: IPA pointer
+ */
+void ipa_power_teardown(struct ipa *ipa);
+
/**
* ipa_clock_init() - Initialize IPA clocking
* @dev: IPA device
/* Interconnect rates are in 1000 byte/second units */
static const struct ipa_interconnect_data ipa_interconnect_data[] = {
{
- .name = "ipa_to_llcc",
+ .name = "memory",
.peak_bandwidth = 600000, /* 600 MBps */
.average_bandwidth = 150000, /* 150 MBps */
},
- {
- .name = "llcc_to_ebi1",
- .peak_bandwidth = 1804000, /* 1.804 GBps */
- .average_bandwidth = 150000, /* 150 MBps */
- },
/* Average rate is unused for the next interconnect */
{
- .name = "appss_to_ipa",
+ .name = "config",
.peak_bandwidth = 74000, /* 74 MBps */
.average_bandwidth = 0, /* unused */
},
{
struct device *dev = &endpoint->ipa->pdev->dev;
struct gsi *gsi = &endpoint->ipa->gsi;
- bool stop_channel;
int ret;
if (!(endpoint->ipa->enabled & BIT(endpoint->endpoint_id)))
(void)ipa_endpoint_program_suspend(endpoint, true);
}
- /* Starting with IPA v4.0, endpoints are suspended by stopping the
- * underlying GSI channel rather than using endpoint suspend mode.
- */
- stop_channel = endpoint->ipa->version >= IPA_VERSION_4_0;
- ret = gsi_channel_suspend(gsi, endpoint->channel_id, stop_channel);
+ ret = gsi_channel_suspend(gsi, endpoint->channel_id);
if (ret)
dev_err(dev, "error %d suspending channel %u\n", ret,
endpoint->channel_id);
{
struct device *dev = &endpoint->ipa->pdev->dev;
struct gsi *gsi = &endpoint->ipa->gsi;
- bool start_channel;
int ret;
if (!(endpoint->ipa->enabled & BIT(endpoint->endpoint_id)))
if (!endpoint->toward_ipa)
(void)ipa_endpoint_program_suspend(endpoint, false);
- /* Starting with IPA v4.0, the underlying GSI channel must be
- * restarted for resume.
- */
- start_channel = endpoint->ipa->version >= IPA_VERSION_4_0;
- ret = gsi_channel_resume(gsi, endpoint->channel_id, start_channel);
+ ret = gsi_channel_resume(gsi, endpoint->channel_id);
if (ret)
dev_err(dev, "error %d resuming channel %u\n", ret,
endpoint->channel_id);
/* Divider for 19.2 MHz crystal oscillator clock to get common timer clock */
#define IPA_XO_CLOCK_DIVIDER 192 /* 1 is subtracted where used */
-/**
- * ipa_suspend_handler() - Handle the suspend IPA interrupt
- * @ipa: IPA pointer
- * @irq_id: IPA interrupt type (unused)
- *
- * If an RX endpoint is in suspend state, and the IPA has a packet
- * destined for that endpoint, the IPA generates a SUSPEND interrupt
- * to inform the AP that it should resume the endpoint. If we get
- * one of these interrupts we just resume everything.
- */
-static void ipa_suspend_handler(struct ipa *ipa, enum ipa_irq_id irq_id)
-{
- /* Just report the event, and let system resume handle the rest.
- * More than one endpoint could signal this; if so, ignore
- * all but the first.
- */
- if (!test_and_set_bit(IPA_FLAG_RESUMED, ipa->flags))
- pm_wakeup_dev_event(&ipa->pdev->dev, 0, true);
-
- /* Acknowledge/clear the suspend interrupt on all endpoints */
- ipa_interrupt_suspend_clear_all(ipa->interrupt);
-}
-
/**
* ipa_setup() - Set up IPA hardware
* @ipa: IPA pointer
if (ret)
return ret;
- ipa_interrupt_add(ipa->interrupt, IPA_IRQ_TX_SUSPEND,
- ipa_suspend_handler);
+ ipa_power_setup(ipa);
ret = device_init_wakeup(dev, true);
if (ret)
- goto err_interrupt_remove;
+ goto err_gsi_teardown;
ipa_endpoint_setup(ipa);
ipa_endpoint_disable_one(command_endpoint);
err_endpoint_teardown:
ipa_endpoint_teardown(ipa);
+ ipa_power_teardown(ipa);
(void)device_init_wakeup(dev, false);
-err_interrupt_remove:
- ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_TX_SUSPEND);
+err_gsi_teardown:
gsi_teardown(&ipa->gsi);
return ret;
command_endpoint = ipa->name_map[IPA_ENDPOINT_AP_COMMAND_TX];
ipa_endpoint_disable_one(command_endpoint);
ipa_endpoint_teardown(ipa);
+ ipa_power_teardown(ipa);
(void)device_init_wakeup(&ipa->pdev->dev, false);
- ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_TX_SUSPEND);
gsi_teardown(&ipa->gsi);
}
ret = ipa_endpoint_config(ipa);
if (ret)
- goto err_interrupt_deconfig;
+ goto err_uc_deconfig;
ipa_table_config(ipa); /* No deconfig required */
err_endpoint_deconfig:
ipa_endpoint_deconfig(ipa);
-err_interrupt_deconfig:
+err_uc_deconfig:
ipa_uc_deconfig(ipa);
ipa_interrupt_deconfig(ipa->interrupt);
ipa->interrupt = NULL;
dev_err(&pdev->dev, "shutdown: remove returned %d\n", ret);
}
-/**
- * ipa_suspend() - Power management system suspend callback
- * @dev: IPA device structure
- *
- * Return: Always returns zero
- *
- * Called by the PM framework when a system suspend operation is invoked.
- * Suspends endpoints and releases the clock reference held to keep
- * the IPA clock running until this point.
- */
-static int ipa_suspend(struct device *dev)
-{
- struct ipa *ipa = dev_get_drvdata(dev);
-
- /* Endpoints aren't usable until setup is complete */
- if (ipa->setup_complete) {
- __clear_bit(IPA_FLAG_RESUMED, ipa->flags);
- ipa_endpoint_suspend(ipa);
- }
-
- ipa_clock_put(ipa);
-
- return 0;
-}
-
-/**
- * ipa_resume() - Power management system resume callback
- * @dev: IPA device structure
- *
- * Return: Always returns 0
- *
- * Called by the PM framework when a system resume operation is invoked.
- * Takes an IPA clock reference to keep the clock running until suspend,
- * and resumes endpoints.
- */
-static int ipa_resume(struct device *dev)
-{
- struct ipa *ipa = dev_get_drvdata(dev);
-
- /* This clock reference will keep the IPA out of suspend
- * until we get a power management suspend request.
- */
- ipa_clock_get(ipa);
-
- /* Endpoints aren't usable until setup is complete */
- if (ipa->setup_complete)
- ipa_endpoint_resume(ipa);
-
- return 0;
-}
-
-static const struct dev_pm_ops ipa_pm_ops = {
- .suspend = ipa_suspend,
- .resume = ipa_resume,
-};
-
static const struct attribute_group *ipa_attribute_groups[] = {
&ipa_attribute_group,
&ipa_feature_attribute_group,
struct ipa_priv *priv = netdev_priv(netdev);
struct ipa *ipa = priv->ipa;
+ if (!(netdev->flags & IFF_UP))
+ return;
+
netif_stop_queue(netdev);
ipa_endpoint_suspend_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]);
struct ipa_priv *priv = netdev_priv(netdev);
struct ipa *ipa = priv->ipa;
+ if (!(netdev->flags & IFF_UP))
+ return;
+
ipa_endpoint_resume_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]);
ipa_endpoint_resume_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]);
SET_NETDEV_DEV(netdev, &ipa->pdev->dev);
priv = netdev_priv(netdev);
priv->ipa = ipa;
+ ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = netdev;
+ ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = netdev;
+ ipa->modem_netdev = netdev;
ret = register_netdev(netdev);
- if (!ret) {
- ipa->modem_netdev = netdev;
- ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = netdev;
- ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = netdev;
- } else {
+ if (ret) {
+ ipa->modem_netdev = NULL;
+ ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = NULL;
+ ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = NULL;
free_netdev(netdev);
}
/* Prevent the modem from triggering a call to ipa_setup() */
ipa_smp2p_disable(ipa);
- /* Stop the queue and disable the endpoints if it's open */
+ /* Clean up the netdev and endpoints if it was started */
if (netdev) {
- (void)ipa_stop(netdev);
+ /* If it was opened, stop it first */
+ if (netdev->flags & IFF_UP)
+ (void)ipa_stop(netdev);
+ unregister_netdev(netdev);
+ ipa->modem_netdev = NULL;
ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = NULL;
ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = NULL;
- ipa->modem_netdev = NULL;
- unregister_netdev(netdev);
free_netdev(netdev);
}
+++ /dev/null
-obj-$(CONFIG_MHI_NET) += mhi_net.o
-
-mhi_net-y := net.o proto_mbim.o
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* MHI Network driver - Network over MHI bus
- *
- * Copyright (C) 2021 Linaro Ltd <loic.poulain@linaro.org>
- */
-
-struct mhi_net_stats {
- u64_stats_t rx_packets;
- u64_stats_t rx_bytes;
- u64_stats_t rx_errors;
- u64_stats_t rx_dropped;
- u64_stats_t rx_length_errors;
- u64_stats_t tx_packets;
- u64_stats_t tx_bytes;
- u64_stats_t tx_errors;
- u64_stats_t tx_dropped;
- struct u64_stats_sync tx_syncp;
- struct u64_stats_sync rx_syncp;
-};
-
-struct mhi_net_dev {
- struct mhi_device *mdev;
- struct net_device *ndev;
- struct sk_buff *skbagg_head;
- struct sk_buff *skbagg_tail;
- const struct mhi_net_proto *proto;
- void *proto_data;
- struct delayed_work rx_refill;
- struct mhi_net_stats stats;
- u32 rx_queue_sz;
- int msg_enable;
- unsigned int mru;
-};
-
-struct mhi_net_proto {
- int (*init)(struct mhi_net_dev *mhi_netdev);
- struct sk_buff * (*tx_fixup)(struct mhi_net_dev *mhi_netdev, struct sk_buff *skb);
- void (*rx)(struct mhi_net_dev *mhi_netdev, struct sk_buff *skb);
-};
-
-extern const struct mhi_net_proto proto_mbim;
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-or-later
-/* MHI Network driver - Network over MHI bus
- *
- * Copyright (C) 2020 Linaro Ltd <loic.poulain@linaro.org>
- */
-
-#include <linux/if_arp.h>
-#include <linux/mhi.h>
-#include <linux/mod_devicetable.h>
-#include <linux/module.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <linux/u64_stats_sync.h>
-#include <linux/wwan.h>
-
-#include "mhi.h"
-
-#define MHI_NET_MIN_MTU ETH_MIN_MTU
-#define MHI_NET_MAX_MTU 0xffff
-#define MHI_NET_DEFAULT_MTU 0x4000
-
-/* When set to false, the default netdev (link 0) is not created, and it's up
- * to user to create the link (via wwan rtnetlink).
- */
-static bool create_default_iface = true;
-module_param(create_default_iface, bool, 0);
-
-struct mhi_device_info {
- const char *netname;
- const struct mhi_net_proto *proto;
-};
-
-static int mhi_ndo_open(struct net_device *ndev)
-{
- struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev);
-
- /* Feed the rx buffer pool */
- schedule_delayed_work(&mhi_netdev->rx_refill, 0);
-
- /* Carrier is established via out-of-band channel (e.g. qmi) */
- netif_carrier_on(ndev);
-
- netif_start_queue(ndev);
-
- return 0;
-}
-
-static int mhi_ndo_stop(struct net_device *ndev)
-{
- struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev);
-
- netif_stop_queue(ndev);
- netif_carrier_off(ndev);
- cancel_delayed_work_sync(&mhi_netdev->rx_refill);
-
- return 0;
-}
-
-static netdev_tx_t mhi_ndo_xmit(struct sk_buff *skb, struct net_device *ndev)
-{
- struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev);
- const struct mhi_net_proto *proto = mhi_netdev->proto;
- struct mhi_device *mdev = mhi_netdev->mdev;
- int err;
-
- if (proto && proto->tx_fixup) {
- skb = proto->tx_fixup(mhi_netdev, skb);
- if (unlikely(!skb))
- goto exit_drop;
- }
-
- err = mhi_queue_skb(mdev, DMA_TO_DEVICE, skb, skb->len, MHI_EOT);
- if (unlikely(err)) {
- net_err_ratelimited("%s: Failed to queue TX buf (%d)\n",
- ndev->name, err);
- dev_kfree_skb_any(skb);
- goto exit_drop;
- }
-
- if (mhi_queue_is_full(mdev, DMA_TO_DEVICE))
- netif_stop_queue(ndev);
-
- return NETDEV_TX_OK;
-
-exit_drop:
- u64_stats_update_begin(&mhi_netdev->stats.tx_syncp);
- u64_stats_inc(&mhi_netdev->stats.tx_dropped);
- u64_stats_update_end(&mhi_netdev->stats.tx_syncp);
-
- return NETDEV_TX_OK;
-}
-
-static void mhi_ndo_get_stats64(struct net_device *ndev,
- struct rtnl_link_stats64 *stats)
-{
- struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev);
- unsigned int start;
-
- do {
- start = u64_stats_fetch_begin_irq(&mhi_netdev->stats.rx_syncp);
- stats->rx_packets = u64_stats_read(&mhi_netdev->stats.rx_packets);
- stats->rx_bytes = u64_stats_read(&mhi_netdev->stats.rx_bytes);
- stats->rx_errors = u64_stats_read(&mhi_netdev->stats.rx_errors);
- stats->rx_dropped = u64_stats_read(&mhi_netdev->stats.rx_dropped);
- stats->rx_length_errors = u64_stats_read(&mhi_netdev->stats.rx_length_errors);
- } while (u64_stats_fetch_retry_irq(&mhi_netdev->stats.rx_syncp, start));
-
- do {
- start = u64_stats_fetch_begin_irq(&mhi_netdev->stats.tx_syncp);
- stats->tx_packets = u64_stats_read(&mhi_netdev->stats.tx_packets);
- stats->tx_bytes = u64_stats_read(&mhi_netdev->stats.tx_bytes);
- stats->tx_errors = u64_stats_read(&mhi_netdev->stats.tx_errors);
- stats->tx_dropped = u64_stats_read(&mhi_netdev->stats.tx_dropped);
- } while (u64_stats_fetch_retry_irq(&mhi_netdev->stats.tx_syncp, start));
-}
-
-static const struct net_device_ops mhi_netdev_ops = {
- .ndo_open = mhi_ndo_open,
- .ndo_stop = mhi_ndo_stop,
- .ndo_start_xmit = mhi_ndo_xmit,
- .ndo_get_stats64 = mhi_ndo_get_stats64,
-};
-
-static void mhi_net_setup(struct net_device *ndev)
-{
- ndev->header_ops = NULL; /* No header */
- ndev->type = ARPHRD_RAWIP;
- ndev->hard_header_len = 0;
- ndev->addr_len = 0;
- ndev->flags = IFF_POINTOPOINT | IFF_NOARP;
- ndev->netdev_ops = &mhi_netdev_ops;
- ndev->mtu = MHI_NET_DEFAULT_MTU;
- ndev->min_mtu = MHI_NET_MIN_MTU;
- ndev->max_mtu = MHI_NET_MAX_MTU;
- ndev->tx_queue_len = 1000;
-}
-
-static struct sk_buff *mhi_net_skb_agg(struct mhi_net_dev *mhi_netdev,
- struct sk_buff *skb)
-{
- struct sk_buff *head = mhi_netdev->skbagg_head;
- struct sk_buff *tail = mhi_netdev->skbagg_tail;
-
- /* This is non-paged skb chaining using frag_list */
- if (!head) {
- mhi_netdev->skbagg_head = skb;
- return skb;
- }
-
- if (!skb_shinfo(head)->frag_list)
- skb_shinfo(head)->frag_list = skb;
- else
- tail->next = skb;
-
- head->len += skb->len;
- head->data_len += skb->len;
- head->truesize += skb->truesize;
-
- mhi_netdev->skbagg_tail = skb;
-
- return mhi_netdev->skbagg_head;
-}
-
-static void mhi_net_dl_callback(struct mhi_device *mhi_dev,
- struct mhi_result *mhi_res)
-{
- struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev);
- const struct mhi_net_proto *proto = mhi_netdev->proto;
- struct sk_buff *skb = mhi_res->buf_addr;
- int free_desc_count;
-
- free_desc_count = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE);
-
- if (unlikely(mhi_res->transaction_status)) {
- switch (mhi_res->transaction_status) {
- case -EOVERFLOW:
- /* Packet can not fit in one MHI buffer and has been
- * split over multiple MHI transfers, do re-aggregation.
- * That usually means the device side MTU is larger than
- * the host side MTU/MRU. Since this is not optimal,
- * print a warning (once).
- */
- netdev_warn_once(mhi_netdev->ndev,
- "Fragmented packets received, fix MTU?\n");
- skb_put(skb, mhi_res->bytes_xferd);
- mhi_net_skb_agg(mhi_netdev, skb);
- break;
- case -ENOTCONN:
- /* MHI layer stopping/resetting the DL channel */
- dev_kfree_skb_any(skb);
- return;
- default:
- /* Unknown error, simply drop */
- dev_kfree_skb_any(skb);
- u64_stats_update_begin(&mhi_netdev->stats.rx_syncp);
- u64_stats_inc(&mhi_netdev->stats.rx_errors);
- u64_stats_update_end(&mhi_netdev->stats.rx_syncp);
- }
- } else {
- skb_put(skb, mhi_res->bytes_xferd);
-
- if (mhi_netdev->skbagg_head) {
- /* Aggregate the final fragment */
- skb = mhi_net_skb_agg(mhi_netdev, skb);
- mhi_netdev->skbagg_head = NULL;
- }
-
- switch (skb->data[0] & 0xf0) {
- case 0x40:
- skb->protocol = htons(ETH_P_IP);
- break;
- case 0x60:
- skb->protocol = htons(ETH_P_IPV6);
- break;
- default:
- skb->protocol = htons(ETH_P_MAP);
- break;
- }
-
- if (proto && proto->rx) {
- proto->rx(mhi_netdev, skb);
- } else {
- u64_stats_update_begin(&mhi_netdev->stats.rx_syncp);
- u64_stats_inc(&mhi_netdev->stats.rx_packets);
- u64_stats_add(&mhi_netdev->stats.rx_bytes, skb->len);
- u64_stats_update_end(&mhi_netdev->stats.rx_syncp);
- netif_rx(skb);
- }
- }
-
- /* Refill if RX buffers queue becomes low */
- if (free_desc_count >= mhi_netdev->rx_queue_sz / 2)
- schedule_delayed_work(&mhi_netdev->rx_refill, 0);
-}
-
-static void mhi_net_ul_callback(struct mhi_device *mhi_dev,
- struct mhi_result *mhi_res)
-{
- struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev);
- struct net_device *ndev = mhi_netdev->ndev;
- struct mhi_device *mdev = mhi_netdev->mdev;
- struct sk_buff *skb = mhi_res->buf_addr;
-
- /* Hardware has consumed the buffer, so free the skb (which is not
- * freed by the MHI stack) and perform accounting.
- */
- dev_consume_skb_any(skb);
-
- u64_stats_update_begin(&mhi_netdev->stats.tx_syncp);
- if (unlikely(mhi_res->transaction_status)) {
-
- /* MHI layer stopping/resetting the UL channel */
- if (mhi_res->transaction_status == -ENOTCONN) {
- u64_stats_update_end(&mhi_netdev->stats.tx_syncp);
- return;
- }
-
- u64_stats_inc(&mhi_netdev->stats.tx_errors);
- } else {
- u64_stats_inc(&mhi_netdev->stats.tx_packets);
- u64_stats_add(&mhi_netdev->stats.tx_bytes, mhi_res->bytes_xferd);
- }
- u64_stats_update_end(&mhi_netdev->stats.tx_syncp);
-
- if (netif_queue_stopped(ndev) && !mhi_queue_is_full(mdev, DMA_TO_DEVICE))
- netif_wake_queue(ndev);
-}
-
-static void mhi_net_rx_refill_work(struct work_struct *work)
-{
- struct mhi_net_dev *mhi_netdev = container_of(work, struct mhi_net_dev,
- rx_refill.work);
- struct net_device *ndev = mhi_netdev->ndev;
- struct mhi_device *mdev = mhi_netdev->mdev;
- struct sk_buff *skb;
- unsigned int size;
- int err;
-
- size = mhi_netdev->mru ? mhi_netdev->mru : READ_ONCE(ndev->mtu);
-
- while (!mhi_queue_is_full(mdev, DMA_FROM_DEVICE)) {
- skb = netdev_alloc_skb(ndev, size);
- if (unlikely(!skb))
- break;
-
- err = mhi_queue_skb(mdev, DMA_FROM_DEVICE, skb, size, MHI_EOT);
- if (unlikely(err)) {
- net_err_ratelimited("%s: Failed to queue RX buf (%d)\n",
- ndev->name, err);
- kfree_skb(skb);
- break;
- }
-
- /* Do not hog the CPU if rx buffers are consumed faster than
- * queued (unlikely).
- */
- cond_resched();
- }
-
- /* If we're still starved of rx buffers, reschedule later */
- if (mhi_get_free_desc_count(mdev, DMA_FROM_DEVICE) == mhi_netdev->rx_queue_sz)
- schedule_delayed_work(&mhi_netdev->rx_refill, HZ / 2);
-}
-
-static int mhi_net_newlink(void *ctxt, struct net_device *ndev, u32 if_id,
- struct netlink_ext_ack *extack)
-{
- const struct mhi_device_info *info;
- struct mhi_device *mhi_dev = ctxt;
- struct mhi_net_dev *mhi_netdev;
- int err;
-
- info = (struct mhi_device_info *)mhi_dev->id->driver_data;
-
- /* For now we only support one link (link context 0), driver must be
- * reworked to break 1:1 relationship for net MBIM and to forward setup
- * call to rmnet(QMAP) otherwise.
- */
- if (if_id != 0)
- return -EINVAL;
-
- if (dev_get_drvdata(&mhi_dev->dev))
- return -EBUSY;
-
- mhi_netdev = wwan_netdev_drvpriv(ndev);
-
- dev_set_drvdata(&mhi_dev->dev, mhi_netdev);
- mhi_netdev->ndev = ndev;
- mhi_netdev->mdev = mhi_dev;
- mhi_netdev->skbagg_head = NULL;
- mhi_netdev->proto = info->proto;
- mhi_netdev->mru = mhi_dev->mhi_cntrl->mru;
-
- INIT_DELAYED_WORK(&mhi_netdev->rx_refill, mhi_net_rx_refill_work);
- u64_stats_init(&mhi_netdev->stats.rx_syncp);
- u64_stats_init(&mhi_netdev->stats.tx_syncp);
-
- /* Start MHI channels */
- err = mhi_prepare_for_transfer(mhi_dev);
- if (err)
- goto out_err;
-
- /* Number of transfer descriptors determines size of the queue */
- mhi_netdev->rx_queue_sz = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE);
-
- if (extack)
- err = register_netdevice(ndev);
- else
- err = register_netdev(ndev);
- if (err)
- goto out_err;
-
- if (mhi_netdev->proto) {
- err = mhi_netdev->proto->init(mhi_netdev);
- if (err)
- goto out_err_proto;
- }
-
- return 0;
-
-out_err_proto:
- unregister_netdevice(ndev);
-out_err:
- free_netdev(ndev);
- return err;
-}
-
-static void mhi_net_dellink(void *ctxt, struct net_device *ndev,
- struct list_head *head)
-{
- struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev);
- struct mhi_device *mhi_dev = ctxt;
-
- if (head)
- unregister_netdevice_queue(ndev, head);
- else
- unregister_netdev(ndev);
-
- mhi_unprepare_from_transfer(mhi_dev);
-
- kfree_skb(mhi_netdev->skbagg_head);
-
- dev_set_drvdata(&mhi_dev->dev, NULL);
-}
-
-static const struct wwan_ops mhi_wwan_ops = {
- .priv_size = sizeof(struct mhi_net_dev),
- .setup = mhi_net_setup,
- .newlink = mhi_net_newlink,
- .dellink = mhi_net_dellink,
-};
-
-static int mhi_net_probe(struct mhi_device *mhi_dev,
- const struct mhi_device_id *id)
-{
- const struct mhi_device_info *info = (struct mhi_device_info *)id->driver_data;
- struct mhi_controller *cntrl = mhi_dev->mhi_cntrl;
- struct net_device *ndev;
- int err;
-
- err = wwan_register_ops(&cntrl->mhi_dev->dev, &mhi_wwan_ops, mhi_dev,
- WWAN_NO_DEFAULT_LINK);
- if (err)
- return err;
-
- if (!create_default_iface)
- return 0;
-
- /* Create a default interface which is used as either RMNET real-dev,
- * MBIM link 0 or ip link 0)
- */
- ndev = alloc_netdev(sizeof(struct mhi_net_dev), info->netname,
- NET_NAME_PREDICTABLE, mhi_net_setup);
- if (!ndev) {
- err = -ENOMEM;
- goto err_unregister;
- }
-
- SET_NETDEV_DEV(ndev, &mhi_dev->dev);
-
- err = mhi_net_newlink(mhi_dev, ndev, 0, NULL);
- if (err)
- goto err_release;
-
- return 0;
-
-err_release:
- free_netdev(ndev);
-err_unregister:
- wwan_unregister_ops(&cntrl->mhi_dev->dev);
-
- return err;
-}
-
-static void mhi_net_remove(struct mhi_device *mhi_dev)
-{
- struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev);
- struct mhi_controller *cntrl = mhi_dev->mhi_cntrl;
-
- /* WWAN core takes care of removing remaining links */
- wwan_unregister_ops(&cntrl->mhi_dev->dev);
-
- if (create_default_iface)
- mhi_net_dellink(mhi_dev, mhi_netdev->ndev, NULL);
-}
-
-static const struct mhi_device_info mhi_hwip0 = {
- .netname = "mhi_hwip%d",
-};
-
-static const struct mhi_device_info mhi_swip0 = {
- .netname = "mhi_swip%d",
-};
-
-static const struct mhi_device_info mhi_hwip0_mbim = {
- .netname = "mhi_mbim%d",
- .proto = &proto_mbim,
-};
-
-static const struct mhi_device_id mhi_net_id_table[] = {
- /* Hardware accelerated data PATH (to modem IPA), protocol agnostic */
- { .chan = "IP_HW0", .driver_data = (kernel_ulong_t)&mhi_hwip0 },
- /* Software data PATH (to modem CPU) */
- { .chan = "IP_SW0", .driver_data = (kernel_ulong_t)&mhi_swip0 },
- /* Hardware accelerated data PATH (to modem IPA), MBIM protocol */
- { .chan = "IP_HW0_MBIM", .driver_data = (kernel_ulong_t)&mhi_hwip0_mbim },
- {}
-};
-MODULE_DEVICE_TABLE(mhi, mhi_net_id_table);
-
-static struct mhi_driver mhi_net_driver = {
- .probe = mhi_net_probe,
- .remove = mhi_net_remove,
- .dl_xfer_cb = mhi_net_dl_callback,
- .ul_xfer_cb = mhi_net_ul_callback,
- .id_table = mhi_net_id_table,
- .driver = {
- .name = "mhi_net",
- .owner = THIS_MODULE,
- },
-};
-
-module_mhi_driver(mhi_net_driver);
-
-MODULE_AUTHOR("Loic Poulain <loic.poulain@linaro.org>");
-MODULE_DESCRIPTION("Network over MHI");
-MODULE_LICENSE("GPL v2");
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-or-later
-/* MHI Network driver - Network over MHI bus
- *
- * Copyright (C) 2021 Linaro Ltd <loic.poulain@linaro.org>
- *
- * This driver copy some code from cdc_ncm, which is:
- * Copyright (C) ST-Ericsson 2010-2012
- * and cdc_mbim, which is:
- * Copyright (c) 2012 Smith Micro Software, Inc.
- * Copyright (c) 2012 Bjørn Mork <bjorn@mork.no>
- *
- */
-
-#include <linux/ethtool.h>
-#include <linux/if_vlan.h>
-#include <linux/ip.h>
-#include <linux/mii.h>
-#include <linux/netdevice.h>
-#include <linux/wwan.h>
-#include <linux/skbuff.h>
-#include <linux/usb.h>
-#include <linux/usb/cdc.h>
-#include <linux/usb/usbnet.h>
-#include <linux/usb/cdc_ncm.h>
-
-#include "mhi.h"
-
-#define MBIM_NDP16_SIGN_MASK 0x00ffffff
-
-/* Usual WWAN MTU */
-#define MHI_MBIM_DEFAULT_MTU 1500
-
-/* 3500 allows to optimize skb allocation, the skbs will basically fit in
- * one 4K page. Large MBIM packets will simply be split over several MHI
- * transfers and chained by the MHI net layer (zerocopy).
- */
-#define MHI_MBIM_DEFAULT_MRU 3500
-
-struct mbim_context {
- u16 rx_seq;
- u16 tx_seq;
-};
-
-static void __mbim_length_errors_inc(struct mhi_net_dev *dev)
-{
- u64_stats_update_begin(&dev->stats.rx_syncp);
- u64_stats_inc(&dev->stats.rx_length_errors);
- u64_stats_update_end(&dev->stats.rx_syncp);
-}
-
-static void __mbim_errors_inc(struct mhi_net_dev *dev)
-{
- u64_stats_update_begin(&dev->stats.rx_syncp);
- u64_stats_inc(&dev->stats.rx_errors);
- u64_stats_update_end(&dev->stats.rx_syncp);
-}
-
-static int mbim_rx_verify_nth16(struct sk_buff *skb)
-{
- struct mhi_net_dev *dev = wwan_netdev_drvpriv(skb->dev);
- struct mbim_context *ctx = dev->proto_data;
- struct usb_cdc_ncm_nth16 *nth16;
- int len;
-
- if (skb->len < sizeof(struct usb_cdc_ncm_nth16) +
- sizeof(struct usb_cdc_ncm_ndp16)) {
- netif_dbg(dev, rx_err, dev->ndev, "frame too short\n");
- __mbim_length_errors_inc(dev);
- return -EINVAL;
- }
-
- nth16 = (struct usb_cdc_ncm_nth16 *)skb->data;
-
- if (nth16->dwSignature != cpu_to_le32(USB_CDC_NCM_NTH16_SIGN)) {
- netif_dbg(dev, rx_err, dev->ndev,
- "invalid NTH16 signature <%#010x>\n",
- le32_to_cpu(nth16->dwSignature));
- __mbim_errors_inc(dev);
- return -EINVAL;
- }
-
- /* No limit on the block length, except the size of the data pkt */
- len = le16_to_cpu(nth16->wBlockLength);
- if (len > skb->len) {
- netif_dbg(dev, rx_err, dev->ndev,
- "NTB does not fit into the skb %u/%u\n", len,
- skb->len);
- __mbim_length_errors_inc(dev);
- return -EINVAL;
- }
-
- if (ctx->rx_seq + 1 != le16_to_cpu(nth16->wSequence) &&
- (ctx->rx_seq || le16_to_cpu(nth16->wSequence)) &&
- !(ctx->rx_seq == 0xffff && !le16_to_cpu(nth16->wSequence))) {
- netif_dbg(dev, rx_err, dev->ndev,
- "sequence number glitch prev=%d curr=%d\n",
- ctx->rx_seq, le16_to_cpu(nth16->wSequence));
- }
- ctx->rx_seq = le16_to_cpu(nth16->wSequence);
-
- return le16_to_cpu(nth16->wNdpIndex);
-}
-
-static int mbim_rx_verify_ndp16(struct sk_buff *skb, struct usb_cdc_ncm_ndp16 *ndp16)
-{
- struct mhi_net_dev *dev = wwan_netdev_drvpriv(skb->dev);
- int ret;
-
- if (le16_to_cpu(ndp16->wLength) < USB_CDC_NCM_NDP16_LENGTH_MIN) {
- netif_dbg(dev, rx_err, dev->ndev, "invalid DPT16 length <%u>\n",
- le16_to_cpu(ndp16->wLength));
- return -EINVAL;
- }
-
- ret = ((le16_to_cpu(ndp16->wLength) - sizeof(struct usb_cdc_ncm_ndp16))
- / sizeof(struct usb_cdc_ncm_dpe16));
- ret--; /* Last entry is always a NULL terminator */
-
- if (sizeof(struct usb_cdc_ncm_ndp16) +
- ret * sizeof(struct usb_cdc_ncm_dpe16) > skb->len) {
- netif_dbg(dev, rx_err, dev->ndev,
- "Invalid nframes = %d\n", ret);
- return -EINVAL;
- }
-
- return ret;
-}
-
-static void mbim_rx(struct mhi_net_dev *mhi_netdev, struct sk_buff *skb)
-{
- struct net_device *ndev = mhi_netdev->ndev;
- int ndpoffset;
-
- /* Check NTB header and retrieve first NDP offset */
- ndpoffset = mbim_rx_verify_nth16(skb);
- if (ndpoffset < 0) {
- net_err_ratelimited("%s: Incorrect NTB header\n", ndev->name);
- goto error;
- }
-
- /* Process each NDP */
- while (1) {
- struct usb_cdc_ncm_ndp16 ndp16;
- struct usb_cdc_ncm_dpe16 dpe16;
- int nframes, n, dpeoffset;
-
- if (skb_copy_bits(skb, ndpoffset, &ndp16, sizeof(ndp16))) {
- net_err_ratelimited("%s: Incorrect NDP offset (%u)\n",
- ndev->name, ndpoffset);
- __mbim_length_errors_inc(mhi_netdev);
- goto error;
- }
-
- /* Check NDP header and retrieve number of datagrams */
- nframes = mbim_rx_verify_ndp16(skb, &ndp16);
- if (nframes < 0) {
- net_err_ratelimited("%s: Incorrect NDP16\n", ndev->name);
- __mbim_length_errors_inc(mhi_netdev);
- goto error;
- }
-
- /* Only IP data type supported, no DSS in MHI context */
- if ((ndp16.dwSignature & cpu_to_le32(MBIM_NDP16_SIGN_MASK))
- != cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN)) {
- net_err_ratelimited("%s: Unsupported NDP type\n", ndev->name);
- __mbim_errors_inc(mhi_netdev);
- goto next_ndp;
- }
-
- /* Only primary IP session 0 (0x00) supported for now */
- if (ndp16.dwSignature & ~cpu_to_le32(MBIM_NDP16_SIGN_MASK)) {
- net_err_ratelimited("%s: bad packet session\n", ndev->name);
- __mbim_errors_inc(mhi_netdev);
- goto next_ndp;
- }
-
- /* de-aggregate and deliver IP packets */
- dpeoffset = ndpoffset + sizeof(struct usb_cdc_ncm_ndp16);
- for (n = 0; n < nframes; n++, dpeoffset += sizeof(dpe16)) {
- u16 dgram_offset, dgram_len;
- struct sk_buff *skbn;
-
- if (skb_copy_bits(skb, dpeoffset, &dpe16, sizeof(dpe16)))
- break;
-
- dgram_offset = le16_to_cpu(dpe16.wDatagramIndex);
- dgram_len = le16_to_cpu(dpe16.wDatagramLength);
-
- if (!dgram_offset || !dgram_len)
- break; /* null terminator */
-
- skbn = netdev_alloc_skb(ndev, dgram_len);
- if (!skbn)
- continue;
-
- skb_put(skbn, dgram_len);
- skb_copy_bits(skb, dgram_offset, skbn->data, dgram_len);
-
- switch (skbn->data[0] & 0xf0) {
- case 0x40:
- skbn->protocol = htons(ETH_P_IP);
- break;
- case 0x60:
- skbn->protocol = htons(ETH_P_IPV6);
- break;
- default:
- net_err_ratelimited("%s: unknown protocol\n",
- ndev->name);
- __mbim_errors_inc(mhi_netdev);
- dev_kfree_skb_any(skbn);
- continue;
- }
-
- u64_stats_update_begin(&mhi_netdev->stats.rx_syncp);
- u64_stats_inc(&mhi_netdev->stats.rx_packets);
- u64_stats_add(&mhi_netdev->stats.rx_bytes, skbn->len);
- u64_stats_update_end(&mhi_netdev->stats.rx_syncp);
- netif_rx(skbn);
- }
-next_ndp:
- /* Other NDP to process? */
- ndpoffset = (int)le16_to_cpu(ndp16.wNextNdpIndex);
- if (!ndpoffset)
- break;
- }
-
- /* free skb */
- dev_consume_skb_any(skb);
- return;
-error:
- dev_kfree_skb_any(skb);
-}
-
-struct mbim_tx_hdr {
- struct usb_cdc_ncm_nth16 nth16;
- struct usb_cdc_ncm_ndp16 ndp16;
- struct usb_cdc_ncm_dpe16 dpe16[2];
-} __packed;
-
-static struct sk_buff *mbim_tx_fixup(struct mhi_net_dev *mhi_netdev,
- struct sk_buff *skb)
-{
- struct mbim_context *ctx = mhi_netdev->proto_data;
- unsigned int dgram_size = skb->len;
- struct usb_cdc_ncm_nth16 *nth16;
- struct usb_cdc_ncm_ndp16 *ndp16;
- struct mbim_tx_hdr *mbim_hdr;
-
- /* For now, this is a partial implementation of CDC MBIM, only one NDP
- * is sent, containing the IP packet (no aggregation).
- */
-
- /* Ensure we have enough headroom for crafting MBIM header */
- if (skb_cow_head(skb, sizeof(struct mbim_tx_hdr))) {
- dev_kfree_skb_any(skb);
- return NULL;
- }
-
- mbim_hdr = skb_push(skb, sizeof(struct mbim_tx_hdr));
-
- /* Fill NTB header */
- nth16 = &mbim_hdr->nth16;
- nth16->dwSignature = cpu_to_le32(USB_CDC_NCM_NTH16_SIGN);
- nth16->wHeaderLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_nth16));
- nth16->wSequence = cpu_to_le16(ctx->tx_seq++);
- nth16->wBlockLength = cpu_to_le16(skb->len);
- nth16->wNdpIndex = cpu_to_le16(sizeof(struct usb_cdc_ncm_nth16));
-
- /* Fill the unique NDP */
- ndp16 = &mbim_hdr->ndp16;
- ndp16->dwSignature = cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN);
- ndp16->wLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_ndp16)
- + sizeof(struct usb_cdc_ncm_dpe16) * 2);
- ndp16->wNextNdpIndex = 0;
-
- /* Datagram follows the mbim header */
- ndp16->dpe16[0].wDatagramIndex = cpu_to_le16(sizeof(struct mbim_tx_hdr));
- ndp16->dpe16[0].wDatagramLength = cpu_to_le16(dgram_size);
-
- /* null termination */
- ndp16->dpe16[1].wDatagramIndex = 0;
- ndp16->dpe16[1].wDatagramLength = 0;
-
- return skb;
-}
-
-static int mbim_init(struct mhi_net_dev *mhi_netdev)
-{
- struct net_device *ndev = mhi_netdev->ndev;
-
- mhi_netdev->proto_data = devm_kzalloc(&ndev->dev,
- sizeof(struct mbim_context),
- GFP_KERNEL);
- if (!mhi_netdev->proto_data)
- return -ENOMEM;
-
- ndev->needed_headroom = sizeof(struct mbim_tx_hdr);
- ndev->mtu = MHI_MBIM_DEFAULT_MTU;
-
- if (!mhi_netdev->mru)
- mhi_netdev->mru = MHI_MBIM_DEFAULT_MRU;
-
- return 0;
-}
-
-const struct mhi_net_proto proto_mbim = {
- .init = mbim_init,
- .rx = mbim_rx,
- .tx_fixup = mbim_tx_fixup,
-};
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* MHI Network driver - Network over MHI bus
+ *
+ * Copyright (C) 2020 Linaro Ltd <loic.poulain@linaro.org>
+ */
+
+#include <linux/if_arp.h>
+#include <linux/mhi.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/u64_stats_sync.h>
+
+#define MHI_NET_MIN_MTU ETH_MIN_MTU
+#define MHI_NET_MAX_MTU 0xffff
+#define MHI_NET_DEFAULT_MTU 0x4000
+
+struct mhi_net_stats {
+ u64_stats_t rx_packets;
+ u64_stats_t rx_bytes;
+ u64_stats_t rx_errors;
+ u64_stats_t tx_packets;
+ u64_stats_t tx_bytes;
+ u64_stats_t tx_errors;
+ u64_stats_t tx_dropped;
+ struct u64_stats_sync tx_syncp;
+ struct u64_stats_sync rx_syncp;
+};
+
+struct mhi_net_dev {
+ struct mhi_device *mdev;
+ struct net_device *ndev;
+ struct sk_buff *skbagg_head;
+ struct sk_buff *skbagg_tail;
+ struct delayed_work rx_refill;
+ struct mhi_net_stats stats;
+ u32 rx_queue_sz;
+ int msg_enable;
+ unsigned int mru;
+};
+
+struct mhi_device_info {
+ const char *netname;
+};
+
+static int mhi_ndo_open(struct net_device *ndev)
+{
+ struct mhi_net_dev *mhi_netdev = netdev_priv(ndev);
+
+ /* Feed the rx buffer pool */
+ schedule_delayed_work(&mhi_netdev->rx_refill, 0);
+
+ /* Carrier is established via out-of-band channel (e.g. qmi) */
+ netif_carrier_on(ndev);
+
+ netif_start_queue(ndev);
+
+ return 0;
+}
+
+static int mhi_ndo_stop(struct net_device *ndev)
+{
+ struct mhi_net_dev *mhi_netdev = netdev_priv(ndev);
+
+ netif_stop_queue(ndev);
+ netif_carrier_off(ndev);
+ cancel_delayed_work_sync(&mhi_netdev->rx_refill);
+
+ return 0;
+}
+
+static netdev_tx_t mhi_ndo_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+ struct mhi_net_dev *mhi_netdev = netdev_priv(ndev);
+ struct mhi_device *mdev = mhi_netdev->mdev;
+ int err;
+
+ err = mhi_queue_skb(mdev, DMA_TO_DEVICE, skb, skb->len, MHI_EOT);
+ if (unlikely(err)) {
+ net_err_ratelimited("%s: Failed to queue TX buf (%d)\n",
+ ndev->name, err);
+ dev_kfree_skb_any(skb);
+ goto exit_drop;
+ }
+
+ if (mhi_queue_is_full(mdev, DMA_TO_DEVICE))
+ netif_stop_queue(ndev);
+
+ return NETDEV_TX_OK;
+
+exit_drop:
+ u64_stats_update_begin(&mhi_netdev->stats.tx_syncp);
+ u64_stats_inc(&mhi_netdev->stats.tx_dropped);
+ u64_stats_update_end(&mhi_netdev->stats.tx_syncp);
+
+ return NETDEV_TX_OK;
+}
+
+static void mhi_ndo_get_stats64(struct net_device *ndev,
+ struct rtnl_link_stats64 *stats)
+{
+ struct mhi_net_dev *mhi_netdev = netdev_priv(ndev);
+ unsigned int start;
+
+ do {
+ start = u64_stats_fetch_begin_irq(&mhi_netdev->stats.rx_syncp);
+ stats->rx_packets = u64_stats_read(&mhi_netdev->stats.rx_packets);
+ stats->rx_bytes = u64_stats_read(&mhi_netdev->stats.rx_bytes);
+ stats->rx_errors = u64_stats_read(&mhi_netdev->stats.rx_errors);
+ } while (u64_stats_fetch_retry_irq(&mhi_netdev->stats.rx_syncp, start));
+
+ do {
+ start = u64_stats_fetch_begin_irq(&mhi_netdev->stats.tx_syncp);
+ stats->tx_packets = u64_stats_read(&mhi_netdev->stats.tx_packets);
+ stats->tx_bytes = u64_stats_read(&mhi_netdev->stats.tx_bytes);
+ stats->tx_errors = u64_stats_read(&mhi_netdev->stats.tx_errors);
+ stats->tx_dropped = u64_stats_read(&mhi_netdev->stats.tx_dropped);
+ } while (u64_stats_fetch_retry_irq(&mhi_netdev->stats.tx_syncp, start));
+}
+
+static const struct net_device_ops mhi_netdev_ops = {
+ .ndo_open = mhi_ndo_open,
+ .ndo_stop = mhi_ndo_stop,
+ .ndo_start_xmit = mhi_ndo_xmit,
+ .ndo_get_stats64 = mhi_ndo_get_stats64,
+};
+
+static void mhi_net_setup(struct net_device *ndev)
+{
+ ndev->header_ops = NULL; /* No header */
+ ndev->type = ARPHRD_RAWIP;
+ ndev->hard_header_len = 0;
+ ndev->addr_len = 0;
+ ndev->flags = IFF_POINTOPOINT | IFF_NOARP;
+ ndev->netdev_ops = &mhi_netdev_ops;
+ ndev->mtu = MHI_NET_DEFAULT_MTU;
+ ndev->min_mtu = MHI_NET_MIN_MTU;
+ ndev->max_mtu = MHI_NET_MAX_MTU;
+ ndev->tx_queue_len = 1000;
+}
+
+static struct sk_buff *mhi_net_skb_agg(struct mhi_net_dev *mhi_netdev,
+ struct sk_buff *skb)
+{
+ struct sk_buff *head = mhi_netdev->skbagg_head;
+ struct sk_buff *tail = mhi_netdev->skbagg_tail;
+
+ /* This is non-paged skb chaining using frag_list */
+ if (!head) {
+ mhi_netdev->skbagg_head = skb;
+ return skb;
+ }
+
+ if (!skb_shinfo(head)->frag_list)
+ skb_shinfo(head)->frag_list = skb;
+ else
+ tail->next = skb;
+
+ head->len += skb->len;
+ head->data_len += skb->len;
+ head->truesize += skb->truesize;
+
+ mhi_netdev->skbagg_tail = skb;
+
+ return mhi_netdev->skbagg_head;
+}
+
+static void mhi_net_dl_callback(struct mhi_device *mhi_dev,
+ struct mhi_result *mhi_res)
+{
+ struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev);
+ struct sk_buff *skb = mhi_res->buf_addr;
+ int free_desc_count;
+
+ free_desc_count = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE);
+
+ if (unlikely(mhi_res->transaction_status)) {
+ switch (mhi_res->transaction_status) {
+ case -EOVERFLOW:
+ /* Packet can not fit in one MHI buffer and has been
+ * split over multiple MHI transfers, do re-aggregation.
+ * That usually means the device side MTU is larger than
+ * the host side MTU/MRU. Since this is not optimal,
+ * print a warning (once).
+ */
+ netdev_warn_once(mhi_netdev->ndev,
+ "Fragmented packets received, fix MTU?\n");
+ skb_put(skb, mhi_res->bytes_xferd);
+ mhi_net_skb_agg(mhi_netdev, skb);
+ break;
+ case -ENOTCONN:
+ /* MHI layer stopping/resetting the DL channel */
+ dev_kfree_skb_any(skb);
+ return;
+ default:
+ /* Unknown error, simply drop */
+ dev_kfree_skb_any(skb);
+ u64_stats_update_begin(&mhi_netdev->stats.rx_syncp);
+ u64_stats_inc(&mhi_netdev->stats.rx_errors);
+ u64_stats_update_end(&mhi_netdev->stats.rx_syncp);
+ }
+ } else {
+ skb_put(skb, mhi_res->bytes_xferd);
+
+ if (mhi_netdev->skbagg_head) {
+ /* Aggregate the final fragment */
+ skb = mhi_net_skb_agg(mhi_netdev, skb);
+ mhi_netdev->skbagg_head = NULL;
+ }
+
+ switch (skb->data[0] & 0xf0) {
+ case 0x40:
+ skb->protocol = htons(ETH_P_IP);
+ break;
+ case 0x60:
+ skb->protocol = htons(ETH_P_IPV6);
+ break;
+ default:
+ skb->protocol = htons(ETH_P_MAP);
+ break;
+ }
+
+ u64_stats_update_begin(&mhi_netdev->stats.rx_syncp);
+ u64_stats_inc(&mhi_netdev->stats.rx_packets);
+ u64_stats_add(&mhi_netdev->stats.rx_bytes, skb->len);
+ u64_stats_update_end(&mhi_netdev->stats.rx_syncp);
+ netif_rx(skb);
+ }
+
+ /* Refill if RX buffers queue becomes low */
+ if (free_desc_count >= mhi_netdev->rx_queue_sz / 2)
+ schedule_delayed_work(&mhi_netdev->rx_refill, 0);
+}
+
+static void mhi_net_ul_callback(struct mhi_device *mhi_dev,
+ struct mhi_result *mhi_res)
+{
+ struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev);
+ struct net_device *ndev = mhi_netdev->ndev;
+ struct mhi_device *mdev = mhi_netdev->mdev;
+ struct sk_buff *skb = mhi_res->buf_addr;
+
+ /* Hardware has consumed the buffer, so free the skb (which is not
+ * freed by the MHI stack) and perform accounting.
+ */
+ dev_consume_skb_any(skb);
+
+ u64_stats_update_begin(&mhi_netdev->stats.tx_syncp);
+ if (unlikely(mhi_res->transaction_status)) {
+ /* MHI layer stopping/resetting the UL channel */
+ if (mhi_res->transaction_status == -ENOTCONN) {
+ u64_stats_update_end(&mhi_netdev->stats.tx_syncp);
+ return;
+ }
+
+ u64_stats_inc(&mhi_netdev->stats.tx_errors);
+ } else {
+ u64_stats_inc(&mhi_netdev->stats.tx_packets);
+ u64_stats_add(&mhi_netdev->stats.tx_bytes, mhi_res->bytes_xferd);
+ }
+ u64_stats_update_end(&mhi_netdev->stats.tx_syncp);
+
+ if (netif_queue_stopped(ndev) && !mhi_queue_is_full(mdev, DMA_TO_DEVICE))
+ netif_wake_queue(ndev);
+}
+
+static void mhi_net_rx_refill_work(struct work_struct *work)
+{
+ struct mhi_net_dev *mhi_netdev = container_of(work, struct mhi_net_dev,
+ rx_refill.work);
+ struct net_device *ndev = mhi_netdev->ndev;
+ struct mhi_device *mdev = mhi_netdev->mdev;
+ struct sk_buff *skb;
+ unsigned int size;
+ int err;
+
+ size = mhi_netdev->mru ? mhi_netdev->mru : READ_ONCE(ndev->mtu);
+
+ while (!mhi_queue_is_full(mdev, DMA_FROM_DEVICE)) {
+ skb = netdev_alloc_skb(ndev, size);
+ if (unlikely(!skb))
+ break;
+
+ err = mhi_queue_skb(mdev, DMA_FROM_DEVICE, skb, size, MHI_EOT);
+ if (unlikely(err)) {
+ net_err_ratelimited("%s: Failed to queue RX buf (%d)\n",
+ ndev->name, err);
+ kfree_skb(skb);
+ break;
+ }
+
+ /* Do not hog the CPU if rx buffers are consumed faster than
+ * queued (unlikely).
+ */
+ cond_resched();
+ }
+
+ /* If we're still starved of rx buffers, reschedule later */
+ if (mhi_get_free_desc_count(mdev, DMA_FROM_DEVICE) == mhi_netdev->rx_queue_sz)
+ schedule_delayed_work(&mhi_netdev->rx_refill, HZ / 2);
+}
+
+static int mhi_net_newlink(struct mhi_device *mhi_dev, struct net_device *ndev)
+{
+ struct mhi_net_dev *mhi_netdev;
+ int err;
+
+ mhi_netdev = netdev_priv(ndev);
+
+ dev_set_drvdata(&mhi_dev->dev, mhi_netdev);
+ mhi_netdev->ndev = ndev;
+ mhi_netdev->mdev = mhi_dev;
+ mhi_netdev->skbagg_head = NULL;
+ mhi_netdev->mru = mhi_dev->mhi_cntrl->mru;
+
+ INIT_DELAYED_WORK(&mhi_netdev->rx_refill, mhi_net_rx_refill_work);
+ u64_stats_init(&mhi_netdev->stats.rx_syncp);
+ u64_stats_init(&mhi_netdev->stats.tx_syncp);
+
+ /* Start MHI channels */
+ err = mhi_prepare_for_transfer(mhi_dev, 0);
+ if (err)
+ goto out_err;
+
+ /* Number of transfer descriptors determines size of the queue */
+ mhi_netdev->rx_queue_sz = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE);
+
+ err = register_netdev(ndev);
+ if (err)
+ return err;
+
+ return 0;
+
+out_err:
+ free_netdev(ndev);
+ return err;
+}
+
+static void mhi_net_dellink(struct mhi_device *mhi_dev, struct net_device *ndev)
+{
+ struct mhi_net_dev *mhi_netdev = netdev_priv(ndev);
+
+ unregister_netdev(ndev);
+
+ mhi_unprepare_from_transfer(mhi_dev);
+
+ kfree_skb(mhi_netdev->skbagg_head);
+
+ dev_set_drvdata(&mhi_dev->dev, NULL);
+}
+
+static int mhi_net_probe(struct mhi_device *mhi_dev,
+ const struct mhi_device_id *id)
+{
+ const struct mhi_device_info *info = (struct mhi_device_info *)id->driver_data;
+ struct net_device *ndev;
+ int err;
+
+ ndev = alloc_netdev(sizeof(struct mhi_net_dev), info->netname,
+ NET_NAME_PREDICTABLE, mhi_net_setup);
+ if (!ndev)
+ return -ENOMEM;
+
+ SET_NETDEV_DEV(ndev, &mhi_dev->dev);
+
+ err = mhi_net_newlink(mhi_dev, ndev);
+ if (err) {
+ free_netdev(ndev);
+ return err;
+ }
+
+ return 0;
+}
+
+static void mhi_net_remove(struct mhi_device *mhi_dev)
+{
+ struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev);
+
+ mhi_net_dellink(mhi_dev, mhi_netdev->ndev);
+}
+
+static const struct mhi_device_info mhi_hwip0 = {
+ .netname = "mhi_hwip%d",
+};
+
+static const struct mhi_device_info mhi_swip0 = {
+ .netname = "mhi_swip%d",
+};
+
+static const struct mhi_device_id mhi_net_id_table[] = {
+ /* Hardware accelerated data PATH (to modem IPA), protocol agnostic */
+ { .chan = "IP_HW0", .driver_data = (kernel_ulong_t)&mhi_hwip0 },
+ /* Software data PATH (to modem CPU) */
+ { .chan = "IP_SW0", .driver_data = (kernel_ulong_t)&mhi_swip0 },
+ {}
+};
+MODULE_DEVICE_TABLE(mhi, mhi_net_id_table);
+
+static struct mhi_driver mhi_net_driver = {
+ .probe = mhi_net_probe,
+ .remove = mhi_net_remove,
+ .dl_xfer_cb = mhi_net_dl_callback,
+ .ul_xfer_cb = mhi_net_ul_callback,
+ .id_table = mhi_net_id_table,
+ .driver = {
+ .name = "mhi_net",
+ .owner = THIS_MODULE,
+ },
+};
+
+module_mhi_driver(mhi_net_driver);
+
+MODULE_AUTHOR("Loic Poulain <loic.poulain@linaro.org>");
+MODULE_DESCRIPTION("Network over MHI");
+MODULE_LICENSE("GPL v2");
const char *buf, size_t count)
{
struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev);
- struct nsim_dev *nsim_dev = dev_get_drvdata(dev);
- struct devlink *devlink;
unsigned int port_index;
int ret;
if (ret)
return ret;
- devlink = priv_to_devlink(nsim_dev);
+ if (!mutex_trylock(&nsim_bus_dev->nsim_bus_reload_lock))
+ return -EBUSY;
+
+ if (nsim_bus_dev->in_reload) {
+ mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock);
+ return -EBUSY;
+ }
- mutex_lock(&nsim_bus_dev->nsim_bus_reload_lock);
- devlink_reload_disable(devlink);
ret = nsim_dev_port_add(nsim_bus_dev, NSIM_DEV_PORT_TYPE_PF, port_index);
- devlink_reload_enable(devlink);
mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock);
return ret ? ret : count;
}
const char *buf, size_t count)
{
struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev);
- struct nsim_dev *nsim_dev = dev_get_drvdata(dev);
- struct devlink *devlink;
unsigned int port_index;
int ret;
if (ret)
return ret;
- devlink = priv_to_devlink(nsim_dev);
+ if (!mutex_trylock(&nsim_bus_dev->nsim_bus_reload_lock))
+ return -EBUSY;
+
+ if (nsim_bus_dev->in_reload) {
+ mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock);
+ return -EBUSY;
+ }
- mutex_lock(&nsim_bus_dev->nsim_bus_reload_lock);
- devlink_reload_disable(devlink);
ret = nsim_dev_port_del(nsim_bus_dev, NSIM_DEV_PORT_TYPE_PF, port_index);
- devlink_reload_enable(devlink);
mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock);
return ret ? ret : count;
}
struct netlink_ext_ack *extack)
{
struct nsim_dev *nsim_dev = devlink_priv(devlink);
+ struct nsim_bus_dev *nsim_bus_dev;
+
+ nsim_bus_dev = nsim_dev->nsim_bus_dev;
+ if (!mutex_trylock(&nsim_bus_dev->nsim_bus_reload_lock))
+ return -EOPNOTSUPP;
if (nsim_dev->dont_allow_reload) {
/* For testing purposes, user set debugfs dont_allow_reload
* value to true. So forbid it.
*/
NL_SET_ERR_MSG_MOD(extack, "User forbid the reload for testing purposes");
+ mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock);
return -EOPNOTSUPP;
}
+ nsim_bus_dev->in_reload = true;
nsim_dev_reload_destroy(nsim_dev);
+ mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock);
return 0;
}
struct netlink_ext_ack *extack)
{
struct nsim_dev *nsim_dev = devlink_priv(devlink);
+ struct nsim_bus_dev *nsim_bus_dev;
+ int ret;
+
+ nsim_bus_dev = nsim_dev->nsim_bus_dev;
+ mutex_lock(&nsim_bus_dev->nsim_bus_reload_lock);
+ nsim_bus_dev->in_reload = false;
if (nsim_dev->fail_reload) {
/* For testing purposes, user set debugfs fail_reload
* value to true. Fail right away.
*/
NL_SET_ERR_MSG_MOD(extack, "User setup the reload to fail for testing purposes");
+ mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock);
return -EINVAL;
}
*actions_performed = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT);
- return nsim_dev_reload_create(nsim_dev, extack);
+ ret = nsim_dev_reload_create(nsim_dev, extack);
+ mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock);
+ return ret;
}
static int nsim_dev_info_get(struct devlink *devlink,
int err;
devlink = devlink_alloc_ns(&nsim_dev_devlink_ops, sizeof(*nsim_dev),
- nsim_bus_dev->initial_net);
+ nsim_bus_dev->initial_net, &nsim_bus_dev->dev);
if (!devlink)
return -ENOMEM;
nsim_dev = devlink_priv(devlink);
if (err)
goto err_devlink_free;
- err = devlink_register(devlink, &nsim_bus_dev->dev);
+ err = devlink_register(devlink);
if (err)
goto err_resources_unregister;
static void nsim_fib_set_max_all(struct nsim_fib_data *data,
struct devlink *devlink)
{
- enum nsim_resource_id res_ids[] = {
+ static const enum nsim_resource_id res_ids[] = {
NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4_FIB_RULES,
NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES,
NSIM_RESOURCE_NEXTHOPS,
struct nsim_vf_config *vfconfigs;
/* Lock for devlink->reload_enabled in netdevsim module */
struct mutex nsim_bus_reload_lock;
+ bool in_reload;
bool init;
};
if (phydev->dev_flags & PHY_BRCM_DIS_TXCRXC_NOENRGY) {
if (BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54210E ||
BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54810 ||
- BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54210E)
+ BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54811)
val |= BCM54XX_SHD_SCR3_RXCTXC_DIS;
else
val |= BCM54XX_SHD_SCR3_TRDDAPD;
}
static int ksz8051_ksz8795_match_phy_device(struct phy_device *phydev,
- const u32 ksz_phy_id)
+ const bool ksz_8051)
{
int ret;
- if ((phydev->phy_id & MICREL_PHY_ID_MASK) != ksz_phy_id)
+ if ((phydev->phy_id & MICREL_PHY_ID_MASK) != PHY_ID_KSZ8051)
return 0;
ret = phy_read(phydev, MII_BMSR);
* the switch does not.
*/
ret &= BMSR_ERCAP;
- if (ksz_phy_id == PHY_ID_KSZ8051)
+ if (ksz_8051)
return ret;
else
return !ret;
static int ksz8051_match_phy_device(struct phy_device *phydev)
{
- return ksz8051_ksz8795_match_phy_device(phydev, PHY_ID_KSZ8051);
+ return ksz8051_ksz8795_match_phy_device(phydev, true);
}
static int ksz8081_config_init(struct phy_device *phydev)
static int ksz8795_match_phy_device(struct phy_device *phydev)
{
- return ksz8051_ksz8795_match_phy_device(phydev, PHY_ID_KSZ87XX);
+ return ksz8051_ksz8795_match_phy_device(phydev, false);
}
static int ksz9021_load_values_from_of(struct phy_device *phydev,
{
struct vsc8531_private *vsc8531 = phydev->priv;
bool base = phydev->mdio.addr == vsc8531->ts_base_addr;
- u8 msgs[] = {
+ static const u8 msgs[] = {
PTP_MSGTYPE_SYNC,
PTP_MSGTYPE_DELAY_REQ
};
static int vsc85xx_ptp_conf(struct phy_device *phydev, enum ts_blk blk,
bool one_step, bool enable)
{
- u8 msgs[] = {
+ static const u8 msgs[] = {
PTP_MSGTYPE_SYNC,
PTP_MSGTYPE_DELAY_REQ
};
static int __vsc8584_init_ptp(struct phy_device *phydev)
{
struct vsc8531_private *vsc8531 = phydev->priv;
- u32 ltc_seq_e[] = { 0, 400000, 0, 0, 0 };
- u8 ltc_seq_a[] = { 8, 6, 5, 4, 2 };
+ static const u32 ltc_seq_e[] = { 0, 400000, 0, 0, 0 };
+ static const u8 ltc_seq_a[] = { 8, 6, 5, 4, 2 };
u32 val;
if (!vsc8584_is_1588_input_clk_configured(phydev)) {
netdev->netdev_ops = &ipheth_netdev_ops;
netdev->watchdog_timeo = IPHETH_TX_TIMEOUT;
- strcpy(netdev->name, "eth%d");
+ strscpy(netdev->name, "eth%d", sizeof(netdev->name));
dev = netdev_priv(netdev);
dev->udev = udev;
{
struct phy_device *phydev = dev->net->phydev;
struct ethtool_link_ksettings ecmd;
- int ladv, radv, ret;
+ int ladv, radv, ret, link;
u32 buf;
/* clear LAN78xx interrupt status */
if (unlikely(ret < 0))
return -EIO;
+ mutex_lock(&phydev->lock);
phy_read_status(phydev);
+ link = phydev->link;
+ mutex_unlock(&phydev->lock);
- if (!phydev->link && dev->link_on) {
+ if (!link && dev->link_on) {
dev->link_on = false;
/* reset MAC */
return -EIO;
del_timer(&dev->stat_monitor);
- } else if (phydev->link && !dev->link_on) {
+ } else if (link && !dev->link_on) {
dev->link_on = true;
phy_ethtool_ksettings_get(phydev, &ecmd);
static u32 lan78xx_get_link(struct net_device *net)
{
+ u32 link;
+
+ mutex_lock(&net->phydev->lock);
phy_read_status(net->phydev);
+ link = net->phydev->link;
+ mutex_unlock(&net->phydev->lock);
- return net->phydev->link;
+ return link;
}
static void lan78xx_get_drvinfo(struct net_device *net,
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (c) 1999-2013 Petko Manolov (petkan@nucleusys.com)
+ * Copyright (c) 1999-2021 Petko Manolov (petkan@nucleusys.com)
*
- * ChangeLog:
- * .... Most of the time spent on reading sources & docs.
- * v0.2.x First official release for the Linux kernel.
- * v0.3.0 Beutified and structured, some bugs fixed.
- * v0.3.x URBifying bulk requests and bugfixing. First relatively
- * stable release. Still can touch device's registers only
- * from top-halves.
- * v0.4.0 Control messages remained unurbified are now URBs.
- * Now we can touch the HW at any time.
- * v0.4.9 Control urbs again use process context to wait. Argh...
- * Some long standing bugs (enable_net_traffic) fixed.
- * Also nasty trick about resubmiting control urb from
- * interrupt context used. Please let me know how it
- * behaves. Pegasus II support added since this version.
- * TODO: suppressing HCD warnings spewage on disconnect.
- * v0.4.13 Ethernet address is now set at probe(), not at open()
- * time as this seems to break dhcpd.
- * v0.5.0 branch to 2.5.x kernels
- * v0.5.1 ethtool support added
- * v0.5.5 rx socket buffers are in a pool and the their allocation
- * is out of the interrupt routine.
- * ...
- * v0.9.3 simplified [get|set]_register(s), async update registers
- * logic revisited, receive skb_pool removed.
*/
#include <linux/sched.h>
/*
* Version Information
*/
-#define DRIVER_VERSION "v0.9.3 (2013/04/25)"
#define DRIVER_AUTHOR "Petko Manolov <petkan@nucleusys.com>"
#define DRIVER_DESC "Pegasus/Pegasus II USB Ethernet driver"
static int set_registers(pegasus_t *pegasus, __u16 indx, __u16 size,
const void *data)
{
- return usb_control_msg_send(pegasus->usb, 0, PEGASUS_REQ_SET_REGS,
+ int ret;
+
+ ret = usb_control_msg_send(pegasus->usb, 0, PEGASUS_REQ_SET_REGS,
PEGASUS_REQT_WRITE, 0, indx, data, size,
1000, GFP_NOIO);
+ if (ret < 0)
+ netif_dbg(pegasus, drv, pegasus->net, "%s failed with %d\n", __func__, ret);
+
+ return ret;
}
/*
static int set_register(pegasus_t *pegasus, __u16 indx, __u8 data)
{
void *buf = &data;
+ int ret;
- return usb_control_msg_send(pegasus->usb, 0, PEGASUS_REQ_SET_REG,
+ ret = usb_control_msg_send(pegasus->usb, 0, PEGASUS_REQ_SET_REG,
PEGASUS_REQT_WRITE, data, indx, buf, 1,
1000, GFP_NOIO);
+ if (ret < 0)
+ netif_dbg(pegasus, drv, pegasus->net, "%s failed with %d\n", __func__, ret);
+
+ return ret;
}
static int update_eth_regs_async(pegasus_t *pegasus)
static int __mii_op(pegasus_t *p, __u8 phy, __u8 indx, __u16 *regd, __u8 cmd)
{
- int i;
- __u8 data[4] = { phy, 0, 0, indx };
+ int i, ret;
__le16 regdi;
- int ret = -ETIMEDOUT;
+ __u8 data[4] = { phy, 0, 0, indx };
if (cmd & PHY_WRITE) {
__le16 *t = (__le16 *) & data[1];
if (data[0] & PHY_DONE)
break;
}
- if (i >= REG_TIMEOUT)
+ if (i >= REG_TIMEOUT) {
+ ret = -ETIMEDOUT;
goto fail;
+ }
if (cmd & PHY_READ) {
ret = get_registers(p, PhyData, 2, ®di);
+ if (ret < 0)
+ goto fail;
*regd = le16_to_cpu(regdi);
- return ret;
}
return 0;
fail:
static int mdio_read(struct net_device *dev, int phy_id, int loc)
{
pegasus_t *pegasus = netdev_priv(dev);
+ int ret;
u16 res;
- read_mii_word(pegasus, phy_id, loc, &res);
+ ret = read_mii_word(pegasus, phy_id, loc, &res);
+ if (ret < 0)
+ return ret;
+
return (int)res;
}
static int read_eprom_word(pegasus_t *pegasus, __u8 index, __u16 *retdata)
{
- int i;
- __u8 tmp = 0;
+ int ret, i;
__le16 retdatai;
- int ret;
+ __u8 tmp = 0;
set_register(pegasus, EpromCtrl, 0);
set_register(pegasus, EpromOffset, index);
for (i = 0; i < REG_TIMEOUT; i++) {
ret = get_registers(pegasus, EpromCtrl, 1, &tmp);
+ if (ret < 0)
+ goto fail;
if (tmp & EPROM_DONE)
break;
- if (ret == -ESHUTDOWN)
- goto fail;
}
- if (i >= REG_TIMEOUT)
+ if (i >= REG_TIMEOUT) {
+ ret = -ETIMEDOUT;
goto fail;
+ }
ret = get_registers(pegasus, EpromData, 2, &retdatai);
+ if (ret < 0)
+ goto fail;
*retdata = le16_to_cpu(retdatai);
return ret;
fail:
- netif_warn(pegasus, drv, pegasus->net, "%s failed\n", __func__);
- return -ETIMEDOUT;
+ netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__);
+ return ret;
}
#ifdef PEGASUS_WRITE_EEPROM
return ret;
fail:
- netif_warn(pegasus, drv, pegasus->net, "%s failed\n", __func__);
+ netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__);
return -ETIMEDOUT;
}
-#endif /* PEGASUS_WRITE_EEPROM */
+#endif /* PEGASUS_WRITE_EEPROM */
static inline int get_node_id(pegasus_t *pegasus, u8 *id)
{
return;
err:
eth_hw_addr_random(pegasus->net);
- dev_info(&pegasus->intf->dev, "software assigned MAC address.\n");
+ netif_dbg(pegasus, drv, pegasus->net, "software assigned MAC address.\n");
return;
}
static inline int reset_mac(pegasus_t *pegasus)
{
+ int ret, i;
__u8 data = 0x8;
- int i;
set_register(pegasus, EthCtrl1, data);
for (i = 0; i < REG_TIMEOUT; i++) {
- get_registers(pegasus, EthCtrl1, 1, &data);
+ ret = get_registers(pegasus, EthCtrl1, 1, &data);
+ if (ret < 0)
+ goto fail;
if (~data & 0x08) {
if (loopback)
break;
}
if (usb_dev_id[pegasus->dev_index].vendor == VENDOR_ELCON) {
__u16 auxmode;
- read_mii_word(pegasus, 3, 0x1b, &auxmode);
+ ret = read_mii_word(pegasus, 3, 0x1b, &auxmode);
+ if (ret < 0)
+ goto fail;
auxmode |= 4;
write_mii_word(pegasus, 3, 0x1b, &auxmode);
}
return 0;
+fail:
+ netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__);
+ return ret;
}
static int enable_net_traffic(struct net_device *dev, struct usb_device *usb)
{
- __u16 linkpart;
- __u8 data[4];
pegasus_t *pegasus = netdev_priv(dev);
int ret;
+ __u16 linkpart;
+ __u8 data[4];
- read_mii_word(pegasus, pegasus->phy, MII_LPA, &linkpart);
+ ret = read_mii_word(pegasus, pegasus->phy, MII_LPA, &linkpart);
+ if (ret < 0)
+ goto fail;
data[0] = 0xc8; /* TX & RX enable, append status, no CRC */
data[1] = 0;
if (linkpart & (ADVERTISE_100FULL | ADVERTISE_10FULL))
usb_dev_id[pegasus->dev_index].vendor == VENDOR_LINKSYS2 ||
usb_dev_id[pegasus->dev_index].vendor == VENDOR_DLINK) {
u16 auxmode;
- read_mii_word(pegasus, 0, 0x1b, &auxmode);
+ ret = read_mii_word(pegasus, 0, 0x1b, &auxmode);
+ if (ret < 0)
+ goto fail;
auxmode |= 4;
write_mii_word(pegasus, 0, 0x1b, &auxmode);
}
+ return 0;
+fail:
+ netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__);
return ret;
}
{
pegasus_t *pegasus = urb->context;
struct net_device *net;
+ u8 *buf = urb->transfer_buffer;
int rx_status, count = urb->actual_length;
int status = urb->status;
- u8 *buf = urb->transfer_buffer;
__u16 pkt_len;
if (!pegasus)
set_registers(pegasus, EthCtrl0, sizeof(tmp), &tmp);
}
-static inline void get_interrupt_interval(pegasus_t *pegasus)
+static inline int get_interrupt_interval(pegasus_t *pegasus)
{
u16 data;
u8 interval;
+ int ret;
+
+ ret = read_eprom_word(pegasus, 4, &data);
+ if (ret < 0)
+ return ret;
- read_eprom_word(pegasus, 4, &data);
interval = data >> 8;
if (pegasus->usb->speed != USB_SPEED_HIGH) {
if (interval < 0x80) {
}
}
pegasus->intr_interval = interval;
+
+ return 0;
}
static void set_carrier(struct net_device *net)
pegasus_t *pegasus = netdev_priv(dev);
strlcpy(info->driver, driver_name, sizeof(info->driver));
- strlcpy(info->version, DRIVER_VERSION, sizeof(info->version));
usb_make_path(pegasus->usb, info->bus_info, sizeof(info->bus_info));
}
data[0] = pegasus->phy;
fallthrough;
case SIOCDEVPRIVATE + 1:
- read_mii_word(pegasus, data[0], data[1] & 0x1f, &data[3]);
- res = 0;
+ res = read_mii_word(pegasus, data[0], data[1] & 0x1f, &data[3]);
break;
case SIOCDEVPRIVATE + 2:
if (!capable(CAP_NET_ADMIN))
static __u8 mii_phy_probe(pegasus_t *pegasus)
{
- int i;
+ int i, ret;
__u16 tmp;
for (i = 0; i < 32; i++) {
- read_mii_word(pegasus, i, MII_BMSR, &tmp);
+ ret = read_mii_word(pegasus, i, MII_BMSR, &tmp);
+ if (ret < 0)
+ goto fail;
if (tmp == 0 || tmp == 0xffff || (tmp & BMSR_MEDIA) == 0)
continue;
else
return i;
}
-
+fail:
return 0xff;
}
static inline void setup_pegasus_II(pegasus_t *pegasus)
{
+ int ret;
__u8 data = 0xa5;
set_register(pegasus, Reg1d, 0);
set_register(pegasus, Reg7b, 2);
set_register(pegasus, 0x83, data);
- get_registers(pegasus, 0x83, 1, &data);
+ ret = get_registers(pegasus, 0x83, 1, &data);
+ if (ret < 0)
+ goto fail;
if (data == 0xa5)
pegasus->chip = 0x8513;
set_register(pegasus, Reg81, 6);
else
set_register(pegasus, Reg81, 2);
+
+ return;
+fail:
+ netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__);
}
static void check_carrier(struct work_struct *work)
| NETIF_MSG_PROBE | NETIF_MSG_LINK);
pegasus->features = usb_dev_id[dev_index].private;
- get_interrupt_interval(pegasus);
+ res = get_interrupt_interval(pegasus);
+ if (res)
+ goto out2;
if (reset_mac(pegasus)) {
dev_err(&intf->dev, "can't reset MAC\n");
res = -EIO;
static int __init pegasus_init(void)
{
- pr_info("%s: %s, " DRIVER_DESC "\n", driver_name, DRIVER_VERSION);
+ pr_info("%s: " DRIVER_DESC "\n", driver_name);
if (devid)
parse_id(devid);
return usb_register(&pegasus_driver);
dev->interrupt_count = 0;
dev->net = net;
- strcpy (net->name, "usb%d");
+ strscpy(net->name, "usb%d", sizeof(net->name));
memcpy (net->dev_addr, node_id, sizeof node_id);
/* rx and tx sides can use different message sizes;
if ((dev->driver_info->flags & FLAG_ETHER) != 0 &&
((dev->driver_info->flags & FLAG_POINTTOPOINT) == 0 ||
(net->dev_addr [0] & 0x02) == 0))
- strcpy (net->name, "eth%d");
+ strscpy(net->name, "eth%d", sizeof(net->name));
/* WLAN devices should always be named "wlan%d" */
if ((dev->driver_info->flags & FLAG_WLAN) != 0)
- strcpy(net->name, "wlan%d");
+ strscpy(net->name, "wlan%d", sizeof(net->name));
/* WWAN devices should always be named "wwan%d" */
if ((dev->driver_info->flags & FLAG_WWAN) != 0)
- strcpy(net->name, "wwan%d");
+ strscpy(net->name, "wwan%d", sizeof(net->name));
/* devices that cannot do ARP */
if ((dev->driver_info->flags & FLAG_NOARP) != 0)
struct page *page, unsigned int offset,
unsigned int len, unsigned int truesize,
bool hdr_valid, unsigned int metasize,
- bool whole_page)
+ unsigned int headroom)
{
struct sk_buff *skb;
struct virtio_net_hdr_mrg_rxbuf *hdr;
else
hdr_padded_len = sizeof(struct padded_vnet_hdr);
- /* If whole_page, there is an offset between the beginning of the
+ /* If headroom is not 0, there is an offset between the beginning of the
* data and the allocated space, otherwise the data and the allocated
* space are aligned.
*
* Buffers with headroom use PAGE_SIZE as alloc size, see
* add_recvbuf_mergeable() + get_mergeable_buf_len()
*/
- if (whole_page) {
- /* Buffers with whole_page use PAGE_SIZE as alloc size,
- * see add_recvbuf_mergeable() + get_mergeable_buf_len()
- */
- truesize = PAGE_SIZE;
-
- /* page maybe head page, so we should get the buf by p, not the
- * page
- */
- tailroom = truesize - len - offset_in_page(p);
- buf = (char *)((unsigned long)p & PAGE_MASK);
- } else {
- tailroom = truesize - len;
- buf = p;
- }
+ truesize = headroom ? PAGE_SIZE : truesize;
+ tailroom = truesize - len - headroom;
+ buf = p - headroom;
len -= hdr_len;
offset += hdr_padded_len;
put_page(page);
head_skb = page_to_skb(vi, rq, xdp_page, offset,
len, PAGE_SIZE, false,
- metasize, true);
+ metasize,
+ VIRTIO_XDP_HEADROOM);
return head_skb;
}
break;
rcu_read_unlock();
head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog,
- metasize, !!headroom);
+ metasize, headroom);
curr_skb = head_skb;
if (unlikely(!curr_skb))
if (vi->rq[0].xdp_prog)
return -EINVAL;
- get_online_cpus();
+ cpus_read_lock();
err = _virtnet_set_queues(vi, queue_pairs);
if (err) {
- put_online_cpus();
+ cpus_read_unlock();
goto err;
}
virtnet_set_affinity(vi);
- put_online_cpus();
+ cpus_read_unlock();
netif_set_real_num_tx_queues(dev, queue_pairs);
netif_set_real_num_rx_queues(dev, queue_pairs);
if (ret)
goto err_free;
- get_online_cpus();
+ cpus_read_lock();
virtnet_set_affinity(vi);
- put_online_cpus();
+ cpus_read_unlock();
return 0;
unsigned int hh_len = LL_RESERVED_SPACE(dev);
struct neighbour *neigh;
bool is_v6gw = false;
- int ret = -EINVAL;
nf_reset_ct(skb);
/* Be paranoid, rather than too clever. */
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
- struct sk_buff *skb2;
-
- skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
- if (!skb2) {
- ret = -ENOMEM;
- goto err;
+ skb = skb_expand_head(skb, hh_len);
+ if (!skb) {
+ dev->stats.tx_errors++;
+ return -ENOMEM;
}
- if (skb->sk)
- skb_set_owner_w(skb2, skb->sk);
-
- consume_skb(skb);
- skb = skb2;
}
rcu_read_lock_bh();
neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
if (!IS_ERR(neigh)) {
+ int ret;
+
sock_confirm_neigh(skb, neigh);
/* if crossing protocols, can not use the cached header */
ret = neigh_output(neigh, skb, is_v6gw);
}
rcu_read_unlock_bh();
-err:
vrf_tx_error(skb->dev, skb);
- return ret;
+ return -EINVAL;
}
static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
To compile this driver as a module, choose M here: the
module will be called slic_ds26522.
-config DSCC4_PCISYNC
- bool "Etinc PCISYNC features"
- depends on DSCC4
- help
- Due to Etinc's design choice for its PCISYNC cards, some operations
- are only allowed on specific ports of the DSCC4. This option is the
- only way for the driver to know that it shouldn't return a success
- code for these operations.
-
- Please say Y if your card is an Etinc's PCISYNC.
-
-config DSCC4_PCI_RST
- bool "Hard reset support"
- depends on DSCC4
- help
- Various DSCC4 bugs forbid any reliable software reset of the ASIC.
- As a replacement, some vendors provide a way to assert the PCI #RST
- pin of DSCC4 through the GPIO port of the card. If you choose Y,
- the driver will make use of this feature before module removal
- (i.e. rmmod). The feature is known to be available on Commtech's
- cards. Contact your manufacturer for details.
-
- Say Y if your card supports this feature.
-
config IXP4XX_HSS
tristate "Intel IXP4xx HSS (synchronous serial port) support"
depends on HDLC && IXP4XX_NPE && IXP4XX_QMGR
To compile this driver as a module, choose M here: the
module will be called lapbether.
- If unsure, say N.
-
-config SBNI
- tristate "Granch SBNI12 Leased Line adapter support"
- depends on X86
- help
- Driver for ISA SBNI12-xx cards which are low cost alternatives to
- leased line modems.
-
- You can find more information and last versions of drivers and
- utilities at <http://www.granch.ru/>. If you have any question you
- can send email to <sbni@granch.ru>.
-
- To compile this driver as a module, choose M here: the
- module will be called sbni.
-
- If unsure, say N.
-
-config SBNI_MULTILINE
- bool "Multiple line feature support"
- depends on SBNI
- help
- Schedule traffic for some parallel lines, via SBNI12 adapters.
-
- If you have two computers connected with two parallel lines it's
- possible to increase transfer rate nearly twice. You should have
- a program named 'sbniconfig' to configure adapters.
If unsure, say N.
obj-$(CONFIG_LANMEDIA) += lmc/
obj-$(CONFIG_LAPBETHER) += lapbether.o
-obj-$(CONFIG_SBNI) += sbni.o
obj-$(CONFIG_N2) += n2.o
obj-$(CONFIG_C101) += c101.o
obj-$(CONFIG_WANXL) += wanxl.o
static struct z8530_dev *sv11_unit;
-int init_module(void)
+static int sv11_module_init(void)
{
sv11_unit = sv11_init(io, irq);
if (!sv11_unit)
return -ENODEV;
return 0;
}
+module_init(sv11_module_init);
-void cleanup_module(void)
+static void sv11_module_cleanup(void)
{
if (sv11_unit)
sv11_shutdown(sv11_unit);
}
+module_exit(sv11_module_cleanup);
+++ /dev/null
-/* sbni.c: Granch SBNI12 leased line adapters driver for linux
- *
- * Written 2001 by Denis I.Timofeev (timofeev@granch.ru)
- *
- * Previous versions were written by Yaroslav Polyakov,
- * Alexey Zverev and Max Khon.
- *
- * Driver supports SBNI12-02,-04,-05,-10,-11 cards, single and
- * double-channel, PCI and ISA modifications.
- * More info and useful utilities to work with SBNI12 cards you can find
- * at http://www.granch.com (English) or http://www.granch.ru (Russian)
- *
- * This software may be used and distributed according to the terms
- * of the GNU General Public License.
- *
- *
- * 5.0.1 Jun 22 2001
- * - Fixed bug in probe
- * 5.0.0 Jun 06 2001
- * - Driver was completely redesigned by Denis I.Timofeev,
- * - now PCI/Dual, ISA/Dual (with single interrupt line) models are
- * - supported
- * 3.3.0 Thu Feb 24 21:30:28 NOVT 2000
- * - PCI cards support
- * 3.2.0 Mon Dec 13 22:26:53 NOVT 1999
- * - Completely rebuilt all the packet storage system
- * - to work in Ethernet-like style.
- * 3.1.1 just fixed some bugs (5 aug 1999)
- * 3.1.0 added balancing feature (26 apr 1999)
- * 3.0.1 just fixed some bugs (14 apr 1999).
- * 3.0.0 Initial Revision, Yaroslav Polyakov (24 Feb 1999)
- * - added pre-calculation for CRC, fixed bug with "len-2" frames,
- * - removed outbound fragmentation (MTU=1000), written CRC-calculation
- * - on asm, added work with hard_headers and now we have our own cache
- * - for them, optionally supported word-interchange on some chipsets,
- *
- * Known problem: this driver wasn't tested on multiprocessor machine.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/ptrace.h>
-#include <linux/fcntl.h>
-#include <linux/ioport.h>
-#include <linux/interrupt.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/pci.h>
-#include <linux/skbuff.h>
-#include <linux/timer.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-
-#include <net/net_namespace.h>
-#include <net/arp.h>
-#include <net/Space.h>
-
-#include <asm/io.h>
-#include <asm/types.h>
-#include <asm/byteorder.h>
-#include <asm/irq.h>
-#include <linux/uaccess.h>
-
-#include "sbni.h"
-
-/* device private data */
-
-struct net_local {
- struct timer_list watchdog;
- struct net_device *watchdog_dev;
-
- spinlock_t lock;
- struct sk_buff *rx_buf_p; /* receive buffer ptr */
- struct sk_buff *tx_buf_p; /* transmit buffer ptr */
-
- unsigned int framelen; /* current frame length */
- unsigned int maxframe; /* maximum valid frame length */
- unsigned int state;
- unsigned int inppos, outpos; /* positions in rx/tx buffers */
-
- /* transmitting frame number - from frames qty to 1 */
- unsigned int tx_frameno;
-
- /* expected number of next receiving frame */
- unsigned int wait_frameno;
-
- /* count of failed attempts to frame send - 32 attempts do before
- error - while receiver tunes on opposite side of wire */
- unsigned int trans_errors;
-
- /* idle time; send pong when limit exceeded */
- unsigned int timer_ticks;
-
- /* fields used for receive level autoselection */
- int delta_rxl;
- unsigned int cur_rxl_index, timeout_rxl;
- unsigned long cur_rxl_rcvd, prev_rxl_rcvd;
-
- struct sbni_csr1 csr1; /* current value of CSR1 */
- struct sbni_in_stats in_stats; /* internal statistics */
-
- struct net_device *second; /* for ISA/dual cards */
-
-#ifdef CONFIG_SBNI_MULTILINE
- struct net_device *master;
- struct net_device *link;
-#endif
-};
-
-
-static int sbni_card_probe( unsigned long );
-static int sbni_pci_probe( struct net_device * );
-static struct net_device *sbni_probe1(struct net_device *, unsigned long, int);
-static int sbni_open( struct net_device * );
-static int sbni_close( struct net_device * );
-static netdev_tx_t sbni_start_xmit(struct sk_buff *,
- struct net_device * );
-static int sbni_siocdevprivate(struct net_device *, struct ifreq *,
- void __user *, int);
-static void set_multicast_list( struct net_device * );
-
-static irqreturn_t sbni_interrupt( int, void * );
-static void handle_channel( struct net_device * );
-static int recv_frame( struct net_device * );
-static void send_frame( struct net_device * );
-static int upload_data( struct net_device *,
- unsigned, unsigned, unsigned, u32 );
-static void download_data( struct net_device *, u32 * );
-static void sbni_watchdog(struct timer_list *);
-static void interpret_ack( struct net_device *, unsigned );
-static int append_frame_to_pkt( struct net_device *, unsigned, u32 );
-static void indicate_pkt( struct net_device * );
-static void card_start( struct net_device * );
-static void prepare_to_send( struct sk_buff *, struct net_device * );
-static void drop_xmit_queue( struct net_device * );
-static void send_frame_header( struct net_device *, u32 * );
-static int skip_tail( unsigned int, unsigned int, u32 );
-static int check_fhdr( u32, u32 *, u32 *, u32 *, u32 *, u32 * );
-static void change_level( struct net_device * );
-static void timeout_change_level( struct net_device * );
-static u32 calc_crc32( u32, u8 *, u32 );
-static struct sk_buff * get_rx_buf( struct net_device * );
-static int sbni_init( struct net_device * );
-
-#ifdef CONFIG_SBNI_MULTILINE
-static int enslave( struct net_device *, struct net_device * );
-static int emancipate( struct net_device * );
-#endif
-
-static const char version[] =
- "Granch SBNI12 driver ver 5.0.1 Jun 22 2001 Denis I.Timofeev.\n";
-
-static bool skip_pci_probe __initdata = false;
-static int scandone __initdata = 0;
-static int num __initdata = 0;
-
-static unsigned char rxl_tab[];
-static u32 crc32tab[];
-
-/* A list of all installed devices, for removing the driver module. */
-static struct net_device *sbni_cards[ SBNI_MAX_NUM_CARDS ];
-
-/* Lists of device's parameters */
-static u32 io[ SBNI_MAX_NUM_CARDS ] __initdata =
- { [0 ... SBNI_MAX_NUM_CARDS-1] = -1 };
-static u32 irq[ SBNI_MAX_NUM_CARDS ] __initdata;
-static u32 baud[ SBNI_MAX_NUM_CARDS ] __initdata;
-static u32 rxl[ SBNI_MAX_NUM_CARDS ] __initdata =
- { [0 ... SBNI_MAX_NUM_CARDS-1] = -1 };
-static u32 mac[ SBNI_MAX_NUM_CARDS ] __initdata;
-
-#ifndef MODULE
-typedef u32 iarr[];
-static iarr *dest[5] __initdata = { &io, &irq, &baud, &rxl, &mac };
-#endif
-
-/* A zero-terminated list of I/O addresses to be probed on ISA bus */
-static unsigned int netcard_portlist[ ] __initdata = {
- 0x210, 0x214, 0x220, 0x224, 0x230, 0x234, 0x240, 0x244, 0x250, 0x254,
- 0x260, 0x264, 0x270, 0x274, 0x280, 0x284, 0x290, 0x294, 0x2a0, 0x2a4,
- 0x2b0, 0x2b4, 0x2c0, 0x2c4, 0x2d0, 0x2d4, 0x2e0, 0x2e4, 0x2f0, 0x2f4,
- 0 };
-
-#define NET_LOCAL_LOCK(dev) (((struct net_local *)netdev_priv(dev))->lock)
-
-/*
- * Look for SBNI card which addr stored in dev->base_addr, if nonzero.
- * Otherwise, look through PCI bus. If none PCI-card was found, scan ISA.
- */
-
-static inline int __init
-sbni_isa_probe( struct net_device *dev )
-{
- if( dev->base_addr > 0x1ff &&
- request_region( dev->base_addr, SBNI_IO_EXTENT, dev->name ) &&
- sbni_probe1( dev, dev->base_addr, dev->irq ) )
-
- return 0;
- else {
- pr_err("base address 0x%lx is busy, or adapter is malfunctional!\n",
- dev->base_addr);
- return -ENODEV;
- }
-}
-
-static const struct net_device_ops sbni_netdev_ops = {
- .ndo_open = sbni_open,
- .ndo_stop = sbni_close,
- .ndo_start_xmit = sbni_start_xmit,
- .ndo_set_rx_mode = set_multicast_list,
- .ndo_siocdevprivate = sbni_siocdevprivate,
- .ndo_set_mac_address = eth_mac_addr,
- .ndo_validate_addr = eth_validate_addr,
-};
-
-static void __init sbni_devsetup(struct net_device *dev)
-{
- ether_setup( dev );
- dev->netdev_ops = &sbni_netdev_ops;
-}
-
-int __init sbni_probe(int unit)
-{
- struct net_device *dev;
- int err;
-
- dev = alloc_netdev(sizeof(struct net_local), "sbni",
- NET_NAME_UNKNOWN, sbni_devsetup);
- if (!dev)
- return -ENOMEM;
-
- dev->netdev_ops = &sbni_netdev_ops;
-
- sprintf(dev->name, "sbni%d", unit);
- netdev_boot_setup_check(dev);
-
- err = sbni_init(dev);
- if (err) {
- free_netdev(dev);
- return err;
- }
-
- err = register_netdev(dev);
- if (err) {
- release_region( dev->base_addr, SBNI_IO_EXTENT );
- free_netdev(dev);
- return err;
- }
- pr_info_once("%s", version);
- return 0;
-}
-
-static int __init sbni_init(struct net_device *dev)
-{
- int i;
- if( dev->base_addr )
- return sbni_isa_probe( dev );
- /* otherwise we have to perform search our adapter */
-
- if( io[ num ] != -1 ) {
- dev->base_addr = io[ num ];
- dev->irq = irq[ num ];
- } else if( scandone || io[ 0 ] != -1 ) {
- return -ENODEV;
- }
-
- /* if io[ num ] contains non-zero address, then that is on ISA bus */
- if( dev->base_addr )
- return sbni_isa_probe( dev );
-
- /* ...otherwise - scan PCI first */
- if( !skip_pci_probe && !sbni_pci_probe( dev ) )
- return 0;
-
- if( io[ num ] == -1 ) {
- /* Auto-scan will be stopped when first ISA card were found */
- scandone = 1;
- if( num > 0 )
- return -ENODEV;
- }
-
- for( i = 0; netcard_portlist[ i ]; ++i ) {
- int ioaddr = netcard_portlist[ i ];
- if( request_region( ioaddr, SBNI_IO_EXTENT, dev->name ) &&
- sbni_probe1( dev, ioaddr, 0 ))
- return 0;
- }
-
- return -ENODEV;
-}
-
-
-static int __init
-sbni_pci_probe( struct net_device *dev )
-{
- struct pci_dev *pdev = NULL;
-
- while( (pdev = pci_get_class( PCI_CLASS_NETWORK_OTHER << 8, pdev ))
- != NULL ) {
- int pci_irq_line;
- unsigned long pci_ioaddr;
-
- if( pdev->vendor != SBNI_PCI_VENDOR &&
- pdev->device != SBNI_PCI_DEVICE )
- continue;
-
- pci_ioaddr = pci_resource_start( pdev, 0 );
- pci_irq_line = pdev->irq;
-
- /* Avoid already found cards from previous calls */
- if( !request_region( pci_ioaddr, SBNI_IO_EXTENT, dev->name ) ) {
- if (pdev->subsystem_device != 2)
- continue;
-
- /* Dual adapter is present */
- if (!request_region(pci_ioaddr += 4, SBNI_IO_EXTENT,
- dev->name ) )
- continue;
- }
-
- if (pci_irq_line <= 0 || pci_irq_line >= nr_irqs)
- pr_warn(
-"WARNING: The PCI BIOS assigned this PCI card to IRQ %d, which is unlikely to work!.\n"
-"You should use the PCI BIOS setup to assign a valid IRQ line.\n",
- pci_irq_line );
-
- /* avoiding re-enable dual adapters */
- if( (pci_ioaddr & 7) == 0 && pci_enable_device( pdev ) ) {
- release_region( pci_ioaddr, SBNI_IO_EXTENT );
- pci_dev_put( pdev );
- return -EIO;
- }
- if( sbni_probe1( dev, pci_ioaddr, pci_irq_line ) ) {
- SET_NETDEV_DEV(dev, &pdev->dev);
- /* not the best thing to do, but this is all messed up
- for hotplug systems anyway... */
- pci_dev_put( pdev );
- return 0;
- }
- }
- return -ENODEV;
-}
-
-
-static struct net_device * __init
-sbni_probe1( struct net_device *dev, unsigned long ioaddr, int irq )
-{
- struct net_local *nl;
-
- if( sbni_card_probe( ioaddr ) ) {
- release_region( ioaddr, SBNI_IO_EXTENT );
- return NULL;
- }
-
- outb( 0, ioaddr + CSR0 );
-
- if( irq < 2 ) {
- unsigned long irq_mask;
-
- irq_mask = probe_irq_on();
- outb( EN_INT | TR_REQ, ioaddr + CSR0 );
- outb( PR_RES, ioaddr + CSR1 );
- mdelay(50);
- irq = probe_irq_off(irq_mask);
- outb( 0, ioaddr + CSR0 );
-
- if( !irq ) {
- pr_err("%s: can't detect device irq!\n", dev->name);
- release_region( ioaddr, SBNI_IO_EXTENT );
- return NULL;
- }
- } else if( irq == 2 )
- irq = 9;
-
- dev->irq = irq;
- dev->base_addr = ioaddr;
-
- /* Fill in sbni-specific dev fields. */
- nl = netdev_priv(dev);
- if( !nl ) {
- pr_err("%s: unable to get memory!\n", dev->name);
- release_region( ioaddr, SBNI_IO_EXTENT );
- return NULL;
- }
-
- memset( nl, 0, sizeof(struct net_local) );
- spin_lock_init( &nl->lock );
-
- /* store MAC address (generate if that isn't known) */
- *(__be16 *)dev->dev_addr = htons( 0x00ff );
- *(__be32 *)(dev->dev_addr + 2) = htonl( 0x01000000 |
- ((mac[num] ?
- mac[num] :
- (u32)((long)netdev_priv(dev))) & 0x00ffffff));
-
- /* store link settings (speed, receive level ) */
- nl->maxframe = DEFAULT_FRAME_LEN;
- nl->csr1.rate = baud[ num ];
-
- if( (nl->cur_rxl_index = rxl[ num ]) == -1 ) {
- /* autotune rxl */
- nl->cur_rxl_index = DEF_RXL;
- nl->delta_rxl = DEF_RXL_DELTA;
- } else {
- nl->delta_rxl = 0;
- }
- nl->csr1.rxl = rxl_tab[ nl->cur_rxl_index ];
- if( inb( ioaddr + CSR0 ) & 0x01 )
- nl->state |= FL_SLOW_MODE;
-
- pr_notice("%s: ioaddr %#lx, irq %d, MAC: 00:ff:01:%02x:%02x:%02x\n",
- dev->name, dev->base_addr, dev->irq,
- ((u8 *)dev->dev_addr)[3],
- ((u8 *)dev->dev_addr)[4],
- ((u8 *)dev->dev_addr)[5]);
-
- pr_notice("%s: speed %d",
- dev->name,
- ((nl->state & FL_SLOW_MODE) ? 500000 : 2000000)
- / (1 << nl->csr1.rate));
-
- if( nl->delta_rxl == 0 )
- pr_cont(", receive level 0x%x (fixed)\n", nl->cur_rxl_index);
- else
- pr_cont(", receive level (auto)\n");
-
-#ifdef CONFIG_SBNI_MULTILINE
- nl->master = dev;
- nl->link = NULL;
-#endif
-
- sbni_cards[ num++ ] = dev;
- return dev;
-}
-
-/* -------------------------------------------------------------------------- */
-
-#ifdef CONFIG_SBNI_MULTILINE
-
-static netdev_tx_t
-sbni_start_xmit( struct sk_buff *skb, struct net_device *dev )
-{
- struct net_device *p;
-
- netif_stop_queue( dev );
-
- /* Looking for idle device in the list */
- for( p = dev; p; ) {
- struct net_local *nl = netdev_priv(p);
- spin_lock( &nl->lock );
- if( nl->tx_buf_p || (nl->state & FL_LINE_DOWN) ) {
- p = nl->link;
- spin_unlock( &nl->lock );
- } else {
- /* Idle dev is found */
- prepare_to_send( skb, p );
- spin_unlock( &nl->lock );
- netif_start_queue( dev );
- return NETDEV_TX_OK;
- }
- }
-
- return NETDEV_TX_BUSY;
-}
-
-#else /* CONFIG_SBNI_MULTILINE */
-
-static netdev_tx_t
-sbni_start_xmit( struct sk_buff *skb, struct net_device *dev )
-{
- struct net_local *nl = netdev_priv(dev);
-
- netif_stop_queue( dev );
- spin_lock( &nl->lock );
-
- prepare_to_send( skb, dev );
-
- spin_unlock( &nl->lock );
- return NETDEV_TX_OK;
-}
-
-#endif /* CONFIG_SBNI_MULTILINE */
-
-/* -------------------------------------------------------------------------- */
-
-/* interrupt handler */
-
-/*
- * SBNI12D-10, -11/ISA boards within "common interrupt" mode could not
- * be looked as two independent single-channel devices. Every channel seems
- * as Ethernet interface but interrupt handler must be common. Really, first
- * channel ("master") driver only registers the handler. In its struct net_local
- * it has got pointer to "slave" channel's struct net_local and handles that's
- * interrupts too.
- * dev of successfully attached ISA SBNI boards is linked to list.
- * While next board driver is initialized, it scans this list. If one
- * has found dev with same irq and ioaddr different by 4 then it assumes
- * this board to be "master".
- */
-
-static irqreturn_t
-sbni_interrupt( int irq, void *dev_id )
-{
- struct net_device *dev = dev_id;
- struct net_local *nl = netdev_priv(dev);
- int repeat;
-
- spin_lock( &nl->lock );
- if( nl->second )
- spin_lock(&NET_LOCAL_LOCK(nl->second));
-
- do {
- repeat = 0;
- if( inb( dev->base_addr + CSR0 ) & (RC_RDY | TR_RDY) ) {
- handle_channel( dev );
- repeat = 1;
- }
- if( nl->second && /* second channel present */
- (inb( nl->second->base_addr+CSR0 ) & (RC_RDY | TR_RDY)) ) {
- handle_channel( nl->second );
- repeat = 1;
- }
- } while( repeat );
-
- if( nl->second )
- spin_unlock(&NET_LOCAL_LOCK(nl->second));
- spin_unlock( &nl->lock );
- return IRQ_HANDLED;
-}
-
-
-static void
-handle_channel( struct net_device *dev )
-{
- struct net_local *nl = netdev_priv(dev);
- unsigned long ioaddr = dev->base_addr;
-
- int req_ans;
- unsigned char csr0;
-
-#ifdef CONFIG_SBNI_MULTILINE
- /* Lock the master device because we going to change its local data */
- if( nl->state & FL_SLAVE )
- spin_lock(&NET_LOCAL_LOCK(nl->master));
-#endif
-
- outb( (inb( ioaddr + CSR0 ) & ~EN_INT) | TR_REQ, ioaddr + CSR0 );
-
- nl->timer_ticks = CHANGE_LEVEL_START_TICKS;
- for(;;) {
- csr0 = inb( ioaddr + CSR0 );
- if( ( csr0 & (RC_RDY | TR_RDY) ) == 0 )
- break;
-
- req_ans = !(nl->state & FL_PREV_OK);
-
- if( csr0 & RC_RDY )
- req_ans = recv_frame( dev );
-
- /*
- * TR_RDY always equals 1 here because we have owned the marker,
- * and we set TR_REQ when disabled interrupts
- */
- csr0 = inb( ioaddr + CSR0 );
- if( !(csr0 & TR_RDY) || (csr0 & RC_RDY) )
- netdev_err(dev, "internal error!\n");
-
- /* if state & FL_NEED_RESEND != 0 then tx_frameno != 0 */
- if( req_ans || nl->tx_frameno != 0 )
- send_frame( dev );
- else
- /* send marker without any data */
- outb( inb( ioaddr + CSR0 ) & ~TR_REQ, ioaddr + CSR0 );
- }
-
- outb( inb( ioaddr + CSR0 ) | EN_INT, ioaddr + CSR0 );
-
-#ifdef CONFIG_SBNI_MULTILINE
- if( nl->state & FL_SLAVE )
- spin_unlock(&NET_LOCAL_LOCK(nl->master));
-#endif
-}
-
-
-/*
- * Routine returns 1 if it needs to acknowledge received frame.
- * Empty frame received without errors won't be acknowledged.
- */
-
-static int
-recv_frame( struct net_device *dev )
-{
- struct net_local *nl = netdev_priv(dev);
- unsigned long ioaddr = dev->base_addr;
-
- u32 crc = CRC32_INITIAL;
-
- unsigned framelen = 0, frameno, ack;
- unsigned is_first, frame_ok = 0;
-
- if( check_fhdr( ioaddr, &framelen, &frameno, &ack, &is_first, &crc ) ) {
- frame_ok = framelen > 4
- ? upload_data( dev, framelen, frameno, is_first, crc )
- : skip_tail( ioaddr, framelen, crc );
- if( frame_ok )
- interpret_ack( dev, ack );
- }
-
- outb( inb( ioaddr + CSR0 ) ^ CT_ZER, ioaddr + CSR0 );
- if( frame_ok ) {
- nl->state |= FL_PREV_OK;
- if( framelen > 4 )
- nl->in_stats.all_rx_number++;
- } else {
- nl->state &= ~FL_PREV_OK;
- change_level( dev );
- nl->in_stats.all_rx_number++;
- nl->in_stats.bad_rx_number++;
- }
-
- return !frame_ok || framelen > 4;
-}
-
-
-static void
-send_frame( struct net_device *dev )
-{
- struct net_local *nl = netdev_priv(dev);
-
- u32 crc = CRC32_INITIAL;
-
- if( nl->state & FL_NEED_RESEND ) {
-
- /* if frame was sended but not ACK'ed - resend it */
- if( nl->trans_errors ) {
- --nl->trans_errors;
- if( nl->framelen != 0 )
- nl->in_stats.resend_tx_number++;
- } else {
- /* cannot xmit with many attempts */
-#ifdef CONFIG_SBNI_MULTILINE
- if( (nl->state & FL_SLAVE) || nl->link )
-#endif
- nl->state |= FL_LINE_DOWN;
- drop_xmit_queue( dev );
- goto do_send;
- }
- } else
- nl->trans_errors = TR_ERROR_COUNT;
-
- send_frame_header( dev, &crc );
- nl->state |= FL_NEED_RESEND;
- /*
- * FL_NEED_RESEND will be cleared after ACK, but if empty
- * frame sended then in prepare_to_send next frame
- */
-
-
- if( nl->framelen ) {
- download_data( dev, &crc );
- nl->in_stats.all_tx_number++;
- nl->state |= FL_WAIT_ACK;
- }
-
- outsb( dev->base_addr + DAT, (u8 *)&crc, sizeof crc );
-
-do_send:
- outb( inb( dev->base_addr + CSR0 ) & ~TR_REQ, dev->base_addr + CSR0 );
-
- if( nl->tx_frameno )
- /* next frame exists - we request card to send it */
- outb( inb( dev->base_addr + CSR0 ) | TR_REQ,
- dev->base_addr + CSR0 );
-}
-
-
-/*
- * Write the frame data into adapter's buffer memory, and calculate CRC.
- * Do padding if necessary.
- */
-
-static void
-download_data( struct net_device *dev, u32 *crc_p )
-{
- struct net_local *nl = netdev_priv(dev);
- struct sk_buff *skb = nl->tx_buf_p;
-
- unsigned len = min_t(unsigned int, skb->len - nl->outpos, nl->framelen);
-
- outsb( dev->base_addr + DAT, skb->data + nl->outpos, len );
- *crc_p = calc_crc32( *crc_p, skb->data + nl->outpos, len );
-
- /* if packet too short we should write some more bytes to pad */
- for( len = nl->framelen - len; len--; ) {
- outb( 0, dev->base_addr + DAT );
- *crc_p = CRC32( 0, *crc_p );
- }
-}
-
-
-static int
-upload_data( struct net_device *dev, unsigned framelen, unsigned frameno,
- unsigned is_first, u32 crc )
-{
- struct net_local *nl = netdev_priv(dev);
-
- int frame_ok;
-
- if( is_first ) {
- nl->wait_frameno = frameno;
- nl->inppos = 0;
- }
-
- if( nl->wait_frameno == frameno ) {
-
- if( nl->inppos + framelen <= ETHER_MAX_LEN )
- frame_ok = append_frame_to_pkt( dev, framelen, crc );
-
- /*
- * if CRC is right but framelen incorrect then transmitter
- * error was occurred... drop entire packet
- */
- else if( (frame_ok = skip_tail( dev->base_addr, framelen, crc ))
- != 0 ) {
- nl->wait_frameno = 0;
- nl->inppos = 0;
-#ifdef CONFIG_SBNI_MULTILINE
- nl->master->stats.rx_errors++;
- nl->master->stats.rx_missed_errors++;
-#else
- dev->stats.rx_errors++;
- dev->stats.rx_missed_errors++;
-#endif
- }
- /* now skip all frames until is_first != 0 */
- } else
- frame_ok = skip_tail( dev->base_addr, framelen, crc );
-
- if( is_first && !frame_ok ) {
- /*
- * Frame has been broken, but we had already stored
- * is_first... Drop entire packet.
- */
- nl->wait_frameno = 0;
-#ifdef CONFIG_SBNI_MULTILINE
- nl->master->stats.rx_errors++;
- nl->master->stats.rx_crc_errors++;
-#else
- dev->stats.rx_errors++;
- dev->stats.rx_crc_errors++;
-#endif
- }
-
- return frame_ok;
-}
-
-
-static inline void
-send_complete( struct net_device *dev )
-{
- struct net_local *nl = netdev_priv(dev);
-
-#ifdef CONFIG_SBNI_MULTILINE
- nl->master->stats.tx_packets++;
- nl->master->stats.tx_bytes += nl->tx_buf_p->len;
-#else
- dev->stats.tx_packets++;
- dev->stats.tx_bytes += nl->tx_buf_p->len;
-#endif
- dev_consume_skb_irq(nl->tx_buf_p);
-
- nl->tx_buf_p = NULL;
-
- nl->outpos = 0;
- nl->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND);
- nl->framelen = 0;
-}
-
-
-static void
-interpret_ack( struct net_device *dev, unsigned ack )
-{
- struct net_local *nl = netdev_priv(dev);
-
- if( ack == FRAME_SENT_OK ) {
- nl->state &= ~FL_NEED_RESEND;
-
- if( nl->state & FL_WAIT_ACK ) {
- nl->outpos += nl->framelen;
-
- if( --nl->tx_frameno ) {
- nl->framelen = min_t(unsigned int,
- nl->maxframe,
- nl->tx_buf_p->len - nl->outpos);
- } else {
- send_complete( dev );
-#ifdef CONFIG_SBNI_MULTILINE
- netif_wake_queue( nl->master );
-#else
- netif_wake_queue( dev );
-#endif
- }
- }
- }
-
- nl->state &= ~FL_WAIT_ACK;
-}
-
-
-/*
- * Glue received frame with previous fragments of packet.
- * Indicate packet when last frame would be accepted.
- */
-
-static int
-append_frame_to_pkt( struct net_device *dev, unsigned framelen, u32 crc )
-{
- struct net_local *nl = netdev_priv(dev);
-
- u8 *p;
-
- if( nl->inppos + framelen > ETHER_MAX_LEN )
- return 0;
-
- if( !nl->rx_buf_p && !(nl->rx_buf_p = get_rx_buf( dev )) )
- return 0;
-
- p = nl->rx_buf_p->data + nl->inppos;
- insb( dev->base_addr + DAT, p, framelen );
- if( calc_crc32( crc, p, framelen ) != CRC32_REMAINDER )
- return 0;
-
- nl->inppos += framelen - 4;
- if( --nl->wait_frameno == 0 ) /* last frame received */
- indicate_pkt( dev );
-
- return 1;
-}
-
-
-/*
- * Prepare to start output on adapter.
- * Transmitter will be actually activated when marker is accepted.
- */
-
-static void
-prepare_to_send( struct sk_buff *skb, struct net_device *dev )
-{
- struct net_local *nl = netdev_priv(dev);
-
- unsigned int len;
-
- /* nl->tx_buf_p == NULL here! */
- if( nl->tx_buf_p )
- netdev_err(dev, "memory leak!\n");
-
- nl->outpos = 0;
- nl->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND);
-
- len = skb->len;
- if( len < SBNI_MIN_LEN )
- len = SBNI_MIN_LEN;
-
- nl->tx_buf_p = skb;
- nl->tx_frameno = DIV_ROUND_UP(len, nl->maxframe);
- nl->framelen = len < nl->maxframe ? len : nl->maxframe;
-
- outb( inb( dev->base_addr + CSR0 ) | TR_REQ, dev->base_addr + CSR0 );
-#ifdef CONFIG_SBNI_MULTILINE
- netif_trans_update(nl->master);
-#else
- netif_trans_update(dev);
-#endif
-}
-
-
-static void
-drop_xmit_queue( struct net_device *dev )
-{
- struct net_local *nl = netdev_priv(dev);
-
- if( nl->tx_buf_p ) {
- dev_kfree_skb_any( nl->tx_buf_p );
- nl->tx_buf_p = NULL;
-#ifdef CONFIG_SBNI_MULTILINE
- nl->master->stats.tx_errors++;
- nl->master->stats.tx_carrier_errors++;
-#else
- dev->stats.tx_errors++;
- dev->stats.tx_carrier_errors++;
-#endif
- }
-
- nl->tx_frameno = 0;
- nl->framelen = 0;
- nl->outpos = 0;
- nl->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND);
-#ifdef CONFIG_SBNI_MULTILINE
- netif_start_queue( nl->master );
- netif_trans_update(nl->master);
-#else
- netif_start_queue( dev );
- netif_trans_update(dev);
-#endif
-}
-
-
-static void
-send_frame_header( struct net_device *dev, u32 *crc_p )
-{
- struct net_local *nl = netdev_priv(dev);
-
- u32 crc = *crc_p;
- u32 len_field = nl->framelen + 6; /* CRC + frameno + reserved */
- u8 value;
-
- if( nl->state & FL_NEED_RESEND )
- len_field |= FRAME_RETRY; /* non-first attempt... */
-
- if( nl->outpos == 0 )
- len_field |= FRAME_FIRST;
-
- len_field |= (nl->state & FL_PREV_OK) ? FRAME_SENT_OK : FRAME_SENT_BAD;
- outb( SBNI_SIG, dev->base_addr + DAT );
-
- value = (u8) len_field;
- outb( value, dev->base_addr + DAT );
- crc = CRC32( value, crc );
- value = (u8) (len_field >> 8);
- outb( value, dev->base_addr + DAT );
- crc = CRC32( value, crc );
-
- outb( nl->tx_frameno, dev->base_addr + DAT );
- crc = CRC32( nl->tx_frameno, crc );
- outb( 0, dev->base_addr + DAT );
- crc = CRC32( 0, crc );
- *crc_p = crc;
-}
-
-
-/*
- * if frame tail not needed (incorrect number or received twice),
- * it won't store, but CRC will be calculated
- */
-
-static int
-skip_tail( unsigned int ioaddr, unsigned int tail_len, u32 crc )
-{
- while( tail_len-- )
- crc = CRC32( inb( ioaddr + DAT ), crc );
-
- return crc == CRC32_REMAINDER;
-}
-
-
-/*
- * Preliminary checks if frame header is correct, calculates its CRC
- * and split it to simple fields
- */
-
-static int
-check_fhdr( u32 ioaddr, u32 *framelen, u32 *frameno, u32 *ack,
- u32 *is_first, u32 *crc_p )
-{
- u32 crc = *crc_p;
- u8 value;
-
- if( inb( ioaddr + DAT ) != SBNI_SIG )
- return 0;
-
- value = inb( ioaddr + DAT );
- *framelen = (u32)value;
- crc = CRC32( value, crc );
- value = inb( ioaddr + DAT );
- *framelen |= ((u32)value) << 8;
- crc = CRC32( value, crc );
-
- *ack = *framelen & FRAME_ACK_MASK;
- *is_first = (*framelen & FRAME_FIRST) != 0;
-
- if( (*framelen &= FRAME_LEN_MASK) < 6 ||
- *framelen > SBNI_MAX_FRAME - 3 )
- return 0;
-
- value = inb( ioaddr + DAT );
- *frameno = (u32)value;
- crc = CRC32( value, crc );
-
- crc = CRC32( inb( ioaddr + DAT ), crc ); /* reserved byte */
- *framelen -= 2;
-
- *crc_p = crc;
- return 1;
-}
-
-
-static struct sk_buff *
-get_rx_buf( struct net_device *dev )
-{
- /* +2 is to compensate for the alignment fixup below */
- struct sk_buff *skb = dev_alloc_skb( ETHER_MAX_LEN + 2 );
- if( !skb )
- return NULL;
-
- skb_reserve( skb, 2 ); /* Align IP on longword boundaries */
- return skb;
-}
-
-
-static void
-indicate_pkt( struct net_device *dev )
-{
- struct net_local *nl = netdev_priv(dev);
- struct sk_buff *skb = nl->rx_buf_p;
-
- skb_put( skb, nl->inppos );
-
-#ifdef CONFIG_SBNI_MULTILINE
- skb->protocol = eth_type_trans( skb, nl->master );
- netif_rx( skb );
- ++nl->master->stats.rx_packets;
- nl->master->stats.rx_bytes += nl->inppos;
-#else
- skb->protocol = eth_type_trans( skb, dev );
- netif_rx( skb );
- ++dev->stats.rx_packets;
- dev->stats.rx_bytes += nl->inppos;
-#endif
- nl->rx_buf_p = NULL; /* protocol driver will clear this sk_buff */
-}
-
-
-/* -------------------------------------------------------------------------- */
-
-/*
- * Routine checks periodically wire activity and regenerates marker if
- * connect was inactive for a long time.
- */
-
-static void
-sbni_watchdog(struct timer_list *t)
-{
- struct net_local *nl = from_timer(nl, t, watchdog);
- struct net_device *dev = nl->watchdog_dev;
- unsigned long flags;
- unsigned char csr0;
-
- spin_lock_irqsave( &nl->lock, flags );
-
- csr0 = inb( dev->base_addr + CSR0 );
- if( csr0 & RC_CHK ) {
-
- if( nl->timer_ticks ) {
- if( csr0 & (RC_RDY | BU_EMP) )
- /* receiving not active */
- nl->timer_ticks--;
- } else {
- nl->in_stats.timeout_number++;
- if( nl->delta_rxl )
- timeout_change_level( dev );
-
- outb( *(u_char *)&nl->csr1 | PR_RES,
- dev->base_addr + CSR1 );
- csr0 = inb( dev->base_addr + CSR0 );
- }
- } else
- nl->state &= ~FL_LINE_DOWN;
-
- outb( csr0 | RC_CHK, dev->base_addr + CSR0 );
-
- mod_timer(t, jiffies + SBNI_TIMEOUT);
-
- spin_unlock_irqrestore( &nl->lock, flags );
-}
-
-
-static unsigned char rxl_tab[] = {
- 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x08,
- 0x0a, 0x0c, 0x0f, 0x16, 0x18, 0x1a, 0x1c, 0x1f
-};
-
-#define SIZE_OF_TIMEOUT_RXL_TAB 4
-static unsigned char timeout_rxl_tab[] = {
- 0x03, 0x05, 0x08, 0x0b
-};
-
-/* -------------------------------------------------------------------------- */
-
-static void
-card_start( struct net_device *dev )
-{
- struct net_local *nl = netdev_priv(dev);
-
- nl->timer_ticks = CHANGE_LEVEL_START_TICKS;
- nl->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND);
- nl->state |= FL_PREV_OK;
-
- nl->inppos = nl->outpos = 0;
- nl->wait_frameno = 0;
- nl->tx_frameno = 0;
- nl->framelen = 0;
-
- outb( *(u_char *)&nl->csr1 | PR_RES, dev->base_addr + CSR1 );
- outb( EN_INT, dev->base_addr + CSR0 );
-}
-
-/* -------------------------------------------------------------------------- */
-
-/* Receive level auto-selection */
-
-static void
-change_level( struct net_device *dev )
-{
- struct net_local *nl = netdev_priv(dev);
-
- if( nl->delta_rxl == 0 ) /* do not auto-negotiate RxL */
- return;
-
- if( nl->cur_rxl_index == 0 )
- nl->delta_rxl = 1;
- else if( nl->cur_rxl_index == 15 )
- nl->delta_rxl = -1;
- else if( nl->cur_rxl_rcvd < nl->prev_rxl_rcvd )
- nl->delta_rxl = -nl->delta_rxl;
-
- nl->csr1.rxl = rxl_tab[ nl->cur_rxl_index += nl->delta_rxl ];
- inb( dev->base_addr + CSR0 ); /* needs for PCI cards */
- outb( *(u8 *)&nl->csr1, dev->base_addr + CSR1 );
-
- nl->prev_rxl_rcvd = nl->cur_rxl_rcvd;
- nl->cur_rxl_rcvd = 0;
-}
-
-
-static void
-timeout_change_level( struct net_device *dev )
-{
- struct net_local *nl = netdev_priv(dev);
-
- nl->cur_rxl_index = timeout_rxl_tab[ nl->timeout_rxl ];
- if( ++nl->timeout_rxl >= 4 )
- nl->timeout_rxl = 0;
-
- nl->csr1.rxl = rxl_tab[ nl->cur_rxl_index ];
- inb( dev->base_addr + CSR0 );
- outb( *(unsigned char *)&nl->csr1, dev->base_addr + CSR1 );
-
- nl->prev_rxl_rcvd = nl->cur_rxl_rcvd;
- nl->cur_rxl_rcvd = 0;
-}
-
-/* -------------------------------------------------------------------------- */
-
-/*
- * Open/initialize the board.
- */
-
-static int
-sbni_open( struct net_device *dev )
-{
- struct net_local *nl = netdev_priv(dev);
- struct timer_list *w = &nl->watchdog;
-
- /*
- * For double ISA adapters within "common irq" mode, we have to
- * determine whether primary or secondary channel is initialized,
- * and set the irq handler only in first case.
- */
- if( dev->base_addr < 0x400 ) { /* ISA only */
- struct net_device **p = sbni_cards;
- for( ; *p && p < sbni_cards + SBNI_MAX_NUM_CARDS; ++p )
- if( (*p)->irq == dev->irq &&
- ((*p)->base_addr == dev->base_addr + 4 ||
- (*p)->base_addr == dev->base_addr - 4) &&
- (*p)->flags & IFF_UP ) {
-
- ((struct net_local *) (netdev_priv(*p)))
- ->second = dev;
- netdev_notice(dev, "using shared irq with %s\n",
- (*p)->name);
- nl->state |= FL_SECONDARY;
- goto handler_attached;
- }
- }
-
- if( request_irq(dev->irq, sbni_interrupt, IRQF_SHARED, dev->name, dev) ) {
- netdev_err(dev, "unable to get IRQ %d\n", dev->irq);
- return -EAGAIN;
- }
-
-handler_attached:
-
- spin_lock( &nl->lock );
- memset( &dev->stats, 0, sizeof(struct net_device_stats) );
- memset( &nl->in_stats, 0, sizeof(struct sbni_in_stats) );
-
- card_start( dev );
-
- netif_start_queue( dev );
-
- /* set timer watchdog */
- nl->watchdog_dev = dev;
- timer_setup(w, sbni_watchdog, 0);
- w->expires = jiffies + SBNI_TIMEOUT;
- add_timer( w );
-
- spin_unlock( &nl->lock );
- return 0;
-}
-
-
-static int
-sbni_close( struct net_device *dev )
-{
- struct net_local *nl = netdev_priv(dev);
-
- if( nl->second && nl->second->flags & IFF_UP ) {
- netdev_notice(dev, "Secondary channel (%s) is active!\n",
- nl->second->name);
- return -EBUSY;
- }
-
-#ifdef CONFIG_SBNI_MULTILINE
- if( nl->state & FL_SLAVE )
- emancipate( dev );
- else
- while( nl->link ) /* it's master device! */
- emancipate( nl->link );
-#endif
-
- spin_lock( &nl->lock );
-
- nl->second = NULL;
- drop_xmit_queue( dev );
- netif_stop_queue( dev );
-
- del_timer( &nl->watchdog );
-
- outb( 0, dev->base_addr + CSR0 );
-
- if( !(nl->state & FL_SECONDARY) )
- free_irq( dev->irq, dev );
- nl->state &= FL_SECONDARY;
-
- spin_unlock( &nl->lock );
- return 0;
-}
-
-
-/*
- Valid combinations in CSR0 (for probing):
-
- VALID_DECODER 0000,0011,1011,1010
-
- ; 0 ; -
- TR_REQ ; 1 ; +
- TR_RDY ; 2 ; -
- TR_RDY TR_REQ ; 3 ; +
- BU_EMP ; 4 ; +
- BU_EMP TR_REQ ; 5 ; +
- BU_EMP TR_RDY ; 6 ; -
- BU_EMP TR_RDY TR_REQ ; 7 ; +
- RC_RDY ; 8 ; +
- RC_RDY TR_REQ ; 9 ; +
- RC_RDY TR_RDY ; 10 ; -
- RC_RDY TR_RDY TR_REQ ; 11 ; -
- RC_RDY BU_EMP ; 12 ; -
- RC_RDY BU_EMP TR_REQ ; 13 ; -
- RC_RDY BU_EMP TR_RDY ; 14 ; -
- RC_RDY BU_EMP TR_RDY TR_REQ ; 15 ; -
-*/
-
-#define VALID_DECODER (2 + 8 + 0x10 + 0x20 + 0x80 + 0x100 + 0x200)
-
-
-static int
-sbni_card_probe( unsigned long ioaddr )
-{
- unsigned char csr0;
-
- csr0 = inb( ioaddr + CSR0 );
- if( csr0 != 0xff && csr0 != 0x00 ) {
- csr0 &= ~EN_INT;
- if( csr0 & BU_EMP )
- csr0 |= EN_INT;
-
- if( VALID_DECODER & (1 << (csr0 >> 4)) )
- return 0;
- }
-
- return -ENODEV;
-}
-
-/* -------------------------------------------------------------------------- */
-
-static int
-sbni_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data, int cmd)
-{
- struct net_local *nl = netdev_priv(dev);
- struct sbni_flags flags;
- int error = 0;
-
-#ifdef CONFIG_SBNI_MULTILINE
- struct net_device *slave_dev;
- char slave_name[ 8 ];
-#endif
-
- switch( cmd ) {
- case SIOCDEVGETINSTATS :
- if (copy_to_user(data, &nl->in_stats,
- sizeof(struct sbni_in_stats)))
- error = -EFAULT;
- break;
-
- case SIOCDEVRESINSTATS :
- if (!capable(CAP_NET_ADMIN))
- return -EPERM;
- memset( &nl->in_stats, 0, sizeof(struct sbni_in_stats) );
- break;
-
- case SIOCDEVGHWSTATE :
- flags.mac_addr = *(u32 *)(dev->dev_addr + 3);
- flags.rate = nl->csr1.rate;
- flags.slow_mode = (nl->state & FL_SLOW_MODE) != 0;
- flags.rxl = nl->cur_rxl_index;
- flags.fixed_rxl = nl->delta_rxl == 0;
-
- if (copy_to_user(data, &flags, sizeof(flags)))
- error = -EFAULT;
- break;
-
- case SIOCDEVSHWSTATE :
- if (!capable(CAP_NET_ADMIN))
- return -EPERM;
-
- spin_lock( &nl->lock );
- flags = *(struct sbni_flags*) &ifr->ifr_ifru;
- if( flags.fixed_rxl ) {
- nl->delta_rxl = 0;
- nl->cur_rxl_index = flags.rxl;
- } else {
- nl->delta_rxl = DEF_RXL_DELTA;
- nl->cur_rxl_index = DEF_RXL;
- }
-
- nl->csr1.rxl = rxl_tab[ nl->cur_rxl_index ];
- nl->csr1.rate = flags.rate;
- outb( *(u8 *)&nl->csr1 | PR_RES, dev->base_addr + CSR1 );
- spin_unlock( &nl->lock );
- break;
-
-#ifdef CONFIG_SBNI_MULTILINE
-
- case SIOCDEVENSLAVE :
- if (!capable(CAP_NET_ADMIN))
- return -EPERM;
-
- if (copy_from_user(slave_name, data, sizeof(slave_name)))
- return -EFAULT;
- slave_dev = dev_get_by_name(&init_net, slave_name );
- if( !slave_dev || !(slave_dev->flags & IFF_UP) ) {
- netdev_err(dev, "trying to enslave non-active device %s\n",
- slave_name);
- if (slave_dev)
- dev_put(slave_dev);
- return -EPERM;
- }
-
- return enslave( dev, slave_dev );
-
- case SIOCDEVEMANSIPATE :
- if (!capable(CAP_NET_ADMIN))
- return -EPERM;
-
- return emancipate( dev );
-
-#endif /* CONFIG_SBNI_MULTILINE */
-
- default :
- return -EOPNOTSUPP;
- }
-
- return error;
-}
-
-
-#ifdef CONFIG_SBNI_MULTILINE
-
-static int
-enslave( struct net_device *dev, struct net_device *slave_dev )
-{
- struct net_local *nl = netdev_priv(dev);
- struct net_local *snl = netdev_priv(slave_dev);
-
- if( nl->state & FL_SLAVE ) /* This isn't master or free device */
- return -EBUSY;
-
- if( snl->state & FL_SLAVE ) /* That was already enslaved */
- return -EBUSY;
-
- spin_lock( &nl->lock );
- spin_lock( &snl->lock );
-
- /* append to list */
- snl->link = nl->link;
- nl->link = slave_dev;
- snl->master = dev;
- snl->state |= FL_SLAVE;
-
- /* Summary statistics of MultiLine operation will be stored
- in master's counters */
- memset( &slave_dev->stats, 0, sizeof(struct net_device_stats) );
- netif_stop_queue( slave_dev );
- netif_wake_queue( dev ); /* Now we are able to transmit */
-
- spin_unlock( &snl->lock );
- spin_unlock( &nl->lock );
- netdev_notice(dev, "slave device (%s) attached\n", slave_dev->name);
- return 0;
-}
-
-
-static int
-emancipate( struct net_device *dev )
-{
- struct net_local *snl = netdev_priv(dev);
- struct net_device *p = snl->master;
- struct net_local *nl = netdev_priv(p);
-
- if( !(snl->state & FL_SLAVE) )
- return -EINVAL;
-
- spin_lock( &nl->lock );
- spin_lock( &snl->lock );
- drop_xmit_queue( dev );
-
- /* exclude from list */
- for(;;) { /* must be in list */
- struct net_local *t = netdev_priv(p);
- if( t->link == dev ) {
- t->link = snl->link;
- break;
- }
- p = t->link;
- }
-
- snl->link = NULL;
- snl->master = dev;
- snl->state &= ~FL_SLAVE;
-
- netif_start_queue( dev );
-
- spin_unlock( &snl->lock );
- spin_unlock( &nl->lock );
-
- dev_put( dev );
- return 0;
-}
-
-#endif
-
-static void
-set_multicast_list( struct net_device *dev )
-{
- return; /* sbni always operate in promiscuos mode */
-}
-
-
-#ifdef MODULE
-module_param_hw_array(io, int, ioport, NULL, 0);
-module_param_hw_array(irq, int, irq, NULL, 0);
-module_param_array(baud, int, NULL, 0);
-module_param_array(rxl, int, NULL, 0);
-module_param_array(mac, int, NULL, 0);
-module_param(skip_pci_probe, bool, 0);
-
-MODULE_LICENSE("GPL");
-
-
-int __init init_module( void )
-{
- struct net_device *dev;
- int err;
-
- while( num < SBNI_MAX_NUM_CARDS ) {
- dev = alloc_netdev(sizeof(struct net_local), "sbni%d",
- NET_NAME_UNKNOWN, sbni_devsetup);
- if( !dev)
- break;
-
- sprintf( dev->name, "sbni%d", num );
-
- err = sbni_init(dev);
- if (err) {
- free_netdev(dev);
- break;
- }
-
- if( register_netdev( dev ) ) {
- release_region( dev->base_addr, SBNI_IO_EXTENT );
- free_netdev( dev );
- break;
- }
- }
-
- return *sbni_cards ? 0 : -ENODEV;
-}
-
-void
-cleanup_module(void)
-{
- int i;
-
- for (i = 0; i < SBNI_MAX_NUM_CARDS; ++i) {
- struct net_device *dev = sbni_cards[i];
- if (dev != NULL) {
- unregister_netdev(dev);
- release_region(dev->base_addr, SBNI_IO_EXTENT);
- free_netdev(dev);
- }
- }
-}
-
-#else /* MODULE */
-
-static int __init
-sbni_setup( char *p )
-{
- int n, parm;
-
- if( *p++ != '(' )
- goto bad_param;
-
- for( n = 0, parm = 0; *p && n < 8; ) {
- (*dest[ parm ])[ n ] = simple_strtoul( p, &p, 0 );
- if( !*p || *p == ')' )
- return 1;
- if( *p == ';' ) {
- ++p;
- ++n;
- parm = 0;
- } else if( *p++ != ',' ) {
- break;
- } else {
- if( ++parm >= 5 )
- break;
- }
- }
-bad_param:
- pr_err("Error in sbni kernel parameter!\n");
- return 0;
-}
-
-__setup( "sbni=", sbni_setup );
-
-#endif /* MODULE */
-
-/* -------------------------------------------------------------------------- */
-
-static u32
-calc_crc32( u32 crc, u8 *p, u32 len )
-{
- while( len-- )
- crc = CRC32( *p++, crc );
-
- return crc;
-}
-
-static u32 crc32tab[] __attribute__ ((aligned(8))) = {
- 0xD202EF8D, 0xA505DF1B, 0x3C0C8EA1, 0x4B0BBE37,
- 0xD56F2B94, 0xA2681B02, 0x3B614AB8, 0x4C667A2E,
- 0xDCD967BF, 0xABDE5729, 0x32D70693, 0x45D03605,
- 0xDBB4A3A6, 0xACB39330, 0x35BAC28A, 0x42BDF21C,
- 0xCFB5FFE9, 0xB8B2CF7F, 0x21BB9EC5, 0x56BCAE53,
- 0xC8D83BF0, 0xBFDF0B66, 0x26D65ADC, 0x51D16A4A,
- 0xC16E77DB, 0xB669474D, 0x2F6016F7, 0x58672661,
- 0xC603B3C2, 0xB1048354, 0x280DD2EE, 0x5F0AE278,
- 0xE96CCF45, 0x9E6BFFD3, 0x0762AE69, 0x70659EFF,
- 0xEE010B5C, 0x99063BCA, 0x000F6A70, 0x77085AE6,
- 0xE7B74777, 0x90B077E1, 0x09B9265B, 0x7EBE16CD,
- 0xE0DA836E, 0x97DDB3F8, 0x0ED4E242, 0x79D3D2D4,
- 0xF4DBDF21, 0x83DCEFB7, 0x1AD5BE0D, 0x6DD28E9B,
- 0xF3B61B38, 0x84B12BAE, 0x1DB87A14, 0x6ABF4A82,
- 0xFA005713, 0x8D076785, 0x140E363F, 0x630906A9,
- 0xFD6D930A, 0x8A6AA39C, 0x1363F226, 0x6464C2B0,
- 0xA4DEAE1D, 0xD3D99E8B, 0x4AD0CF31, 0x3DD7FFA7,
- 0xA3B36A04, 0xD4B45A92, 0x4DBD0B28, 0x3ABA3BBE,
- 0xAA05262F, 0xDD0216B9, 0x440B4703, 0x330C7795,
- 0xAD68E236, 0xDA6FD2A0, 0x4366831A, 0x3461B38C,
- 0xB969BE79, 0xCE6E8EEF, 0x5767DF55, 0x2060EFC3,
- 0xBE047A60, 0xC9034AF6, 0x500A1B4C, 0x270D2BDA,
- 0xB7B2364B, 0xC0B506DD, 0x59BC5767, 0x2EBB67F1,
- 0xB0DFF252, 0xC7D8C2C4, 0x5ED1937E, 0x29D6A3E8,
- 0x9FB08ED5, 0xE8B7BE43, 0x71BEEFF9, 0x06B9DF6F,
- 0x98DD4ACC, 0xEFDA7A5A, 0x76D32BE0, 0x01D41B76,
- 0x916B06E7, 0xE66C3671, 0x7F6567CB, 0x0862575D,
- 0x9606C2FE, 0xE101F268, 0x7808A3D2, 0x0F0F9344,
- 0x82079EB1, 0xF500AE27, 0x6C09FF9D, 0x1B0ECF0B,
- 0x856A5AA8, 0xF26D6A3E, 0x6B643B84, 0x1C630B12,
- 0x8CDC1683, 0xFBDB2615, 0x62D277AF, 0x15D54739,
- 0x8BB1D29A, 0xFCB6E20C, 0x65BFB3B6, 0x12B88320,
- 0x3FBA6CAD, 0x48BD5C3B, 0xD1B40D81, 0xA6B33D17,
- 0x38D7A8B4, 0x4FD09822, 0xD6D9C998, 0xA1DEF90E,
- 0x3161E49F, 0x4666D409, 0xDF6F85B3, 0xA868B525,
- 0x360C2086, 0x410B1010, 0xD80241AA, 0xAF05713C,
- 0x220D7CC9, 0x550A4C5F, 0xCC031DE5, 0xBB042D73,
- 0x2560B8D0, 0x52678846, 0xCB6ED9FC, 0xBC69E96A,
- 0x2CD6F4FB, 0x5BD1C46D, 0xC2D895D7, 0xB5DFA541,
- 0x2BBB30E2, 0x5CBC0074, 0xC5B551CE, 0xB2B26158,
- 0x04D44C65, 0x73D37CF3, 0xEADA2D49, 0x9DDD1DDF,
- 0x03B9887C, 0x74BEB8EA, 0xEDB7E950, 0x9AB0D9C6,
- 0x0A0FC457, 0x7D08F4C1, 0xE401A57B, 0x930695ED,
- 0x0D62004E, 0x7A6530D8, 0xE36C6162, 0x946B51F4,
- 0x19635C01, 0x6E646C97, 0xF76D3D2D, 0x806A0DBB,
- 0x1E0E9818, 0x6909A88E, 0xF000F934, 0x8707C9A2,
- 0x17B8D433, 0x60BFE4A5, 0xF9B6B51F, 0x8EB18589,
- 0x10D5102A, 0x67D220BC, 0xFEDB7106, 0x89DC4190,
- 0x49662D3D, 0x3E611DAB, 0xA7684C11, 0xD06F7C87,
- 0x4E0BE924, 0x390CD9B2, 0xA0058808, 0xD702B89E,
- 0x47BDA50F, 0x30BA9599, 0xA9B3C423, 0xDEB4F4B5,
- 0x40D06116, 0x37D75180, 0xAEDE003A, 0xD9D930AC,
- 0x54D13D59, 0x23D60DCF, 0xBADF5C75, 0xCDD86CE3,
- 0x53BCF940, 0x24BBC9D6, 0xBDB2986C, 0xCAB5A8FA,
- 0x5A0AB56B, 0x2D0D85FD, 0xB404D447, 0xC303E4D1,
- 0x5D677172, 0x2A6041E4, 0xB369105E, 0xC46E20C8,
- 0x72080DF5, 0x050F3D63, 0x9C066CD9, 0xEB015C4F,
- 0x7565C9EC, 0x0262F97A, 0x9B6BA8C0, 0xEC6C9856,
- 0x7CD385C7, 0x0BD4B551, 0x92DDE4EB, 0xE5DAD47D,
- 0x7BBE41DE, 0x0CB97148, 0x95B020F2, 0xE2B71064,
- 0x6FBF1D91, 0x18B82D07, 0x81B17CBD, 0xF6B64C2B,
- 0x68D2D988, 0x1FD5E91E, 0x86DCB8A4, 0xF1DB8832,
- 0x616495A3, 0x1663A535, 0x8F6AF48F, 0xF86DC419,
- 0x660951BA, 0x110E612C, 0x88073096, 0xFF000000
-};
-
+++ /dev/null
-/* sbni.h: definitions for a Granch SBNI12 driver, version 5.0.0
- * Written 2001 Denis I.Timofeev (timofeev@granch.ru)
- * This file is distributed under the GNU GPL
- */
-
-#ifndef SBNI_H
-#define SBNI_H
-
-#ifdef SBNI_DEBUG
-#define DP( A ) A
-#else
-#define DP( A )
-#endif
-
-
-/* We don't have official vendor id yet... */
-#define SBNI_PCI_VENDOR 0x55
-#define SBNI_PCI_DEVICE 0x9f
-
-#define ISA_MODE 0x00
-#define PCI_MODE 0x01
-
-#define SBNI_IO_EXTENT 4
-
-enum sbni_reg {
- CSR0 = 0,
- CSR1 = 1,
- DAT = 2
-};
-
-/* CSR0 mapping */
-enum {
- BU_EMP = 0x02,
- RC_CHK = 0x04,
- CT_ZER = 0x08,
- TR_REQ = 0x10,
- TR_RDY = 0x20,
- EN_INT = 0x40,
- RC_RDY = 0x80
-};
-
-
-/* CSR1 mapping */
-#define PR_RES 0x80
-
-struct sbni_csr1 {
-#ifdef __LITTLE_ENDIAN_BITFIELD
- u8 rxl : 5;
- u8 rate : 2;
- u8 : 1;
-#else
- u8 : 1;
- u8 rate : 2;
- u8 rxl : 5;
-#endif
-};
-
-/* fields in frame header */
-#define FRAME_ACK_MASK (unsigned short)0x7000
-#define FRAME_LEN_MASK (unsigned short)0x03FF
-#define FRAME_FIRST (unsigned short)0x8000
-#define FRAME_RETRY (unsigned short)0x0800
-
-#define FRAME_SENT_BAD (unsigned short)0x4000
-#define FRAME_SENT_OK (unsigned short)0x3000
-
-
-/* state flags */
-enum {
- FL_WAIT_ACK = 0x01,
- FL_NEED_RESEND = 0x02,
- FL_PREV_OK = 0x04,
- FL_SLOW_MODE = 0x08,
- FL_SECONDARY = 0x10,
-#ifdef CONFIG_SBNI_MULTILINE
- FL_SLAVE = 0x20,
-#endif
- FL_LINE_DOWN = 0x40
-};
-
-
-enum {
- DEFAULT_IOBASEADDR = 0x210,
- DEFAULT_INTERRUPTNUMBER = 5,
- DEFAULT_RATE = 0,
- DEFAULT_FRAME_LEN = 1012
-};
-
-#define DEF_RXL_DELTA -1
-#define DEF_RXL 0xf
-
-#define SBNI_SIG 0x5a
-
-#define SBNI_MIN_LEN 60 /* Shortest Ethernet frame without FCS */
-#define SBNI_MAX_FRAME 1023
-#define ETHER_MAX_LEN 1518
-
-#define SBNI_TIMEOUT (HZ/10)
-
-#define TR_ERROR_COUNT 32
-#define CHANGE_LEVEL_START_TICKS 4
-
-#define SBNI_MAX_NUM_CARDS 16
-
-/* internal SBNI-specific statistics */
-struct sbni_in_stats {
- u32 all_rx_number;
- u32 bad_rx_number;
- u32 timeout_number;
- u32 all_tx_number;
- u32 resend_tx_number;
-};
-
-/* SBNI ioctl params */
-#define SIOCDEVGETINSTATS SIOCDEVPRIVATE
-#define SIOCDEVRESINSTATS SIOCDEVPRIVATE+1
-#define SIOCDEVGHWSTATE SIOCDEVPRIVATE+2
-#define SIOCDEVSHWSTATE SIOCDEVPRIVATE+3
-#define SIOCDEVENSLAVE SIOCDEVPRIVATE+4
-#define SIOCDEVEMANSIPATE SIOCDEVPRIVATE+5
-
-
-/* data packet for SIOCDEVGHWSTATE/SIOCDEVSHWSTATE ioctl requests */
-struct sbni_flags {
- u32 rxl : 4;
- u32 rate : 2;
- u32 fixed_rxl : 1;
- u32 slow_mode : 1;
- u32 mac_addr : 24;
-};
-
-/*
- * CRC-32 stuff
- */
-#define CRC32(c,crc) (crc32tab[((size_t)(crc) ^ (c)) & 0xff] ^ (((crc) >> 8) & 0x00FFFFFF))
- /* CRC generator 0xEDB88320 */
- /* CRC remainder 0x2144DF1C */
- /* CRC initial value 0x00000000 */
-#define CRC32_REMAINDER 0x2144DF1C
-#define CRC32_INITIAL 0x00000000
-
-#ifndef __initdata
-#define __initdata
-#endif
-
-#endif
-
/* Assigned at module init. Guaranteed locally-administered and unicast. */
static u8 fake_router_bssid[ETH_ALEN] __ro_after_init = {};
+static void virt_wifi_inform_bss(struct wiphy *wiphy)
+{
+ u64 tsf = div_u64(ktime_get_boottime_ns(), 1000);
+ struct cfg80211_bss *informed_bss;
+ static const struct {
+ u8 tag;
+ u8 len;
+ u8 ssid[8];
+ } __packed ssid = {
+ .tag = WLAN_EID_SSID,
+ .len = 8,
+ .ssid = "VirtWifi",
+ };
+
+ informed_bss = cfg80211_inform_bss(wiphy, &channel_5ghz,
+ CFG80211_BSS_FTYPE_PRESP,
+ fake_router_bssid, tsf,
+ WLAN_CAPABILITY_ESS, 0,
+ (void *)&ssid, sizeof(ssid),
+ DBM_TO_MBM(-50), GFP_KERNEL);
+ cfg80211_put_bss(wiphy, informed_bss);
+}
+
/* Called with the rtnl lock held. */
static int virt_wifi_scan(struct wiphy *wiphy,
struct cfg80211_scan_request *request)
/* Acquires and releases the rdev BSS lock. */
static void virt_wifi_scan_result(struct work_struct *work)
{
- struct {
- u8 tag;
- u8 len;
- u8 ssid[8];
- } __packed ssid = {
- .tag = WLAN_EID_SSID, .len = 8, .ssid = "VirtWifi",
- };
- struct cfg80211_bss *informed_bss;
struct virt_wifi_wiphy_priv *priv =
container_of(work, struct virt_wifi_wiphy_priv,
scan_result.work);
struct wiphy *wiphy = priv_to_wiphy(priv);
struct cfg80211_scan_info scan_info = { .aborted = false };
- u64 tsf = div_u64(ktime_get_boottime_ns(), 1000);
- informed_bss = cfg80211_inform_bss(wiphy, &channel_5ghz,
- CFG80211_BSS_FTYPE_PRESP,
- fake_router_bssid, tsf,
- WLAN_CAPABILITY_ESS, 0,
- (void *)&ssid, sizeof(ssid),
- DBM_TO_MBM(-50), GFP_KERNEL);
- cfg80211_put_bss(wiphy, informed_bss);
+ virt_wifi_inform_bss(wiphy);
/* Schedules work which acquires and releases the rtnl lock. */
cfg80211_scan_done(priv->scan_request, &scan_info);
if (!could_schedule)
return -EBUSY;
- if (sme->bssid)
+ if (sme->bssid) {
ether_addr_copy(priv->connect_requested_bss, sme->bssid);
- else
+ } else {
+ virt_wifi_inform_bss(wiphy);
eth_zero_addr(priv->connect_requested_bss);
+ }
wiphy_debug(wiphy, "connect\n");
struct virt_wifi_netdev_priv *priv =
container_of(work, struct virt_wifi_netdev_priv, connect.work);
u8 *requested_bss = priv->connect_requested_bss;
- bool has_addr = !is_zero_ether_addr(requested_bss);
bool right_addr = ether_addr_equal(requested_bss, fake_router_bssid);
u16 status = WLAN_STATUS_SUCCESS;
- if (!priv->is_up || (has_addr && !right_addr))
+ if (is_zero_ether_addr(requested_bss))
+ requested_bss = NULL;
+
+ if (!priv->is_up || (requested_bss && !right_addr))
status = WLAN_STATUS_UNSPECIFIED_FAILURE;
else
priv->is_connected = true;
To compile this driver as a module, choose M here: the module will be
called mhi_wwan_ctrl.
+config MHI_WWAN_MBIM
+ tristate "MHI WWAN MBIM network driver for QCOM-based PCIe modems"
+ depends on MHI_BUS
+ help
+ MHI WWAN MBIM is a WWAN network driver for QCOM-based PCIe modems.
+ It implements MBIM over MHI, for IP data aggregation and muxing.
+ A default wwan0 network interface is created for MBIM data session
+ ID 0. Additional links can be created via wwan rtnetlink type.
+
+ To compile this driver as a module, choose M here: the module will be
+ called mhi_wwan_mbim.
+
config RPMSG_WWAN_CTRL
tristate "RPMSG WWAN control driver"
depends on RPMSG
obj-$(CONFIG_WWAN_HWSIM) += wwan_hwsim.o
obj-$(CONFIG_MHI_WWAN_CTRL) += mhi_wwan_ctrl.o
+obj-$(CONFIG_MHI_WWAN_MBIM) += mhi_wwan_mbim.o
obj-$(CONFIG_RPMSG_WWAN_CTRL) += rpmsg_wwan_ctrl.o
obj-$(CONFIG_IOSM) += iosm/
#define IOSM_CP_VERSION 0x0100UL
/* DL dir Aggregation support mask */
-#define DL_AGGR BIT(23)
+#define DL_AGGR BIT(9)
/* UL dir Aggregation support mask */
-#define UL_AGGR BIT(22)
+#define UL_AGGR BIT(8)
/* UL flow credit support mask */
#define UL_FLOW_CREDIT BIT(21)
return;
}
- ul_credits = fct->vfl.nr_of_bytes;
+ ul_credits = le32_to_cpu(fct->vfl.nr_of_bytes);
dev_dbg(ipc_mux->dev, "Flow_Credit:: if_id[%d] Old: %d Grants: %d",
if_id, ipc_mux->session[if_id].ul_flow_credits, ul_credits);
qlt->reserved[0] = 0;
qlt->reserved[1] = 0;
- qlt->vfl.nr_of_bytes = session->ul_list.qlen;
+ qlt->vfl.nr_of_bytes = cpu_to_le32(session->ul_list.qlen);
/* Add QLT to the transfer list. */
skb_queue_tail(&ipc_mux->channel->ul_list,
* @nr_of_bytes: Number of bytes available to transmit in the queue.
*/
struct mux_lite_vfl {
- u32 nr_of_bytes;
+ __le32 nr_of_bytes;
};
/**
}
if (p_td->buffer.address != IPC_CB(skb)->mapping) {
- dev_err(ipc_protocol->dev, "invalid buf=%p or skb=%p",
- (void *)p_td->buffer.address, skb->data);
+ dev_err(ipc_protocol->dev, "invalid buf=%llx or skb=%p",
+ (unsigned long long)p_td->buffer.address, skb->data);
ipc_pcie_kfree_skb(ipc_protocol->pcie, skb);
skb = NULL;
goto ret;
RCU_INIT_POINTER(ipc_wwan->sub_netlist[if_id], NULL);
/* unregistering includes synchronize_net() */
- unregister_netdevice(dev);
+ unregister_netdevice_queue(dev, head);
unlock:
mutex_unlock(&ipc_wwan->if_mutex);
int ret;
/* Start mhi device's channel(s) */
- ret = mhi_prepare_for_transfer(mhiwwan->mhi_dev);
+ ret = mhi_prepare_for_transfer(mhiwwan->mhi_dev, 0);
if (ret)
return ret;
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* MHI MBIM Network driver - Network/MBIM over MHI bus
+ *
+ * Copyright (C) 2021 Linaro Ltd <loic.poulain@linaro.org>
+ *
+ * This driver copy some code from cdc_ncm, which is:
+ * Copyright (C) ST-Ericsson 2010-2012
+ * and cdc_mbim, which is:
+ * Copyright (c) 2012 Smith Micro Software, Inc.
+ * Copyright (c) 2012 Bjørn Mork <bjorn@mork.no>
+ *
+ */
+
+#include <linux/ethtool.h>
+#include <linux/if_arp.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/mhi.h>
+#include <linux/mii.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/u64_stats_sync.h>
+#include <linux/usb.h>
+#include <linux/usb/cdc.h>
+#include <linux/usb/usbnet.h>
+#include <linux/usb/cdc_ncm.h>
+#include <linux/wwan.h>
+
+/* 3500 allows to optimize skb allocation, the skbs will basically fit in
+ * one 4K page. Large MBIM packets will simply be split over several MHI
+ * transfers and chained by the MHI net layer (zerocopy).
+ */
+#define MHI_DEFAULT_MRU 3500
+
+#define MHI_MBIM_DEFAULT_MTU 1500
+#define MHI_MAX_BUF_SZ 0xffff
+
+#define MBIM_NDP16_SIGN_MASK 0x00ffffff
+
+#define MHI_MBIM_LINK_HASH_SIZE 8
+#define LINK_HASH(session) ((session) % MHI_MBIM_LINK_HASH_SIZE)
+
+struct mhi_mbim_link {
+ struct mhi_mbim_context *mbim;
+ struct net_device *ndev;
+ unsigned int session;
+
+ /* stats */
+ u64_stats_t rx_packets;
+ u64_stats_t rx_bytes;
+ u64_stats_t rx_errors;
+ u64_stats_t tx_packets;
+ u64_stats_t tx_bytes;
+ u64_stats_t tx_errors;
+ u64_stats_t tx_dropped;
+ struct u64_stats_sync tx_syncp;
+ struct u64_stats_sync rx_syncp;
+
+ struct hlist_node hlnode;
+};
+
+struct mhi_mbim_context {
+ struct mhi_device *mdev;
+ struct sk_buff *skbagg_head;
+ struct sk_buff *skbagg_tail;
+ unsigned int mru;
+ u32 rx_queue_sz;
+ u16 rx_seq;
+ u16 tx_seq;
+ struct delayed_work rx_refill;
+ spinlock_t tx_lock;
+ struct hlist_head link_list[MHI_MBIM_LINK_HASH_SIZE];
+};
+
+struct mbim_tx_hdr {
+ struct usb_cdc_ncm_nth16 nth16;
+ struct usb_cdc_ncm_ndp16 ndp16;
+ struct usb_cdc_ncm_dpe16 dpe16[2];
+} __packed;
+
+static struct mhi_mbim_link *mhi_mbim_get_link_rcu(struct mhi_mbim_context *mbim,
+ unsigned int session)
+{
+ struct mhi_mbim_link *link;
+
+ hlist_for_each_entry_rcu(link, &mbim->link_list[LINK_HASH(session)], hlnode) {
+ if (link->session == session)
+ return link;
+ }
+
+ return NULL;
+}
+
+static struct sk_buff *mbim_tx_fixup(struct sk_buff *skb, unsigned int session,
+ u16 tx_seq)
+{
+ unsigned int dgram_size = skb->len;
+ struct usb_cdc_ncm_nth16 *nth16;
+ struct usb_cdc_ncm_ndp16 *ndp16;
+ struct mbim_tx_hdr *mbim_hdr;
+
+ /* Only one NDP is sent, containing the IP packet (no aggregation) */
+
+ /* Ensure we have enough headroom for crafting MBIM header */
+ if (skb_cow_head(skb, sizeof(struct mbim_tx_hdr))) {
+ dev_kfree_skb_any(skb);
+ return NULL;
+ }
+
+ mbim_hdr = skb_push(skb, sizeof(struct mbim_tx_hdr));
+
+ /* Fill NTB header */
+ nth16 = &mbim_hdr->nth16;
+ nth16->dwSignature = cpu_to_le32(USB_CDC_NCM_NTH16_SIGN);
+ nth16->wHeaderLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_nth16));
+ nth16->wSequence = cpu_to_le16(tx_seq);
+ nth16->wBlockLength = cpu_to_le16(skb->len);
+ nth16->wNdpIndex = cpu_to_le16(sizeof(struct usb_cdc_ncm_nth16));
+
+ /* Fill the unique NDP */
+ ndp16 = &mbim_hdr->ndp16;
+ ndp16->dwSignature = cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN | (session << 24));
+ ndp16->wLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_ndp16)
+ + sizeof(struct usb_cdc_ncm_dpe16) * 2);
+ ndp16->wNextNdpIndex = 0;
+
+ /* Datagram follows the mbim header */
+ ndp16->dpe16[0].wDatagramIndex = cpu_to_le16(sizeof(struct mbim_tx_hdr));
+ ndp16->dpe16[0].wDatagramLength = cpu_to_le16(dgram_size);
+
+ /* null termination */
+ ndp16->dpe16[1].wDatagramIndex = 0;
+ ndp16->dpe16[1].wDatagramLength = 0;
+
+ return skb;
+}
+
+static netdev_tx_t mhi_mbim_ndo_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+ struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev);
+ struct mhi_mbim_context *mbim = link->mbim;
+ unsigned long flags;
+ int err = -ENOMEM;
+
+ /* Serialize MHI channel queuing and MBIM seq */
+ spin_lock_irqsave(&mbim->tx_lock, flags);
+
+ skb = mbim_tx_fixup(skb, link->session, mbim->tx_seq);
+ if (unlikely(!skb))
+ goto exit_unlock;
+
+ err = mhi_queue_skb(mbim->mdev, DMA_TO_DEVICE, skb, skb->len, MHI_EOT);
+
+ if (mhi_queue_is_full(mbim->mdev, DMA_TO_DEVICE))
+ netif_stop_queue(ndev);
+
+ if (!err)
+ mbim->tx_seq++;
+
+exit_unlock:
+ spin_unlock_irqrestore(&mbim->tx_lock, flags);
+
+ if (unlikely(err)) {
+ net_err_ratelimited("%s: Failed to queue TX buf (%d)\n",
+ ndev->name, err);
+ dev_kfree_skb_any(skb);
+ goto exit_drop;
+ }
+
+ return NETDEV_TX_OK;
+
+exit_drop:
+ u64_stats_update_begin(&link->tx_syncp);
+ u64_stats_inc(&link->tx_dropped);
+ u64_stats_update_end(&link->tx_syncp);
+
+ return NETDEV_TX_OK;
+}
+
+static int mbim_rx_verify_nth16(struct mhi_mbim_context *mbim, struct sk_buff *skb)
+{
+ struct usb_cdc_ncm_nth16 *nth16;
+ int len;
+
+ if (skb->len < sizeof(struct usb_cdc_ncm_nth16) +
+ sizeof(struct usb_cdc_ncm_ndp16)) {
+ net_err_ratelimited("frame too short\n");
+ return -EINVAL;
+ }
+
+ nth16 = (struct usb_cdc_ncm_nth16 *)skb->data;
+
+ if (nth16->dwSignature != cpu_to_le32(USB_CDC_NCM_NTH16_SIGN)) {
+ net_err_ratelimited("invalid NTH16 signature <%#010x>\n",
+ le32_to_cpu(nth16->dwSignature));
+ return -EINVAL;
+ }
+
+ /* No limit on the block length, except the size of the data pkt */
+ len = le16_to_cpu(nth16->wBlockLength);
+ if (len > skb->len) {
+ net_err_ratelimited("NTB does not fit into the skb %u/%u\n",
+ len, skb->len);
+ return -EINVAL;
+ }
+
+ if (mbim->rx_seq + 1 != le16_to_cpu(nth16->wSequence) &&
+ (mbim->rx_seq || le16_to_cpu(nth16->wSequence)) &&
+ !(mbim->rx_seq == 0xffff && !le16_to_cpu(nth16->wSequence))) {
+ net_err_ratelimited("sequence number glitch prev=%d curr=%d\n",
+ mbim->rx_seq, le16_to_cpu(nth16->wSequence));
+ }
+ mbim->rx_seq = le16_to_cpu(nth16->wSequence);
+
+ return le16_to_cpu(nth16->wNdpIndex);
+}
+
+static int mbim_rx_verify_ndp16(struct sk_buff *skb, struct usb_cdc_ncm_ndp16 *ndp16)
+{
+ int ret;
+
+ if (le16_to_cpu(ndp16->wLength) < USB_CDC_NCM_NDP16_LENGTH_MIN) {
+ net_err_ratelimited("invalid DPT16 length <%u>\n",
+ le16_to_cpu(ndp16->wLength));
+ return -EINVAL;
+ }
+
+ ret = ((le16_to_cpu(ndp16->wLength) - sizeof(struct usb_cdc_ncm_ndp16))
+ / sizeof(struct usb_cdc_ncm_dpe16));
+ ret--; /* Last entry is always a NULL terminator */
+
+ if (sizeof(struct usb_cdc_ncm_ndp16) +
+ ret * sizeof(struct usb_cdc_ncm_dpe16) > skb->len) {
+ net_err_ratelimited("Invalid nframes = %d\n", ret);
+ return -EINVAL;
+ }
+
+ return ret;
+}
+
+static void mhi_mbim_rx(struct mhi_mbim_context *mbim, struct sk_buff *skb)
+{
+ int ndpoffset;
+
+ /* Check NTB header and retrieve first NDP offset */
+ ndpoffset = mbim_rx_verify_nth16(mbim, skb);
+ if (ndpoffset < 0) {
+ net_err_ratelimited("mbim: Incorrect NTB header\n");
+ goto error;
+ }
+
+ /* Process each NDP */
+ while (1) {
+ struct usb_cdc_ncm_ndp16 ndp16;
+ struct usb_cdc_ncm_dpe16 dpe16;
+ struct mhi_mbim_link *link;
+ int nframes, n, dpeoffset;
+ unsigned int session;
+
+ if (skb_copy_bits(skb, ndpoffset, &ndp16, sizeof(ndp16))) {
+ net_err_ratelimited("mbim: Incorrect NDP offset (%u)\n",
+ ndpoffset);
+ goto error;
+ }
+
+ /* Check NDP header and retrieve number of datagrams */
+ nframes = mbim_rx_verify_ndp16(skb, &ndp16);
+ if (nframes < 0) {
+ net_err_ratelimited("mbim: Incorrect NDP16\n");
+ goto error;
+ }
+
+ /* Only IP data type supported, no DSS in MHI context */
+ if ((ndp16.dwSignature & cpu_to_le32(MBIM_NDP16_SIGN_MASK))
+ != cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN)) {
+ net_err_ratelimited("mbim: Unsupported NDP type\n");
+ goto next_ndp;
+ }
+
+ session = (le32_to_cpu(ndp16.dwSignature) & ~MBIM_NDP16_SIGN_MASK) >> 24;
+
+ rcu_read_lock();
+
+ link = mhi_mbim_get_link_rcu(mbim, session);
+ if (!link) {
+ net_err_ratelimited("mbim: bad packet session (%u)\n", session);
+ goto unlock;
+ }
+
+ /* de-aggregate and deliver IP packets */
+ dpeoffset = ndpoffset + sizeof(struct usb_cdc_ncm_ndp16);
+ for (n = 0; n < nframes; n++, dpeoffset += sizeof(dpe16)) {
+ u16 dgram_offset, dgram_len;
+ struct sk_buff *skbn;
+
+ if (skb_copy_bits(skb, dpeoffset, &dpe16, sizeof(dpe16)))
+ break;
+
+ dgram_offset = le16_to_cpu(dpe16.wDatagramIndex);
+ dgram_len = le16_to_cpu(dpe16.wDatagramLength);
+
+ if (!dgram_offset || !dgram_len)
+ break; /* null terminator */
+
+ skbn = netdev_alloc_skb(link->ndev, dgram_len);
+ if (!skbn)
+ continue;
+
+ skb_put(skbn, dgram_len);
+ skb_copy_bits(skb, dgram_offset, skbn->data, dgram_len);
+
+ switch (skbn->data[0] & 0xf0) {
+ case 0x40:
+ skbn->protocol = htons(ETH_P_IP);
+ break;
+ case 0x60:
+ skbn->protocol = htons(ETH_P_IPV6);
+ break;
+ default:
+ net_err_ratelimited("%s: unknown protocol\n",
+ link->ndev->name);
+ dev_kfree_skb_any(skbn);
+ u64_stats_update_begin(&link->rx_syncp);
+ u64_stats_inc(&link->rx_errors);
+ u64_stats_update_end(&link->rx_syncp);
+ continue;
+ }
+
+ u64_stats_update_begin(&link->rx_syncp);
+ u64_stats_inc(&link->rx_packets);
+ u64_stats_add(&link->rx_bytes, skbn->len);
+ u64_stats_update_end(&link->rx_syncp);
+
+ netif_rx(skbn);
+ }
+unlock:
+ rcu_read_unlock();
+next_ndp:
+ /* Other NDP to process? */
+ ndpoffset = (int)le16_to_cpu(ndp16.wNextNdpIndex);
+ if (!ndpoffset)
+ break;
+ }
+
+ /* free skb */
+ dev_consume_skb_any(skb);
+ return;
+error:
+ dev_kfree_skb_any(skb);
+}
+
+static struct sk_buff *mhi_net_skb_agg(struct mhi_mbim_context *mbim,
+ struct sk_buff *skb)
+{
+ struct sk_buff *head = mbim->skbagg_head;
+ struct sk_buff *tail = mbim->skbagg_tail;
+
+ /* This is non-paged skb chaining using frag_list */
+ if (!head) {
+ mbim->skbagg_head = skb;
+ return skb;
+ }
+
+ if (!skb_shinfo(head)->frag_list)
+ skb_shinfo(head)->frag_list = skb;
+ else
+ tail->next = skb;
+
+ head->len += skb->len;
+ head->data_len += skb->len;
+ head->truesize += skb->truesize;
+
+ mbim->skbagg_tail = skb;
+
+ return mbim->skbagg_head;
+}
+
+static void mhi_net_rx_refill_work(struct work_struct *work)
+{
+ struct mhi_mbim_context *mbim = container_of(work, struct mhi_mbim_context,
+ rx_refill.work);
+ struct mhi_device *mdev = mbim->mdev;
+ int err;
+
+ while (!mhi_queue_is_full(mdev, DMA_FROM_DEVICE)) {
+ struct sk_buff *skb = alloc_skb(MHI_DEFAULT_MRU, GFP_KERNEL);
+
+ if (unlikely(!skb))
+ break;
+
+ err = mhi_queue_skb(mdev, DMA_FROM_DEVICE, skb,
+ MHI_DEFAULT_MRU, MHI_EOT);
+ if (unlikely(err)) {
+ kfree_skb(skb);
+ break;
+ }
+
+ /* Do not hog the CPU if rx buffers are consumed faster than
+ * queued (unlikely).
+ */
+ cond_resched();
+ }
+
+ /* If we're still starved of rx buffers, reschedule later */
+ if (mhi_get_free_desc_count(mdev, DMA_FROM_DEVICE) == mbim->rx_queue_sz)
+ schedule_delayed_work(&mbim->rx_refill, HZ / 2);
+}
+
+static void mhi_mbim_dl_callback(struct mhi_device *mhi_dev,
+ struct mhi_result *mhi_res)
+{
+ struct mhi_mbim_context *mbim = dev_get_drvdata(&mhi_dev->dev);
+ struct sk_buff *skb = mhi_res->buf_addr;
+ int free_desc_count;
+
+ free_desc_count = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE);
+
+ if (unlikely(mhi_res->transaction_status)) {
+ switch (mhi_res->transaction_status) {
+ case -EOVERFLOW:
+ /* Packet has been split over multiple transfers */
+ skb_put(skb, mhi_res->bytes_xferd);
+ mhi_net_skb_agg(mbim, skb);
+ break;
+ case -ENOTCONN:
+ /* MHI layer stopping/resetting the DL channel */
+ dev_kfree_skb_any(skb);
+ return;
+ default:
+ /* Unknown error, simply drop */
+ dev_kfree_skb_any(skb);
+ }
+ } else {
+ skb_put(skb, mhi_res->bytes_xferd);
+
+ if (mbim->skbagg_head) {
+ /* Aggregate the final fragment */
+ skb = mhi_net_skb_agg(mbim, skb);
+ mbim->skbagg_head = NULL;
+ }
+
+ mhi_mbim_rx(mbim, skb);
+ }
+
+ /* Refill if RX buffers queue becomes low */
+ if (free_desc_count >= mbim->rx_queue_sz / 2)
+ schedule_delayed_work(&mbim->rx_refill, 0);
+}
+
+static void mhi_mbim_ndo_get_stats64(struct net_device *ndev,
+ struct rtnl_link_stats64 *stats)
+{
+ struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev);
+ unsigned int start;
+
+ do {
+ start = u64_stats_fetch_begin_irq(&link->rx_syncp);
+ stats->rx_packets = u64_stats_read(&link->rx_packets);
+ stats->rx_bytes = u64_stats_read(&link->rx_bytes);
+ stats->rx_errors = u64_stats_read(&link->rx_errors);
+ } while (u64_stats_fetch_retry_irq(&link->rx_syncp, start));
+
+ do {
+ start = u64_stats_fetch_begin_irq(&link->tx_syncp);
+ stats->tx_packets = u64_stats_read(&link->tx_packets);
+ stats->tx_bytes = u64_stats_read(&link->tx_bytes);
+ stats->tx_errors = u64_stats_read(&link->tx_errors);
+ stats->tx_dropped = u64_stats_read(&link->tx_dropped);
+ } while (u64_stats_fetch_retry_irq(&link->tx_syncp, start));
+}
+
+static void mhi_mbim_ul_callback(struct mhi_device *mhi_dev,
+ struct mhi_result *mhi_res)
+{
+ struct mhi_mbim_context *mbim = dev_get_drvdata(&mhi_dev->dev);
+ struct sk_buff *skb = mhi_res->buf_addr;
+ struct net_device *ndev = skb->dev;
+ struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev);
+
+ /* Hardware has consumed the buffer, so free the skb (which is not
+ * freed by the MHI stack) and perform accounting.
+ */
+ dev_consume_skb_any(skb);
+
+ u64_stats_update_begin(&link->tx_syncp);
+ if (unlikely(mhi_res->transaction_status)) {
+ /* MHI layer stopping/resetting the UL channel */
+ if (mhi_res->transaction_status == -ENOTCONN) {
+ u64_stats_update_end(&link->tx_syncp);
+ return;
+ }
+
+ u64_stats_inc(&link->tx_errors);
+ } else {
+ u64_stats_inc(&link->tx_packets);
+ u64_stats_add(&link->tx_bytes, mhi_res->bytes_xferd);
+ }
+ u64_stats_update_end(&link->tx_syncp);
+
+ if (netif_queue_stopped(ndev) && !mhi_queue_is_full(mbim->mdev, DMA_TO_DEVICE))
+ netif_wake_queue(ndev);
+}
+
+static int mhi_mbim_ndo_open(struct net_device *ndev)
+{
+ struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev);
+
+ /* Feed the MHI rx buffer pool */
+ schedule_delayed_work(&link->mbim->rx_refill, 0);
+
+ /* Carrier is established via out-of-band channel (e.g. qmi) */
+ netif_carrier_on(ndev);
+
+ netif_start_queue(ndev);
+
+ return 0;
+}
+
+static int mhi_mbim_ndo_stop(struct net_device *ndev)
+{
+ netif_stop_queue(ndev);
+ netif_carrier_off(ndev);
+
+ return 0;
+}
+
+static const struct net_device_ops mhi_mbim_ndo = {
+ .ndo_open = mhi_mbim_ndo_open,
+ .ndo_stop = mhi_mbim_ndo_stop,
+ .ndo_start_xmit = mhi_mbim_ndo_xmit,
+ .ndo_get_stats64 = mhi_mbim_ndo_get_stats64,
+};
+
+static int mhi_mbim_newlink(void *ctxt, struct net_device *ndev, u32 if_id,
+ struct netlink_ext_ack *extack)
+{
+ struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev);
+ struct mhi_mbim_context *mbim = ctxt;
+
+ link->session = if_id;
+ link->mbim = mbim;
+ link->ndev = ndev;
+ u64_stats_init(&link->rx_syncp);
+ u64_stats_init(&link->tx_syncp);
+
+ rcu_read_lock();
+ if (mhi_mbim_get_link_rcu(mbim, if_id)) {
+ rcu_read_unlock();
+ return -EEXIST;
+ }
+ rcu_read_unlock();
+
+ /* Already protected by RTNL lock */
+ hlist_add_head_rcu(&link->hlnode, &mbim->link_list[LINK_HASH(if_id)]);
+
+ return register_netdevice(ndev);
+}
+
+static void mhi_mbim_dellink(void *ctxt, struct net_device *ndev,
+ struct list_head *head)
+{
+ struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev);
+
+ hlist_del_init_rcu(&link->hlnode);
+ synchronize_rcu();
+
+ unregister_netdevice_queue(ndev, head);
+}
+
+static void mhi_mbim_setup(struct net_device *ndev)
+{
+ ndev->header_ops = NULL; /* No header */
+ ndev->type = ARPHRD_RAWIP;
+ ndev->needed_headroom = sizeof(struct mbim_tx_hdr);
+ ndev->hard_header_len = 0;
+ ndev->addr_len = 0;
+ ndev->flags = IFF_POINTOPOINT | IFF_NOARP;
+ ndev->netdev_ops = &mhi_mbim_ndo;
+ ndev->mtu = MHI_MBIM_DEFAULT_MTU;
+ ndev->min_mtu = ETH_MIN_MTU;
+ ndev->max_mtu = MHI_MAX_BUF_SZ - ndev->needed_headroom;
+ ndev->tx_queue_len = 1000;
+}
+
+static const struct wwan_ops mhi_mbim_wwan_ops = {
+ .priv_size = sizeof(struct mhi_mbim_link),
+ .setup = mhi_mbim_setup,
+ .newlink = mhi_mbim_newlink,
+ .dellink = mhi_mbim_dellink,
+};
+
+static int mhi_mbim_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id)
+{
+ struct mhi_controller *cntrl = mhi_dev->mhi_cntrl;
+ struct mhi_mbim_context *mbim;
+ int err;
+
+ mbim = devm_kzalloc(&mhi_dev->dev, sizeof(*mbim), GFP_KERNEL);
+ if (!mbim)
+ return -ENOMEM;
+
+ spin_lock_init(&mbim->tx_lock);
+ dev_set_drvdata(&mhi_dev->dev, mbim);
+ mbim->mdev = mhi_dev;
+ mbim->mru = mhi_dev->mhi_cntrl->mru ? mhi_dev->mhi_cntrl->mru : MHI_DEFAULT_MRU;
+
+ INIT_DELAYED_WORK(&mbim->rx_refill, mhi_net_rx_refill_work);
+
+ /* Start MHI channels */
+ err = mhi_prepare_for_transfer(mhi_dev, 0);
+ if (err)
+ return err;
+
+ /* Number of transfer descriptors determines size of the queue */
+ mbim->rx_queue_sz = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE);
+
+ /* Register wwan link ops with MHI controller representing WWAN instance */
+ return wwan_register_ops(&cntrl->mhi_dev->dev, &mhi_mbim_wwan_ops, mbim, 0);
+}
+
+static void mhi_mbim_remove(struct mhi_device *mhi_dev)
+{
+ struct mhi_mbim_context *mbim = dev_get_drvdata(&mhi_dev->dev);
+ struct mhi_controller *cntrl = mhi_dev->mhi_cntrl;
+
+ mhi_unprepare_from_transfer(mhi_dev);
+ cancel_delayed_work_sync(&mbim->rx_refill);
+ wwan_unregister_ops(&cntrl->mhi_dev->dev);
+ kfree_skb(mbim->skbagg_head);
+ dev_set_drvdata(&mhi_dev->dev, NULL);
+}
+
+static const struct mhi_device_id mhi_mbim_id_table[] = {
+ /* Hardware accelerated data PATH (to modem IPA), MBIM protocol */
+ { .chan = "IP_HW0_MBIM", .driver_data = 0 },
+ {}
+};
+MODULE_DEVICE_TABLE(mhi, mhi_mbim_id_table);
+
+static struct mhi_driver mhi_mbim_driver = {
+ .probe = mhi_mbim_probe,
+ .remove = mhi_mbim_remove,
+ .dl_xfer_cb = mhi_mbim_dl_callback,
+ .ul_xfer_cb = mhi_mbim_ul_callback,
+ .id_table = mhi_mbim_id_table,
+ .driver = {
+ .name = "mhi_wwan_mbim",
+ .owner = THIS_MODULE,
+ },
+};
+
+module_mhi_driver(mhi_mbim_driver);
+
+MODULE_AUTHOR("Loic Poulain <loic.poulain@linaro.org>");
+MODULE_DESCRIPTION("Network/MBIM over MHI");
+MODULE_LICENSE("GPL v2");
if (!IS_ERR(skb))
dev_kfree_skb(skb);
-
- skb = ERR_PTR(-ENODEV);
+ return;
}
dev->cb(dev->nfc_digital_dev, dev->arg, skb);
tfm = crypto_alloc_shash("sha1", 0, 0);
if (IS_ERR(tfm)) {
dev_err(&fw_info->ndev->nfc_dev->dev,
- "Cannot allocate shash (code=%d)\n", ret);
+ "Cannot allocate shash (code=%pe)\n", tfm);
return PTR_ERR(tfm);
}
cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
cmnd->write_zeroes.length =
cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
- cmnd->write_zeroes.control = 0;
+ if (nvme_ns_has_pi(ns))
+ cmnd->write_zeroes.control = cpu_to_le16(NVME_RW_PRINFO_PRACT);
+ else
+ cmnd->write_zeroes.control = 0;
return BLK_STS_OK;
}
static void nvme_ns_remove(struct nvme_ns *ns)
{
+ bool last_path = false;
+
if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags))
return;
mutex_lock(&ns->ctrl->subsys->lock);
list_del_rcu(&ns->siblings);
- if (list_empty(&ns->head->list))
- list_del_init(&ns->head->entry);
mutex_unlock(&ns->ctrl->subsys->lock);
synchronize_rcu(); /* guarantee not available in head->list */
list_del_init(&ns->list);
up_write(&ns->ctrl->namespaces_rwsem);
- nvme_mpath_check_last_path(ns);
+ /* Synchronize with nvme_init_ns_head() */
+ mutex_lock(&ns->head->subsys->lock);
+ if (list_empty(&ns->head->list)) {
+ list_del_init(&ns->head->entry);
+ last_path = true;
+ }
+ mutex_unlock(&ns->head->subsys->lock);
+ if (last_path)
+ nvme_mpath_shutdown_disk(ns->head);
nvme_put_ns(ns);
}
#endif
}
-void nvme_mpath_remove_disk(struct nvme_ns_head *head)
+void nvme_mpath_shutdown_disk(struct nvme_ns_head *head)
{
if (!head->disk)
return;
+ kblockd_schedule_work(&head->requeue_work);
if (head->disk->flags & GENHD_FL_UP) {
nvme_cdev_del(&head->cdev, &head->cdev_device);
del_gendisk(head->disk);
}
+}
+
+void nvme_mpath_remove_disk(struct nvme_ns_head *head)
+{
+ if (!head->disk)
+ return;
blk_set_queue_dying(head->disk->queue);
/* make sure all pending bios are cleaned up */
kblockd_schedule_work(&head->requeue_work);
void nvme_mpath_stop(struct nvme_ctrl *ctrl);
bool nvme_mpath_clear_current_path(struct nvme_ns *ns);
void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl);
-
-static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
-{
- struct nvme_ns_head *head = ns->head;
-
- if (head->disk && list_empty(&head->list))
- kblockd_schedule_work(&head->requeue_work);
-}
+void nvme_mpath_shutdown_disk(struct nvme_ns_head *head);
static inline void nvme_trace_bio_complete(struct request *req)
{
static inline void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
{
}
-static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
+static inline void nvme_mpath_shutdown_disk(struct nvme_ns_head *head)
{
}
static inline void nvme_trace_bio_complete(struct request *req)
bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
int result;
- if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING)) {
+ if (dev->ctrl.state != NVME_CTRL_RESETTING) {
+ dev_warn(dev->ctrl.device, "ctrl state %d is not RESETTING\n",
+ dev->ctrl.state);
result = -ENODEV;
goto out;
}
__field(u8, fctype)
__field(u16, cid)
__field(u32, nsid)
- __field(u64, metadata)
+ __field(bool, metadata)
__array(u8, cdw10, 24)
),
TP_fast_assign(
__entry->flags = cmd->common.flags;
__entry->cid = cmd->common.command_id;
__entry->nsid = le32_to_cpu(cmd->common.nsid);
- __entry->metadata = le64_to_cpu(cmd->common.metadata);
+ __entry->metadata = !!blk_integrity_rq(req);
__entry->fctype = cmd->fabrics.fctype;
__assign_disk_name(__entry->disk, req->rq_disk);
memcpy(__entry->cdw10, &cmd->common.cdw10,
sizeof(__entry->cdw10));
),
- TP_printk("nvme%d: %sqid=%d, cmdid=%u, nsid=%u, flags=0x%x, meta=0x%llx, cmd=(%s %s)",
+ TP_printk("nvme%d: %sqid=%d, cmdid=%u, nsid=%u, flags=0x%x, meta=0x%x, cmd=(%s %s)",
__entry->ctrl_id, __print_disk_name(__entry->disk),
__entry->qid, __entry->cid, __entry->nsid,
__entry->flags, __entry->metadata,
for (i = 0; i < socket_count; i++) {
sockets[i].card_state = 1; /* 1 = present but empty */
sockets[i].io_base = pci_resource_start(dev, 0);
+ sockets[i].dev = dev;
sockets[i].socket.features |= SS_CAP_PCCARD;
sockets[i].socket.map_size = 0x1000;
sockets[i].socket.irq_mask = 0;
#define AMD_PMC_RESULT_CMD_UNKNOWN 0xFE
#define AMD_PMC_RESULT_FAILED 0xFF
+/* FCH SSC Registers */
+#define FCH_S0I3_ENTRY_TIME_L_OFFSET 0x30
+#define FCH_S0I3_ENTRY_TIME_H_OFFSET 0x34
+#define FCH_S0I3_EXIT_TIME_L_OFFSET 0x38
+#define FCH_S0I3_EXIT_TIME_H_OFFSET 0x3C
+#define FCH_SSC_MAPPING_SIZE 0x800
+#define FCH_BASE_PHY_ADDR_LOW 0xFED81100
+#define FCH_BASE_PHY_ADDR_HIGH 0x00000000
+
+/* SMU Message Definations */
+#define SMU_MSG_GETSMUVERSION 0x02
+#define SMU_MSG_LOG_GETDRAM_ADDR_HI 0x04
+#define SMU_MSG_LOG_GETDRAM_ADDR_LO 0x05
+#define SMU_MSG_LOG_START 0x06
+#define SMU_MSG_LOG_RESET 0x07
+#define SMU_MSG_LOG_DUMP_DATA 0x08
+#define SMU_MSG_GET_SUP_CONSTRAINTS 0x09
/* List of supported CPU ids */
#define AMD_CPU_ID_RV 0x15D0
#define AMD_CPU_ID_RN 0x1630
#define AMD_CPU_ID_PCO AMD_CPU_ID_RV
#define AMD_CPU_ID_CZN AMD_CPU_ID_RN
+#define AMD_CPU_ID_YC 0x14B5
-#define AMD_SMU_FW_VERSION 0x0
#define PMC_MSG_DELAY_MIN_US 100
#define RESPONSE_REGISTER_LOOP_MAX 200
+#define SOC_SUBSYSTEM_IP_MAX 12
+#define DELAY_MIN_US 2000
+#define DELAY_MAX_US 3000
enum amd_pmc_def {
MSG_TEST = 0x01,
MSG_OS_HINT_PCO,
MSG_OS_HINT_RN,
};
+struct amd_pmc_bit_map {
+ const char *name;
+ u32 bit_mask;
+};
+
+static const struct amd_pmc_bit_map soc15_ip_blk[] = {
+ {"DISPLAY", BIT(0)},
+ {"CPU", BIT(1)},
+ {"GFX", BIT(2)},
+ {"VDD", BIT(3)},
+ {"ACP", BIT(4)},
+ {"VCN", BIT(5)},
+ {"ISP", BIT(6)},
+ {"NBIO", BIT(7)},
+ {"DF", BIT(8)},
+ {"USB0", BIT(9)},
+ {"USB1", BIT(10)},
+ {"LAPIC", BIT(11)},
+ {}
+};
+
struct amd_pmc_dev {
void __iomem *regbase;
- void __iomem *smu_base;
+ void __iomem *smu_virt_addr;
+ void __iomem *fch_virt_addr;
u32 base_addr;
u32 cpu_id;
+ u32 active_ips;
struct device *dev;
+ struct mutex lock; /* generic mutex lock */
#if IS_ENABLED(CONFIG_DEBUG_FS)
struct dentry *dbgfs_dir;
#endif /* CONFIG_DEBUG_FS */
};
static struct amd_pmc_dev pmc;
+static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, bool set, u32 *data, u8 msg, bool ret);
static inline u32 amd_pmc_reg_read(struct amd_pmc_dev *dev, int reg_offset)
{
iowrite32(val, dev->regbase + reg_offset);
}
+struct smu_metrics {
+ u32 table_version;
+ u32 hint_count;
+ u32 s0i3_cyclecount;
+ u32 timein_s0i2;
+ u64 timeentering_s0i3_lastcapture;
+ u64 timeentering_s0i3_totaltime;
+ u64 timeto_resume_to_os_lastcapture;
+ u64 timeto_resume_to_os_totaltime;
+ u64 timein_s0i3_lastcapture;
+ u64 timein_s0i3_totaltime;
+ u64 timein_swdrips_lastcapture;
+ u64 timein_swdrips_totaltime;
+ u64 timecondition_notmet_lastcapture[SOC_SUBSYSTEM_IP_MAX];
+ u64 timecondition_notmet_totaltime[SOC_SUBSYSTEM_IP_MAX];
+} __packed;
+
#ifdef CONFIG_DEBUG_FS
static int smu_fw_info_show(struct seq_file *s, void *unused)
{
struct amd_pmc_dev *dev = s->private;
- u32 value;
+ struct smu_metrics table;
+ int idx;
+
+ if (dev->cpu_id == AMD_CPU_ID_PCO)
+ return -EINVAL;
+
+ memcpy_fromio(&table, dev->smu_virt_addr, sizeof(struct smu_metrics));
+
+ seq_puts(s, "\n=== SMU Statistics ===\n");
+ seq_printf(s, "Table Version: %d\n", table.table_version);
+ seq_printf(s, "Hint Count: %d\n", table.hint_count);
+ seq_printf(s, "S0i3 Cycle Count: %d\n", table.s0i3_cyclecount);
+ seq_printf(s, "Time (in us) to S0i3: %lld\n", table.timeentering_s0i3_lastcapture);
+ seq_printf(s, "Time (in us) in S0i3: %lld\n", table.timein_s0i3_lastcapture);
+
+ seq_puts(s, "\n=== Active time (in us) ===\n");
+ for (idx = 0 ; idx < SOC_SUBSYSTEM_IP_MAX ; idx++) {
+ if (soc15_ip_blk[idx].bit_mask & dev->active_ips)
+ seq_printf(s, "%-8s : %lld\n", soc15_ip_blk[idx].name,
+ table.timecondition_notmet_lastcapture[idx]);
+ }
- value = ioread32(dev->smu_base + AMD_SMU_FW_VERSION);
- seq_printf(s, "SMU FW Info: %x\n", value);
return 0;
}
DEFINE_SHOW_ATTRIBUTE(smu_fw_info);
+static int s0ix_stats_show(struct seq_file *s, void *unused)
+{
+ struct amd_pmc_dev *dev = s->private;
+ u64 entry_time, exit_time, residency;
+
+ entry_time = ioread32(dev->fch_virt_addr + FCH_S0I3_ENTRY_TIME_H_OFFSET);
+ entry_time = entry_time << 32 | ioread32(dev->fch_virt_addr + FCH_S0I3_ENTRY_TIME_L_OFFSET);
+
+ exit_time = ioread32(dev->fch_virt_addr + FCH_S0I3_EXIT_TIME_H_OFFSET);
+ exit_time = exit_time << 32 | ioread32(dev->fch_virt_addr + FCH_S0I3_EXIT_TIME_L_OFFSET);
+
+ /* It's in 48MHz. We need to convert it */
+ residency = exit_time - entry_time;
+ do_div(residency, 48);
+
+ seq_puts(s, "=== S0ix statistics ===\n");
+ seq_printf(s, "S0ix Entry Time: %lld\n", entry_time);
+ seq_printf(s, "S0ix Exit Time: %lld\n", exit_time);
+ seq_printf(s, "Residency Time: %lld\n", residency);
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(s0ix_stats);
+
static void amd_pmc_dbgfs_unregister(struct amd_pmc_dev *dev)
{
debugfs_remove_recursive(dev->dbgfs_dir);
dev->dbgfs_dir = debugfs_create_dir("amd_pmc", NULL);
debugfs_create_file("smu_fw_info", 0644, dev->dbgfs_dir, dev,
&smu_fw_info_fops);
+ debugfs_create_file("s0ix_stats", 0644, dev->dbgfs_dir, dev,
+ &s0ix_stats_fops);
}
#else
static inline void amd_pmc_dbgfs_register(struct amd_pmc_dev *dev)
}
#endif /* CONFIG_DEBUG_FS */
+static int amd_pmc_setup_smu_logging(struct amd_pmc_dev *dev)
+{
+ u32 phys_addr_low, phys_addr_hi;
+ u64 smu_phys_addr;
+
+ if (dev->cpu_id == AMD_CPU_ID_PCO)
+ return -EINVAL;
+
+ /* Get Active devices list from SMU */
+ amd_pmc_send_cmd(dev, 0, &dev->active_ips, SMU_MSG_GET_SUP_CONSTRAINTS, 1);
+
+ /* Get dram address */
+ amd_pmc_send_cmd(dev, 0, &phys_addr_low, SMU_MSG_LOG_GETDRAM_ADDR_LO, 1);
+ amd_pmc_send_cmd(dev, 0, &phys_addr_hi, SMU_MSG_LOG_GETDRAM_ADDR_HI, 1);
+ smu_phys_addr = ((u64)phys_addr_hi << 32 | phys_addr_low);
+
+ dev->smu_virt_addr = devm_ioremap(dev->dev, smu_phys_addr, sizeof(struct smu_metrics));
+ if (!dev->smu_virt_addr)
+ return -ENOMEM;
+
+ /* Start the logging */
+ amd_pmc_send_cmd(dev, 0, NULL, SMU_MSG_LOG_START, 0);
+
+ return 0;
+}
+
static void amd_pmc_dump_registers(struct amd_pmc_dev *dev)
{
u32 value;
dev_dbg(dev->dev, "AMD_PMC_REGISTER_MESSAGE:%x\n", value);
}
-static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, bool set)
+static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, bool set, u32 *data, u8 msg, bool ret)
{
int rc;
- u8 msg;
u32 val;
+ mutex_lock(&dev->lock);
/* Wait until we get a valid response */
rc = readx_poll_timeout(ioread32, dev->regbase + AMD_PMC_REGISTER_RESPONSE,
- val, val > 0, PMC_MSG_DELAY_MIN_US,
+ val, val != 0, PMC_MSG_DELAY_MIN_US,
PMC_MSG_DELAY_MIN_US * RESPONSE_REGISTER_LOOP_MAX);
if (rc) {
dev_err(dev->dev, "failed to talk to SMU\n");
- return rc;
+ goto out_unlock;
}
/* Write zero to response register */
amd_pmc_reg_write(dev, AMD_PMC_REGISTER_ARGUMENT, set);
/* Write message ID to message ID register */
- msg = (dev->cpu_id == AMD_CPU_ID_RN) ? MSG_OS_HINT_RN : MSG_OS_HINT_PCO;
amd_pmc_reg_write(dev, AMD_PMC_REGISTER_MESSAGE, msg);
- return 0;
+
+ /* Wait until we get a valid response */
+ rc = readx_poll_timeout(ioread32, dev->regbase + AMD_PMC_REGISTER_RESPONSE,
+ val, val != 0, PMC_MSG_DELAY_MIN_US,
+ PMC_MSG_DELAY_MIN_US * RESPONSE_REGISTER_LOOP_MAX);
+ if (rc) {
+ dev_err(dev->dev, "SMU response timed out\n");
+ goto out_unlock;
+ }
+
+ switch (val) {
+ case AMD_PMC_RESULT_OK:
+ if (ret) {
+ /* PMFW may take longer time to return back the data */
+ usleep_range(DELAY_MIN_US, 10 * DELAY_MAX_US);
+ *data = amd_pmc_reg_read(dev, AMD_PMC_REGISTER_ARGUMENT);
+ }
+ break;
+ case AMD_PMC_RESULT_CMD_REJECT_BUSY:
+ dev_err(dev->dev, "SMU not ready. err: 0x%x\n", val);
+ rc = -EBUSY;
+ goto out_unlock;
+ case AMD_PMC_RESULT_CMD_UNKNOWN:
+ dev_err(dev->dev, "SMU cmd unknown. err: 0x%x\n", val);
+ rc = -EINVAL;
+ goto out_unlock;
+ case AMD_PMC_RESULT_CMD_REJECT_PREREQ:
+ case AMD_PMC_RESULT_FAILED:
+ default:
+ dev_err(dev->dev, "SMU cmd failed. err: 0x%x\n", val);
+ rc = -EIO;
+ goto out_unlock;
+ }
+
+out_unlock:
+ mutex_unlock(&dev->lock);
+ amd_pmc_dump_registers(dev);
+ return rc;
+}
+
+static int amd_pmc_get_os_hint(struct amd_pmc_dev *dev)
+{
+ switch (dev->cpu_id) {
+ case AMD_CPU_ID_PCO:
+ return MSG_OS_HINT_PCO;
+ case AMD_CPU_ID_RN:
+ case AMD_CPU_ID_YC:
+ return MSG_OS_HINT_RN;
+ }
+ return -EINVAL;
}
static int __maybe_unused amd_pmc_suspend(struct device *dev)
{
struct amd_pmc_dev *pdev = dev_get_drvdata(dev);
int rc;
+ u8 msg;
+
+ /* Reset and Start SMU logging - to monitor the s0i3 stats */
+ amd_pmc_send_cmd(pdev, 0, NULL, SMU_MSG_LOG_RESET, 0);
+ amd_pmc_send_cmd(pdev, 0, NULL, SMU_MSG_LOG_START, 0);
- rc = amd_pmc_send_cmd(pdev, 1);
+ msg = amd_pmc_get_os_hint(pdev);
+ rc = amd_pmc_send_cmd(pdev, 1, NULL, msg, 0);
if (rc)
dev_err(pdev->dev, "suspend failed\n");
- amd_pmc_dump_registers(pdev);
- return 0;
+ return rc;
}
static int __maybe_unused amd_pmc_resume(struct device *dev)
{
struct amd_pmc_dev *pdev = dev_get_drvdata(dev);
int rc;
+ u8 msg;
+
+ /* Let SMU know that we are looking for stats */
+ amd_pmc_send_cmd(pdev, 0, NULL, SMU_MSG_LOG_DUMP_DATA, 0);
- rc = amd_pmc_send_cmd(pdev, 0);
+ msg = amd_pmc_get_os_hint(pdev);
+ rc = amd_pmc_send_cmd(pdev, 0, NULL, msg, 0);
if (rc)
dev_err(pdev->dev, "resume failed\n");
- amd_pmc_dump_registers(pdev);
return 0;
}
};
static const struct pci_device_id pmc_pci_ids[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_YC) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_CZN) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_RN) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_PCO) },
{
struct amd_pmc_dev *dev = &pmc;
struct pci_dev *rdev;
- u32 base_addr_lo;
- u32 base_addr_hi;
- u64 base_addr;
+ u32 base_addr_lo, base_addr_hi;
+ u64 base_addr, fch_phys_addr;
int err;
u32 val;
pci_dev_put(rdev);
base_addr = ((u64)base_addr_hi << 32 | base_addr_lo);
- dev->smu_base = devm_ioremap(dev->dev, base_addr, AMD_PMC_MAPPING_SIZE);
- if (!dev->smu_base)
- return -ENOMEM;
-
dev->regbase = devm_ioremap(dev->dev, base_addr + AMD_PMC_BASE_ADDR_OFFSET,
AMD_PMC_MAPPING_SIZE);
if (!dev->regbase)
return -ENOMEM;
- amd_pmc_dump_registers(dev);
+ mutex_init(&dev->lock);
+
+ /* Use FCH registers to get the S0ix stats */
+ base_addr_lo = FCH_BASE_PHY_ADDR_LOW;
+ base_addr_hi = FCH_BASE_PHY_ADDR_HIGH;
+ fch_phys_addr = ((u64)base_addr_hi << 32 | base_addr_lo);
+ dev->fch_virt_addr = devm_ioremap(dev->dev, fch_phys_addr, FCH_SSC_MAPPING_SIZE);
+ if (!dev->fch_virt_addr)
+ return -ENOMEM;
+
+ /* Use SMU to get the s0i3 debug stats */
+ err = amd_pmc_setup_smu_logging(dev);
+ if (err)
+ dev_err(dev->dev, "SMU debugging info not supported on this platform\n");
platform_set_drvdata(pdev, dev);
amd_pmc_dbgfs_register(dev);
struct amd_pmc_dev *dev = platform_get_drvdata(pdev);
amd_pmc_dbgfs_unregister(dev);
+ mutex_destroy(&dev->lock);
return 0;
}
static const struct acpi_device_id amd_pmc_acpi_ids[] = {
{"AMDI0005", 0},
+ {"AMDI0006", 0},
+ {"AMDI0007", 0},
{"AMD0004", 0},
{ }
};
static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = {
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 AORUS ELITE"),
+ DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 AORUS ELITE V2"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 GAMING X V2"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M AORUS PRO-P"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M DS3H"),
{"INT33D5", 0},
{"INTC1051", 0},
{"INTC1054", 0},
+ {"INTC1070", 0},
{"", 0},
};
MODULE_DEVICE_TABLE(acpi, intel_hid_ids);
else
ret = tlmi_save_bios_settings("");
+ if (!ret && !tlmi_priv.pending_changes) {
+ tlmi_priv.pending_changes = true;
+ /* let userland know it may need to check reboot pending again */
+ kobject_uevent(&tlmi_priv.class_dev->kobj, KOBJ_CHANGE);
+ }
out:
kfree(auth_str);
kfree(set_str);
.sysfs_ops = &tlmi_kobj_sysfs_ops,
};
+static ssize_t pending_reboot_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%d\n", tlmi_priv.pending_changes);
+}
+
+static struct kobj_attribute pending_reboot = __ATTR_RO(pending_reboot);
+
/* ---- Initialisation --------------------------------------------------------- */
static void tlmi_release_attr(void)
{
kobject_put(&tlmi_priv.setting[i]->kobj);
}
}
+ sysfs_remove_file(&tlmi_priv.attribute_kset->kobj, &pending_reboot.attr);
kset_unregister(tlmi_priv.attribute_kset);
/* Authentication structures */
/* Build attribute */
tlmi_priv.setting[i]->kobj.kset = tlmi_priv.attribute_kset;
- ret = kobject_init_and_add(&tlmi_priv.setting[i]->kobj, &tlmi_attr_setting_ktype,
- NULL, "%s", tlmi_priv.setting[i]->display_name);
+ ret = kobject_add(&tlmi_priv.setting[i]->kobj, NULL,
+ "%s", tlmi_priv.setting[i]->display_name);
if (ret)
goto fail_create_attr;
goto fail_create_attr;
}
+ ret = sysfs_create_file(&tlmi_priv.attribute_kset->kobj, &pending_reboot.attr);
+ if (ret)
+ goto fail_create_attr;
+
/* Create authentication entries */
tlmi_priv.authentication_kset = kset_create_and_add("authentication", NULL,
&tlmi_priv.class_dev->kobj);
goto fail_create_attr;
}
tlmi_priv.pwd_admin->kobj.kset = tlmi_priv.authentication_kset;
- ret = kobject_init_and_add(&tlmi_priv.pwd_admin->kobj, &tlmi_pwd_setting_ktype,
- NULL, "%s", "Admin");
+ ret = kobject_add(&tlmi_priv.pwd_admin->kobj, NULL, "%s", "Admin");
if (ret)
goto fail_create_attr;
goto fail_create_attr;
tlmi_priv.pwd_power->kobj.kset = tlmi_priv.authentication_kset;
- ret = kobject_init_and_add(&tlmi_priv.pwd_power->kobj, &tlmi_pwd_setting_ktype,
- NULL, "%s", "System");
+ ret = kobject_add(&tlmi_priv.pwd_power->kobj, NULL, "%s", "System");
if (ret)
goto fail_create_attr;
pr_info("Error retrieving possible values for %d : %s\n",
i, setting->display_name);
}
+ kobject_init(&setting->kobj, &tlmi_attr_setting_ktype);
tlmi_priv.setting[i] = setting;
tlmi_priv.settings_count++;
kfree(item);
if (pwdcfg.password_state & TLMI_PAP_PWD)
tlmi_priv.pwd_admin->valid = true;
+ kobject_init(&tlmi_priv.pwd_admin->kobj, &tlmi_pwd_setting_ktype);
+
tlmi_priv.pwd_power = kzalloc(sizeof(struct tlmi_pwd_setting), GFP_KERNEL);
if (!tlmi_priv.pwd_power) {
ret = -ENOMEM;
- goto fail_clear_attr;
+ goto fail_free_pwd_admin;
}
strscpy(tlmi_priv.pwd_power->kbdlang, "us", TLMI_LANG_MAXLEN);
tlmi_priv.pwd_power->encoding = TLMI_ENCODING_ASCII;
if (pwdcfg.password_state & TLMI_POP_PWD)
tlmi_priv.pwd_power->valid = true;
+ kobject_init(&tlmi_priv.pwd_power->kobj, &tlmi_pwd_setting_ktype);
+
return 0;
+fail_free_pwd_admin:
+ kfree(tlmi_priv.pwd_admin);
fail_clear_attr:
- for (i = 0; i < TLMI_SETTINGS_COUNT; ++i)
- kfree(tlmi_priv.setting[i]);
+ for (i = 0; i < TLMI_SETTINGS_COUNT; ++i) {
+ if (tlmi_priv.setting[i]) {
+ kfree(tlmi_priv.setting[i]->possible_values);
+ kfree(tlmi_priv.setting[i]);
+ }
+ }
return ret;
}
bool can_get_bios_selections;
bool can_set_bios_password;
bool can_get_password_settings;
+ bool pending_changes;
struct tlmi_attr_setting *setting[TLMI_SETTINGS_COUNT];
struct device *class_dev;
err = wireless_input_setup();
if (err)
- pr_err("Failed to setup hp wireless hotkeys\n");
+ pr_err("Failed to setup wireless hotkeys\n");
return err;
}
tristate "OpenCompute TimeCard as PTP clock"
depends on PTP_1588_CLOCK
depends on HAS_IOMEM && PCI
+ depends on SPI && I2C && MTD
+ imply SPI_MEM
+ imply SPI_XILINX
+ imply MTD_SPI_NOR
+ imply I2C_XILINX
+ select SERIAL_8250
+
default n
help
This driver adds support for an OpenCompute time card.
#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
+#include <linux/serial_8250.h>
+#include <linux/clkdev.h>
+#include <linux/clk-provider.h>
+#include <linux/platform_device.h>
#include <linux/ptp_clock_kernel.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/xilinx_spi.h>
+#include <net/devlink.h>
+#include <linux/i2c.h>
+#include <linux/mtd/mtd.h>
-static const struct pci_device_id ptp_ocp_pcidev_id[] = {
- { PCI_DEVICE(0x1d9b, 0x0400) },
- { 0 }
-};
-MODULE_DEVICE_TABLE(pci, ptp_ocp_pcidev_id);
+#ifndef PCI_VENDOR_ID_FACEBOOK
+#define PCI_VENDOR_ID_FACEBOOK 0x1d9b
+#endif
-#define OCP_REGISTER_OFFSET 0x01000000
+#ifndef PCI_DEVICE_ID_FACEBOOK_TIMECARD
+#define PCI_DEVICE_ID_FACEBOOK_TIMECARD 0x0400
+#endif
+
+static struct class timecard_class = {
+ .owner = THIS_MODULE,
+ .name = "timecard",
+};
struct ocp_reg {
u32 ctrl;
u32 __pad1[2];
u32 offset_ns;
u32 offset_window_ns;
+ u32 __pad2[2];
+ u32 drift_ns;
+ u32 drift_window_ns;
+ u32 __pad3[6];
+ u32 servo_offset_p;
+ u32 servo_offset_i;
+ u32 servo_drift_p;
+ u32 servo_drift_i;
};
#define OCP_CTRL_ENABLE BIT(0)
#define OCP_CTRL_ADJUST_TIME BIT(1)
#define OCP_CTRL_ADJUST_OFFSET BIT(2)
+#define OCP_CTRL_ADJUST_DRIFT BIT(3)
+#define OCP_CTRL_ADJUST_SERVO BIT(8)
#define OCP_CTRL_READ_TIME_REQ BIT(30)
#define OCP_CTRL_READ_TIME_DONE BIT(31)
#define OCP_STATUS_IN_SYNC BIT(0)
+#define OCP_STATUS_IN_HOLDOVER BIT(1)
#define OCP_SELECT_CLK_NONE 0
-#define OCP_SELECT_CLK_REG 6
+#define OCP_SELECT_CLK_REG 0xfe
struct tod_reg {
u32 ctrl;
u32 leap;
};
-#define TOD_REGISTER_OFFSET 0x01050000
-
#define TOD_CTRL_PROTOCOL BIT(28)
#define TOD_CTRL_DISABLE_FMT_A BIT(17)
#define TOD_CTRL_DISABLE_FMT_B BIT(16)
#define TOD_STATUS_UTC_VALID BIT(8)
#define TOD_STATUS_LEAP_VALID BIT(16)
+struct ts_reg {
+ u32 enable;
+ u32 error;
+ u32 polarity;
+ u32 version;
+ u32 __pad0[4];
+ u32 cable_delay;
+ u32 __pad1[3];
+ u32 intr;
+ u32 intr_mask;
+ u32 event_count;
+ u32 __pad2[1];
+ u32 ts_count;
+ u32 time_ns;
+ u32 time_sec;
+ u32 data_width;
+ u32 data;
+};
+
+struct pps_reg {
+ u32 ctrl;
+ u32 status;
+ u32 __pad0[6];
+ u32 cable_delay;
+};
+
+#define PPS_STATUS_FILTER_ERR BIT(0)
+#define PPS_STATUS_SUPERV_ERR BIT(1)
+
+struct img_reg {
+ u32 version;
+};
+
+struct ptp_ocp_flash_info {
+ const char *name;
+ int pci_offset;
+ int data_size;
+ void *data;
+};
+
+struct ptp_ocp_ext_info {
+ const char *name;
+ int index;
+ irqreturn_t (*irq_fcn)(int irq, void *priv);
+ int (*enable)(void *priv, bool enable);
+};
+
+struct ptp_ocp_ext_src {
+ void __iomem *mem;
+ struct ptp_ocp *bp;
+ struct ptp_ocp_ext_info *info;
+ int irq_vec;
+};
+
struct ptp_ocp {
struct pci_dev *pdev;
+ struct device dev;
spinlock_t lock;
- void __iomem *base;
struct ocp_reg __iomem *reg;
struct tod_reg __iomem *tod;
+ struct pps_reg __iomem *pps_to_ext;
+ struct pps_reg __iomem *pps_to_clk;
+ struct ptp_ocp_ext_src *pps;
+ struct ptp_ocp_ext_src *ts0;
+ struct ptp_ocp_ext_src *ts1;
+ struct img_reg __iomem *image;
struct ptp_clock *ptp;
struct ptp_clock_info ptp_info;
+ struct platform_device *i2c_ctrl;
+ struct platform_device *spi_flash;
+ struct clk_hw *i2c_clk;
+ struct timer_list watchdog;
+ time64_t gnss_lost;
+ int id;
+ int n_irqs;
+ int gnss_port;
+ int mac_port; /* miniature atomic clock */
+ u8 serial[6];
+ int flash_start;
+ bool has_serial;
};
+struct ocp_resource {
+ unsigned long offset;
+ int size;
+ int irq_vec;
+ int (*setup)(struct ptp_ocp *bp, struct ocp_resource *r);
+ void *extra;
+ unsigned long bp_offset;
+};
+
+static int ptp_ocp_register_mem(struct ptp_ocp *bp, struct ocp_resource *r);
+static int ptp_ocp_register_i2c(struct ptp_ocp *bp, struct ocp_resource *r);
+static int ptp_ocp_register_spi(struct ptp_ocp *bp, struct ocp_resource *r);
+static int ptp_ocp_register_serial(struct ptp_ocp *bp, struct ocp_resource *r);
+static int ptp_ocp_register_ext(struct ptp_ocp *bp, struct ocp_resource *r);
+static int ptp_ocp_fb_board_init(struct ptp_ocp *bp, struct ocp_resource *r);
+static irqreturn_t ptp_ocp_ts_irq(int irq, void *priv);
+static int ptp_ocp_ts_enable(void *priv, bool enable);
+
+#define bp_assign_entry(bp, res, val) ({ \
+ uintptr_t addr = (uintptr_t)(bp) + (res)->bp_offset; \
+ *(typeof(val) *)addr = val; \
+})
+
+#define OCP_RES_LOCATION(member) \
+ .bp_offset = offsetof(struct ptp_ocp, member)
+
+#define OCP_MEM_RESOURCE(member) \
+ OCP_RES_LOCATION(member), .setup = ptp_ocp_register_mem
+
+#define OCP_SERIAL_RESOURCE(member) \
+ OCP_RES_LOCATION(member), .setup = ptp_ocp_register_serial
+
+#define OCP_I2C_RESOURCE(member) \
+ OCP_RES_LOCATION(member), .setup = ptp_ocp_register_i2c
+
+#define OCP_SPI_RESOURCE(member) \
+ OCP_RES_LOCATION(member), .setup = ptp_ocp_register_spi
+
+#define OCP_EXT_RESOURCE(member) \
+ OCP_RES_LOCATION(member), .setup = ptp_ocp_register_ext
+
+/* This is the MSI vector mapping used.
+ * 0: N/C
+ * 1: TS0
+ * 2: TS1
+ * 3: GPS
+ * 4: GPS2 (n/c)
+ * 5: MAC
+ * 6: SPI IMU (inertial measurement unit)
+ * 7: I2C oscillator
+ * 8: HWICAP
+ * 9: SPI Flash
+ */
+
+static struct ocp_resource ocp_fb_resource[] = {
+ {
+ OCP_MEM_RESOURCE(reg),
+ .offset = 0x01000000, .size = 0x10000,
+ },
+ {
+ OCP_EXT_RESOURCE(ts0),
+ .offset = 0x01010000, .size = 0x10000, .irq_vec = 1,
+ .extra = &(struct ptp_ocp_ext_info) {
+ .name = "ts0", .index = 0,
+ .irq_fcn = ptp_ocp_ts_irq,
+ .enable = ptp_ocp_ts_enable,
+ },
+ },
+ {
+ OCP_EXT_RESOURCE(ts1),
+ .offset = 0x01020000, .size = 0x10000, .irq_vec = 2,
+ .extra = &(struct ptp_ocp_ext_info) {
+ .name = "ts1", .index = 1,
+ .irq_fcn = ptp_ocp_ts_irq,
+ .enable = ptp_ocp_ts_enable,
+ },
+ },
+ {
+ OCP_MEM_RESOURCE(pps_to_ext),
+ .offset = 0x01030000, .size = 0x10000,
+ },
+ {
+ OCP_MEM_RESOURCE(pps_to_clk),
+ .offset = 0x01040000, .size = 0x10000,
+ },
+ {
+ OCP_MEM_RESOURCE(tod),
+ .offset = 0x01050000, .size = 0x10000,
+ },
+ {
+ OCP_MEM_RESOURCE(image),
+ .offset = 0x00020000, .size = 0x1000,
+ },
+ {
+ OCP_I2C_RESOURCE(i2c_ctrl),
+ .offset = 0x00150000, .size = 0x10000, .irq_vec = 7,
+ },
+ {
+ OCP_SERIAL_RESOURCE(gnss_port),
+ .offset = 0x00160000 + 0x1000, .irq_vec = 3,
+ },
+ {
+ OCP_SERIAL_RESOURCE(mac_port),
+ .offset = 0x00180000 + 0x1000, .irq_vec = 5,
+ },
+ {
+ OCP_SPI_RESOURCE(spi_flash),
+ .offset = 0x00310000, .size = 0x10000, .irq_vec = 9,
+ .extra = &(struct ptp_ocp_flash_info) {
+ .name = "xilinx_spi", .pci_offset = 0,
+ .data_size = sizeof(struct xspi_platform_data),
+ .data = &(struct xspi_platform_data) {
+ .num_chipselect = 1,
+ .bits_per_word = 8,
+ .num_devices = 1,
+ .devices = &(struct spi_board_info) {
+ .modalias = "spi-nor",
+ },
+ },
+ },
+ },
+ {
+ .setup = ptp_ocp_fb_board_init,
+ },
+ { }
+};
+
+static const struct pci_device_id ptp_ocp_pcidev_id[] = {
+ { PCI_DEVICE_DATA(FACEBOOK, TIMECARD, &ocp_fb_resource) },
+ { 0 }
+};
+MODULE_DEVICE_TABLE(pci, ptp_ocp_pcidev_id);
+
+static DEFINE_MUTEX(ptp_ocp_lock);
+static DEFINE_IDR(ptp_ocp_idr);
+
+static struct {
+ const char *name;
+ int value;
+} ptp_ocp_clock[] = {
+ { .name = "NONE", .value = 0 },
+ { .name = "TOD", .value = 1 },
+ { .name = "IRIG", .value = 2 },
+ { .name = "PPS", .value = 3 },
+ { .name = "PTP", .value = 4 },
+ { .name = "RTC", .value = 5 },
+ { .name = "DCF", .value = 6 },
+ { .name = "REGS", .value = 0xfe },
+ { .name = "EXT", .value = 0xff },
+};
+
+static const char *
+ptp_ocp_clock_name_from_val(int val)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(ptp_ocp_clock); i++)
+ if (ptp_ocp_clock[i].value == val)
+ return ptp_ocp_clock[i].name;
+ return NULL;
+}
+
+static int
+ptp_ocp_clock_val_from_name(const char *name)
+{
+ const char *clk;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(ptp_ocp_clock); i++) {
+ clk = ptp_ocp_clock[i].name;
+ if (!strncasecmp(name, clk, strlen(clk)))
+ return ptp_ocp_clock[i].value;
+ }
+ return -EINVAL;
+}
+
static int
__ptp_ocp_gettime_locked(struct ptp_ocp *bp, struct timespec64 *ts,
struct ptp_system_timestamp *sts)
return -EOPNOTSUPP;
}
+static int
+ptp_ocp_adjphase(struct ptp_clock_info *ptp_info, s32 phase_ns)
+{
+ return -EOPNOTSUPP;
+}
+
+static int
+ptp_ocp_enable(struct ptp_clock_info *ptp_info, struct ptp_clock_request *rq,
+ int on)
+{
+ struct ptp_ocp *bp = container_of(ptp_info, struct ptp_ocp, ptp_info);
+ struct ptp_ocp_ext_src *ext = NULL;
+ int err;
+
+ switch (rq->type) {
+ case PTP_CLK_REQ_EXTTS:
+ switch (rq->extts.index) {
+ case 0:
+ ext = bp->ts0;
+ break;
+ case 1:
+ ext = bp->ts1;
+ break;
+ }
+ break;
+ case PTP_CLK_REQ_PPS:
+ ext = bp->pps;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ err = -ENXIO;
+ if (ext)
+ err = ext->info->enable(ext, on);
+
+ return err;
+}
+
static const struct ptp_clock_info ptp_ocp_clock_info = {
.owner = THIS_MODULE,
.name = KBUILD_MODNAME,
.settime64 = ptp_ocp_settime,
.adjtime = ptp_ocp_adjtime,
.adjfine = ptp_ocp_null_adjfine,
+ .adjphase = ptp_ocp_adjphase,
+ .enable = ptp_ocp_enable,
+ .pps = true,
+ .n_ext_ts = 2,
};
+static void
+__ptp_ocp_clear_drift_locked(struct ptp_ocp *bp)
+{
+ u32 ctrl, select;
+
+ select = ioread32(&bp->reg->select);
+ iowrite32(OCP_SELECT_CLK_REG, &bp->reg->select);
+
+ iowrite32(0, &bp->reg->drift_ns);
+
+ ctrl = ioread32(&bp->reg->ctrl);
+ ctrl |= OCP_CTRL_ADJUST_DRIFT;
+ iowrite32(ctrl, &bp->reg->ctrl);
+
+ /* restore clock selection */
+ iowrite32(select >> 16, &bp->reg->select);
+}
+
+static void
+ptp_ocp_watchdog(struct timer_list *t)
+{
+ struct ptp_ocp *bp = from_timer(bp, t, watchdog);
+ unsigned long flags;
+ u32 status;
+
+ status = ioread32(&bp->pps_to_clk->status);
+
+ if (status & PPS_STATUS_SUPERV_ERR) {
+ iowrite32(status, &bp->pps_to_clk->status);
+ if (!bp->gnss_lost) {
+ spin_lock_irqsave(&bp->lock, flags);
+ __ptp_ocp_clear_drift_locked(bp);
+ spin_unlock_irqrestore(&bp->lock, flags);
+ bp->gnss_lost = ktime_get_real_seconds();
+ }
+
+ } else if (bp->gnss_lost) {
+ bp->gnss_lost = 0;
+ }
+
+ mod_timer(&bp->watchdog, jiffies + HZ);
+}
+
static int
-ptp_ocp_check_clock(struct ptp_ocp *bp)
+ptp_ocp_init_clock(struct ptp_ocp *bp)
{
struct timespec64 ts;
bool sync;
ctrl |= OCP_CTRL_ENABLE;
iowrite32(ctrl, &bp->reg->ctrl);
+ /* NO DRIFT Correction */
+ /* offset_p:i 1/8, offset_i: 1/16, drift_p: 0, drift_i: 0 */
+ iowrite32(0x2000, &bp->reg->servo_offset_p);
+ iowrite32(0x1000, &bp->reg->servo_offset_i);
+ iowrite32(0, &bp->reg->servo_drift_p);
+ iowrite32(0, &bp->reg->servo_drift_i);
+
+ /* latch servo values */
+ ctrl |= OCP_CTRL_ADJUST_SERVO;
+ iowrite32(ctrl, &bp->reg->ctrl);
+
if ((ioread32(&bp->reg->ctrl) & OCP_CTRL_ENABLE) == 0) {
dev_err(&bp->pdev->dev, "clock not enabled\n");
return -ENODEV;
ts.tv_sec, ts.tv_nsec,
sync ? "in-sync" : "UNSYNCED");
+ timer_setup(&bp->watchdog, ptp_ocp_watchdog, 0);
+ mod_timer(&bp->watchdog, jiffies + HZ);
+
return 0;
}
reg & TOD_STATUS_LEAP_VALID ? 1 : 0);
}
+static int
+ptp_ocp_firstchild(struct device *dev, void *data)
+{
+ return 1;
+}
+
+static int
+ptp_ocp_read_i2c(struct i2c_adapter *adap, u8 addr, u8 reg, u8 sz, u8 *data)
+{
+ struct i2c_msg msgs[2] = {
+ {
+ .addr = addr,
+ .len = 1,
+ .buf = ®,
+ },
+ {
+ .addr = addr,
+ .flags = I2C_M_RD,
+ .len = 2,
+ .buf = data,
+ },
+ };
+ int err;
+ u8 len;
+
+ /* xiic-i2c for some stupid reason only does 2 byte reads. */
+ while (sz) {
+ len = min_t(u8, sz, 2);
+ msgs[1].len = len;
+ err = i2c_transfer(adap, msgs, 2);
+ if (err != msgs[1].len)
+ return err;
+ msgs[1].buf += len;
+ reg += len;
+ sz -= len;
+ }
+ return 0;
+}
+
+static void
+ptp_ocp_get_serial_number(struct ptp_ocp *bp)
+{
+ struct i2c_adapter *adap;
+ struct device *dev;
+ int err;
+
+ dev = device_find_child(&bp->i2c_ctrl->dev, NULL, ptp_ocp_firstchild);
+ if (!dev) {
+ dev_err(&bp->pdev->dev, "Can't find I2C adapter\n");
+ return;
+ }
+
+ adap = i2c_verify_adapter(dev);
+ if (!adap) {
+ dev_err(&bp->pdev->dev, "device '%s' isn't an I2C adapter\n",
+ dev_name(dev));
+ goto out;
+ }
+
+ err = ptp_ocp_read_i2c(adap, 0x58, 0x9A, 6, bp->serial);
+ if (err) {
+ dev_err(&bp->pdev->dev, "could not read eeprom: %d\n", err);
+ goto out;
+ }
+
+ bp->has_serial = true;
+
+out:
+ put_device(dev);
+}
+
static void
ptp_ocp_info(struct ptp_ocp *bp)
{
- static const char * const clock_name[] = {
- "NO", "TOD", "IRIG", "PPS", "PTP", "RTC", "REGS", "EXT"
- };
u32 version, select;
version = ioread32(&bp->reg->version);
select = ioread32(&bp->reg->select);
dev_info(&bp->pdev->dev, "Version %d.%d.%d, clock %s, device ptp%d\n",
version >> 24, (version >> 16) & 0xff, version & 0xffff,
- clock_name[select & 7],
+ ptp_ocp_clock_name_from_val(select >> 16),
ptp_clock_index(bp->ptp));
ptp_ocp_tod_info(bp);
}
+static struct device *
+ptp_ocp_find_flash(struct ptp_ocp *bp)
+{
+ struct device *dev, *last;
+
+ last = NULL;
+ dev = &bp->spi_flash->dev;
+
+ while ((dev = device_find_child(dev, NULL, ptp_ocp_firstchild))) {
+ if (!strcmp("mtd", dev_bus_name(dev)))
+ break;
+ put_device(last);
+ last = dev;
+ }
+ put_device(last);
+
+ return dev;
+}
+
static int
-ptp_ocp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+ptp_ocp_devlink_flash(struct devlink *devlink, struct device *dev,
+ const struct firmware *fw)
{
- struct ptp_ocp *bp;
+ struct mtd_info *mtd = dev_get_drvdata(dev);
+ struct ptp_ocp *bp = devlink_priv(devlink);
+ size_t off, len, resid, wrote;
+ struct erase_info erase;
+ size_t base, blksz;
+ int err;
+
+ off = 0;
+ base = bp->flash_start;
+ blksz = 4096;
+ resid = fw->size;
+
+ while (resid) {
+ devlink_flash_update_status_notify(devlink, "Flashing",
+ NULL, off, fw->size);
+
+ len = min_t(size_t, resid, blksz);
+ erase.addr = base + off;
+ erase.len = blksz;
+
+ err = mtd_erase(mtd, &erase);
+ if (err)
+ goto out;
+
+ err = mtd_write(mtd, base + off, len, &wrote, &fw->data[off]);
+ if (err)
+ goto out;
+
+ off += blksz;
+ resid -= len;
+ }
+out:
+ return err;
+}
+
+static int
+ptp_ocp_devlink_flash_update(struct devlink *devlink,
+ struct devlink_flash_update_params *params,
+ struct netlink_ext_ack *extack)
+{
+ struct ptp_ocp *bp = devlink_priv(devlink);
+ struct device *dev;
+ const char *msg;
+ int err;
+
+ dev = ptp_ocp_find_flash(bp);
+ if (!dev) {
+ dev_err(&bp->pdev->dev, "Can't find Flash SPI adapter\n");
+ return -ENODEV;
+ }
+
+ devlink_flash_update_status_notify(devlink, "Preparing to flash",
+ NULL, 0, 0);
+
+ err = ptp_ocp_devlink_flash(devlink, dev, params->fw);
+
+ msg = err ? "Flash error" : "Flash complete";
+ devlink_flash_update_status_notify(devlink, msg, NULL, 0, 0);
+
+ put_device(dev);
+ return err;
+}
+
+static int
+ptp_ocp_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
+ struct netlink_ext_ack *extack)
+{
+ struct ptp_ocp *bp = devlink_priv(devlink);
+ char buf[32];
+ int err;
+
+ err = devlink_info_driver_name_put(req, KBUILD_MODNAME);
+ if (err)
+ return err;
+
+ if (bp->image) {
+ u32 ver = ioread32(&bp->image->version);
+
+ if (ver & 0xffff) {
+ sprintf(buf, "%d", ver);
+ err = devlink_info_version_running_put(req,
+ "fw",
+ buf);
+ } else {
+ sprintf(buf, "%d", ver >> 16);
+ err = devlink_info_version_running_put(req,
+ "loader",
+ buf);
+ }
+ if (err)
+ return err;
+ }
+
+ if (!bp->has_serial)
+ ptp_ocp_get_serial_number(bp);
+
+ if (bp->has_serial) {
+ sprintf(buf, "%pM", bp->serial);
+ err = devlink_info_serial_number_put(req, buf);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static const struct devlink_ops ptp_ocp_devlink_ops = {
+ .flash_update = ptp_ocp_devlink_flash_update,
+ .info_get = ptp_ocp_devlink_info_get,
+};
+
+static void __iomem *
+__ptp_ocp_get_mem(struct ptp_ocp *bp, unsigned long start, int size)
+{
+ struct resource res = DEFINE_RES_MEM_NAMED(start, size, "ptp_ocp");
+
+ return devm_ioremap_resource(&bp->pdev->dev, &res);
+}
+
+static void __iomem *
+ptp_ocp_get_mem(struct ptp_ocp *bp, struct ocp_resource *r)
+{
+ unsigned long start;
+
+ start = pci_resource_start(bp->pdev, 0) + r->offset;
+ return __ptp_ocp_get_mem(bp, start, r->size);
+}
+
+static void
+ptp_ocp_set_irq_resource(struct resource *res, int irq)
+{
+ struct resource r = DEFINE_RES_IRQ(irq);
+ *res = r;
+}
+
+static void
+ptp_ocp_set_mem_resource(struct resource *res, unsigned long start, int size)
+{
+ struct resource r = DEFINE_RES_MEM(start, size);
+ *res = r;
+}
+
+static int
+ptp_ocp_register_spi(struct ptp_ocp *bp, struct ocp_resource *r)
+{
+ struct ptp_ocp_flash_info *info;
+ struct pci_dev *pdev = bp->pdev;
+ struct platform_device *p;
+ struct resource res[2];
+ unsigned long start;
+ int id;
+
+ /* XXX hack to work around old FPGA */
+ if (bp->n_irqs < 10) {
+ dev_err(&bp->pdev->dev, "FPGA does not have SPI devices\n");
+ return 0;
+ }
+
+ if (r->irq_vec > bp->n_irqs) {
+ dev_err(&bp->pdev->dev, "spi device irq %d out of range\n",
+ r->irq_vec);
+ return 0;
+ }
+
+ start = pci_resource_start(pdev, 0) + r->offset;
+ ptp_ocp_set_mem_resource(&res[0], start, r->size);
+ ptp_ocp_set_irq_resource(&res[1], pci_irq_vector(pdev, r->irq_vec));
+
+ info = r->extra;
+ id = pci_dev_id(pdev) << 1;
+ id += info->pci_offset;
+
+ p = platform_device_register_resndata(&pdev->dev, info->name, id,
+ res, 2, info->data,
+ info->data_size);
+ if (IS_ERR(p))
+ return PTR_ERR(p);
+
+ bp_assign_entry(bp, r, p);
+
+ return 0;
+}
+
+static struct platform_device *
+ptp_ocp_i2c_bus(struct pci_dev *pdev, struct ocp_resource *r, int id)
+{
+ struct resource res[2];
+ unsigned long start;
+
+ start = pci_resource_start(pdev, 0) + r->offset;
+ ptp_ocp_set_mem_resource(&res[0], start, r->size);
+ ptp_ocp_set_irq_resource(&res[1], pci_irq_vector(pdev, r->irq_vec));
+
+ return platform_device_register_resndata(&pdev->dev, "xiic-i2c",
+ id, res, 2, NULL, 0);
+}
+
+static int
+ptp_ocp_register_i2c(struct ptp_ocp *bp, struct ocp_resource *r)
+{
+ struct pci_dev *pdev = bp->pdev;
+ struct platform_device *p;
+ struct clk_hw *clk;
+ char buf[32];
+ int id;
+
+ if (r->irq_vec > bp->n_irqs) {
+ dev_err(&bp->pdev->dev, "i2c device irq %d out of range\n",
+ r->irq_vec);
+ return 0;
+ }
+
+ id = pci_dev_id(bp->pdev);
+
+ sprintf(buf, "AXI.%d", id);
+ clk = clk_hw_register_fixed_rate(&pdev->dev, buf, NULL, 0, 50000000);
+ if (IS_ERR(clk))
+ return PTR_ERR(clk);
+ bp->i2c_clk = clk;
+
+ sprintf(buf, "xiic-i2c.%d", id);
+ devm_clk_hw_register_clkdev(&pdev->dev, clk, NULL, buf);
+ p = ptp_ocp_i2c_bus(bp->pdev, r, id);
+ if (IS_ERR(p))
+ return PTR_ERR(p);
+
+ bp_assign_entry(bp, r, p);
+
+ return 0;
+}
+
+static irqreturn_t
+ptp_ocp_ts_irq(int irq, void *priv)
+{
+ struct ptp_ocp_ext_src *ext = priv;
+ struct ts_reg __iomem *reg = ext->mem;
+ struct ptp_clock_event ev;
+ u32 sec, nsec;
+
+ /* XXX should fix API - this converts s/ns -> ts -> s/ns */
+ sec = ioread32(®->time_sec);
+ nsec = ioread32(®->time_ns);
+
+ ev.type = PTP_CLOCK_EXTTS;
+ ev.index = ext->info->index;
+ ev.timestamp = sec * 1000000000ULL + nsec;
+
+ ptp_clock_event(ext->bp->ptp, &ev);
+
+ iowrite32(1, ®->intr); /* write 1 to ack */
+
+ return IRQ_HANDLED;
+}
+
+static int
+ptp_ocp_ts_enable(void *priv, bool enable)
+{
+ struct ptp_ocp_ext_src *ext = priv;
+ struct ts_reg __iomem *reg = ext->mem;
+
+ if (enable) {
+ iowrite32(1, ®->enable);
+ iowrite32(1, ®->intr_mask);
+ iowrite32(1, ®->intr);
+ } else {
+ iowrite32(0, ®->intr_mask);
+ iowrite32(0, ®->enable);
+ }
+
+ return 0;
+}
+
+static void
+ptp_ocp_unregister_ext(struct ptp_ocp_ext_src *ext)
+{
+ ext->info->enable(ext, false);
+ pci_free_irq(ext->bp->pdev, ext->irq_vec, ext);
+ kfree(ext);
+}
+
+static int
+ptp_ocp_register_ext(struct ptp_ocp *bp, struct ocp_resource *r)
+{
+ struct pci_dev *pdev = bp->pdev;
+ struct ptp_ocp_ext_src *ext;
int err;
- bp = kzalloc(sizeof(*bp), GFP_KERNEL);
- if (!bp)
+ ext = kzalloc(sizeof(*ext), GFP_KERNEL);
+ if (!ext)
return -ENOMEM;
- bp->pdev = pdev;
- pci_set_drvdata(pdev, bp);
- err = pci_enable_device(pdev);
+ err = -EINVAL;
+ ext->mem = ptp_ocp_get_mem(bp, r);
+ if (!ext->mem)
+ goto out;
+
+ ext->bp = bp;
+ ext->info = r->extra;
+ ext->irq_vec = r->irq_vec;
+
+ err = pci_request_irq(pdev, r->irq_vec, ext->info->irq_fcn, NULL,
+ ext, "ocp%d.%s", bp->id, ext->info->name);
if (err) {
- dev_err(&pdev->dev, "pci_enable_device\n");
- goto out_free;
+ dev_err(&pdev->dev, "Could not get irq %d\n", r->irq_vec);
+ goto out;
}
- err = pci_request_regions(pdev, KBUILD_MODNAME);
- if (err) {
- dev_err(&pdev->dev, "pci_request_region\n");
- goto out_disable;
+ bp_assign_entry(bp, r, ext);
+
+ return 0;
+
+out:
+ kfree(ext);
+ return err;
+}
+
+static int
+ptp_ocp_serial_line(struct ptp_ocp *bp, struct ocp_resource *r)
+{
+ struct pci_dev *pdev = bp->pdev;
+ struct uart_8250_port uart;
+
+ /* Setting UPF_IOREMAP and leaving port.membase unspecified lets
+ * the serial port device claim and release the pci resource.
+ */
+ memset(&uart, 0, sizeof(uart));
+ uart.port.dev = &pdev->dev;
+ uart.port.iotype = UPIO_MEM;
+ uart.port.regshift = 2;
+ uart.port.mapbase = pci_resource_start(pdev, 0) + r->offset;
+ uart.port.irq = pci_irq_vector(pdev, r->irq_vec);
+ uart.port.uartclk = 50000000;
+ uart.port.flags = UPF_FIXED_TYPE | UPF_IOREMAP;
+ uart.port.type = PORT_16550A;
+
+ return serial8250_register_8250_port(&uart);
+}
+
+static int
+ptp_ocp_register_serial(struct ptp_ocp *bp, struct ocp_resource *r)
+{
+ int port;
+
+ if (r->irq_vec > bp->n_irqs) {
+ dev_err(&bp->pdev->dev, "serial device irq %d out of range\n",
+ r->irq_vec);
+ return 0;
}
- bp->base = pci_ioremap_bar(pdev, 0);
- if (!bp->base) {
- dev_err(&pdev->dev, "io_remap bar0\n");
- err = -ENOMEM;
- goto out_release_regions;
+ port = ptp_ocp_serial_line(bp, r);
+ if (port < 0)
+ return port;
+
+ bp_assign_entry(bp, r, port);
+
+ return 0;
+}
+
+static int
+ptp_ocp_register_mem(struct ptp_ocp *bp, struct ocp_resource *r)
+{
+ void __iomem *mem;
+
+ mem = ptp_ocp_get_mem(bp, r);
+ if (!mem)
+ return -EINVAL;
+
+ bp_assign_entry(bp, r, mem);
+
+ return 0;
+}
+
+/* FB specific board initializers; last "resource" registered. */
+static int
+ptp_ocp_fb_board_init(struct ptp_ocp *bp, struct ocp_resource *r)
+{
+ bp->flash_start = 1024 * 4096;
+
+ return ptp_ocp_init_clock(bp);
+}
+
+static int
+ptp_ocp_register_resources(struct ptp_ocp *bp, kernel_ulong_t driver_data)
+{
+ struct ocp_resource *r, *table;
+ int err = 0;
+
+ table = (struct ocp_resource *)driver_data;
+ for (r = table; r->setup; r++) {
+ err = r->setup(bp, r);
+ if (err)
+ break;
+ }
+ return err;
+}
+
+static ssize_t
+serialnum_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct ptp_ocp *bp = dev_get_drvdata(dev);
+
+ if (!bp->has_serial)
+ ptp_ocp_get_serial_number(bp);
+
+ return sysfs_emit(buf, "%pM\n", bp->serial);
+}
+static DEVICE_ATTR_RO(serialnum);
+
+static ssize_t
+gnss_sync_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct ptp_ocp *bp = dev_get_drvdata(dev);
+ ssize_t ret;
+
+ if (bp->gnss_lost)
+ ret = sysfs_emit(buf, "LOST @ %ptT\n", &bp->gnss_lost);
+ else
+ ret = sysfs_emit(buf, "SYNC\n");
+
+ return ret;
+}
+static DEVICE_ATTR_RO(gnss_sync);
+
+static ssize_t
+clock_source_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct ptp_ocp *bp = dev_get_drvdata(dev);
+ const char *p;
+ u32 select;
+
+ select = ioread32(&bp->reg->select);
+ p = ptp_ocp_clock_name_from_val(select >> 16);
+
+ return sysfs_emit(buf, "%s\n", p);
+}
+
+static ssize_t
+clock_source_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct ptp_ocp *bp = dev_get_drvdata(dev);
+ unsigned long flags;
+ int val;
+
+ val = ptp_ocp_clock_val_from_name(buf);
+ if (val < 0)
+ return val;
+
+ spin_lock_irqsave(&bp->lock, flags);
+ iowrite32(val, &bp->reg->select);
+ spin_unlock_irqrestore(&bp->lock, flags);
+
+ return count;
+}
+static DEVICE_ATTR_RW(clock_source);
+
+static ssize_t
+available_clock_sources_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ const char *clk;
+ ssize_t count;
+ int i;
+
+ count = 0;
+ for (i = 0; i < ARRAY_SIZE(ptp_ocp_clock); i++) {
+ clk = ptp_ocp_clock[i].name;
+ count += sysfs_emit_at(buf, count, "%s ", clk);
+ }
+ if (count)
+ count--;
+ count += sysfs_emit_at(buf, count, "\n");
+ return count;
+}
+static DEVICE_ATTR_RO(available_clock_sources);
+
+static struct attribute *timecard_attrs[] = {
+ &dev_attr_serialnum.attr,
+ &dev_attr_gnss_sync.attr,
+ &dev_attr_clock_source.attr,
+ &dev_attr_available_clock_sources.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(timecard);
+
+static void
+ptp_ocp_dev_release(struct device *dev)
+{
+ struct ptp_ocp *bp = dev_get_drvdata(dev);
+
+ mutex_lock(&ptp_ocp_lock);
+ idr_remove(&ptp_ocp_idr, bp->id);
+ mutex_unlock(&ptp_ocp_lock);
+}
+
+static int
+ptp_ocp_device_init(struct ptp_ocp *bp, struct pci_dev *pdev)
+{
+ int err;
+
+ mutex_lock(&ptp_ocp_lock);
+ err = idr_alloc(&ptp_ocp_idr, bp, 0, 0, GFP_KERNEL);
+ mutex_unlock(&ptp_ocp_lock);
+ if (err < 0) {
+ dev_err(&pdev->dev, "idr_alloc failed: %d\n", err);
+ return err;
}
- bp->reg = bp->base + OCP_REGISTER_OFFSET;
- bp->tod = bp->base + TOD_REGISTER_OFFSET;
+ bp->id = err;
+
bp->ptp_info = ptp_ocp_clock_info;
spin_lock_init(&bp->lock);
+ bp->gnss_port = -1;
+ bp->mac_port = -1;
+ bp->pdev = pdev;
+
+ device_initialize(&bp->dev);
+ dev_set_name(&bp->dev, "ocp%d", bp->id);
+ bp->dev.class = &timecard_class;
+ bp->dev.parent = &pdev->dev;
+ bp->dev.release = ptp_ocp_dev_release;
+ dev_set_drvdata(&bp->dev, bp);
+
+ err = device_add(&bp->dev);
+ if (err) {
+ dev_err(&bp->dev, "device add failed: %d\n", err);
+ goto out;
+ }
+
+ pci_set_drvdata(pdev, bp);
+
+ return 0;
+
+out:
+ ptp_ocp_dev_release(&bp->dev);
+ put_device(&bp->dev);
+ return err;
+}
+
+static void
+ptp_ocp_symlink(struct ptp_ocp *bp, struct device *child, const char *link)
+{
+ struct device *dev = &bp->dev;
+
+ if (sysfs_create_link(&dev->kobj, &child->kobj, link))
+ dev_err(dev, "%s symlink failed\n", link);
+}
+
+static void
+ptp_ocp_link_child(struct ptp_ocp *bp, const char *name, const char *link)
+{
+ struct device *dev, *child;
+
+ dev = &bp->pdev->dev;
+
+ child = device_find_child_by_name(dev, name);
+ if (!child) {
+ dev_err(dev, "Could not find device %s\n", name);
+ return;
+ }
+
+ ptp_ocp_symlink(bp, child, link);
+ put_device(child);
+}
+
+static int
+ptp_ocp_complete(struct ptp_ocp *bp)
+{
+ struct pps_device *pps;
+ char buf[32];
+
+ if (bp->gnss_port != -1) {
+ sprintf(buf, "ttyS%d", bp->gnss_port);
+ ptp_ocp_link_child(bp, buf, "ttyGNSS");
+ }
+ if (bp->mac_port != -1) {
+ sprintf(buf, "ttyS%d", bp->mac_port);
+ ptp_ocp_link_child(bp, buf, "ttyMAC");
+ }
+ sprintf(buf, "ptp%d", ptp_clock_index(bp->ptp));
+ ptp_ocp_link_child(bp, buf, "ptp");
+
+ pps = pps_lookup_dev(bp->ptp);
+ if (pps)
+ ptp_ocp_symlink(bp, pps->dev, "pps");
+
+ if (device_add_groups(&bp->dev, timecard_groups))
+ pr_err("device add groups failed\n");
- err = ptp_ocp_check_clock(bp);
+ return 0;
+}
+
+static void
+ptp_ocp_resource_summary(struct ptp_ocp *bp)
+{
+ struct device *dev = &bp->pdev->dev;
+
+ if (bp->image) {
+ u32 ver = ioread32(&bp->image->version);
+
+ dev_info(dev, "version %x\n", ver);
+ if (ver & 0xffff)
+ dev_info(dev, "regular image, version %d\n",
+ ver & 0xffff);
+ else
+ dev_info(dev, "golden image, version %d\n",
+ ver >> 16);
+ }
+ if (bp->gnss_port != -1)
+ dev_info(dev, "GNSS @ /dev/ttyS%d 115200\n", bp->gnss_port);
+ if (bp->mac_port != -1)
+ dev_info(dev, "MAC @ /dev/ttyS%d 57600\n", bp->mac_port);
+}
+
+static void
+ptp_ocp_detach_sysfs(struct ptp_ocp *bp)
+{
+ struct device *dev = &bp->dev;
+
+ sysfs_remove_link(&dev->kobj, "ttyGNSS");
+ sysfs_remove_link(&dev->kobj, "ttyMAC");
+ sysfs_remove_link(&dev->kobj, "ptp");
+ sysfs_remove_link(&dev->kobj, "pps");
+ device_remove_groups(dev, timecard_groups);
+}
+
+static void
+ptp_ocp_detach(struct ptp_ocp *bp)
+{
+ ptp_ocp_detach_sysfs(bp);
+ if (timer_pending(&bp->watchdog))
+ del_timer_sync(&bp->watchdog);
+ if (bp->ts0)
+ ptp_ocp_unregister_ext(bp->ts0);
+ if (bp->ts1)
+ ptp_ocp_unregister_ext(bp->ts1);
+ if (bp->pps)
+ ptp_ocp_unregister_ext(bp->pps);
+ if (bp->gnss_port != -1)
+ serial8250_unregister_port(bp->gnss_port);
+ if (bp->mac_port != -1)
+ serial8250_unregister_port(bp->mac_port);
+ if (bp->spi_flash)
+ platform_device_unregister(bp->spi_flash);
+ if (bp->i2c_ctrl)
+ platform_device_unregister(bp->i2c_ctrl);
+ if (bp->i2c_clk)
+ clk_hw_unregister_fixed_rate(bp->i2c_clk);
+ if (bp->n_irqs)
+ pci_free_irq_vectors(bp->pdev);
+ if (bp->ptp)
+ ptp_clock_unregister(bp->ptp);
+ device_unregister(&bp->dev);
+}
+
+static int
+ptp_ocp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ struct devlink *devlink;
+ struct ptp_ocp *bp;
+ int err;
+
+ devlink = devlink_alloc(&ptp_ocp_devlink_ops, sizeof(*bp), &pdev->dev);
+ if (!devlink) {
+ dev_err(&pdev->dev, "devlink_alloc failed\n");
+ return -ENOMEM;
+ }
+
+ err = devlink_register(devlink);
+ if (err)
+ goto out_free;
+
+ err = pci_enable_device(pdev);
+ if (err) {
+ dev_err(&pdev->dev, "pci_enable_device\n");
+ goto out_unregister;
+ }
+
+ bp = devlink_priv(devlink);
+ err = ptp_ocp_device_init(bp, pdev);
+ if (err)
+ goto out_unregister;
+
+ /* compat mode.
+ * Older FPGA firmware only returns 2 irq's.
+ * allow this - if not all of the IRQ's are returned, skip the
+ * extra devices and just register the clock.
+ */
+ err = pci_alloc_irq_vectors(pdev, 1, 10, PCI_IRQ_MSI | PCI_IRQ_MSIX);
+ if (err < 0) {
+ dev_err(&pdev->dev, "alloc_irq_vectors err: %d\n", err);
+ goto out;
+ }
+ bp->n_irqs = err;
+ pci_set_master(pdev);
+
+ err = ptp_ocp_register_resources(bp, id->driver_data);
if (err)
goto out;
bp->ptp = ptp_clock_register(&bp->ptp_info, &pdev->dev);
if (IS_ERR(bp->ptp)) {
- dev_err(&pdev->dev, "ptp_clock_register\n");
err = PTR_ERR(bp->ptp);
+ dev_err(&pdev->dev, "ptp_clock_register: %d\n", err);
+ bp->ptp = NULL;
goto out;
}
+ err = ptp_ocp_complete(bp);
+ if (err)
+ goto out;
+
ptp_ocp_info(bp);
+ ptp_ocp_resource_summary(bp);
return 0;
out:
- pci_iounmap(pdev, bp->base);
-out_release_regions:
- pci_release_regions(pdev);
-out_disable:
+ ptp_ocp_detach(bp);
pci_disable_device(pdev);
+ pci_set_drvdata(pdev, NULL);
+out_unregister:
+ devlink_unregister(devlink);
out_free:
- kfree(bp);
+ devlink_free(devlink);
return err;
}
ptp_ocp_remove(struct pci_dev *pdev)
{
struct ptp_ocp *bp = pci_get_drvdata(pdev);
+ struct devlink *devlink = priv_to_devlink(bp);
- ptp_clock_unregister(bp->ptp);
- pci_iounmap(pdev, bp->base);
- pci_release_regions(pdev);
+ ptp_ocp_detach(bp);
pci_disable_device(pdev);
pci_set_drvdata(pdev, NULL);
- kfree(bp);
+
+ devlink_unregister(devlink);
+ devlink_free(devlink);
}
static struct pci_driver ptp_ocp_driver = {
.remove = ptp_ocp_remove,
};
+static int
+ptp_ocp_i2c_notifier_call(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct device *dev, *child = data;
+ struct ptp_ocp *bp;
+ bool add;
+
+ switch (action) {
+ case BUS_NOTIFY_ADD_DEVICE:
+ case BUS_NOTIFY_DEL_DEVICE:
+ add = action == BUS_NOTIFY_ADD_DEVICE;
+ break;
+ default:
+ return 0;
+ }
+
+ if (!i2c_verify_adapter(child))
+ return 0;
+
+ dev = child;
+ while ((dev = dev->parent))
+ if (dev->driver && !strcmp(dev->driver->name, KBUILD_MODNAME))
+ goto found;
+ return 0;
+
+found:
+ bp = dev_get_drvdata(dev);
+ if (add)
+ ptp_ocp_symlink(bp, child, "i2c");
+ else
+ sysfs_remove_link(&bp->dev.kobj, "i2c");
+
+ return 0;
+}
+
+static struct notifier_block ptp_ocp_i2c_notifier = {
+ .notifier_call = ptp_ocp_i2c_notifier_call,
+};
+
static int __init
ptp_ocp_init(void)
{
+ const char *what;
int err;
+ what = "timecard class";
+ err = class_register(&timecard_class);
+ if (err)
+ goto out;
+
+ what = "i2c notifier";
+ err = bus_register_notifier(&i2c_bus_type, &ptp_ocp_i2c_notifier);
+ if (err)
+ goto out_notifier;
+
+ what = "ptp_ocp driver";
err = pci_register_driver(&ptp_ocp_driver);
+ if (err)
+ goto out_register;
+
+ return 0;
+
+out_register:
+ bus_unregister_notifier(&i2c_bus_type, &ptp_ocp_i2c_notifier);
+out_notifier:
+ class_unregister(&timecard_class);
+out:
+ pr_err(KBUILD_MODNAME ": failed to register %s: %d\n", what, err);
return err;
}
static void __exit
ptp_ocp_fini(void)
{
+ bus_unregister_notifier(&i2c_bus_type, &ptp_ocp_i2c_notifier);
pci_unregister_driver(&ptp_ocp_driver);
+ class_unregister(&timecard_class);
}
module_init(ptp_ocp_init);
def_tristate y
prompt "qeth layer 2 device support"
depends on QETH
+ depends on BRIDGE || BRIDGE=n
help
Select this option to be able to run qeth devices in layer 2 mode.
To compile as a module, choose M. The module name is qeth_l2.
return rc;
}
+struct qeth_l2_br2dev_event_work {
+ struct work_struct work;
+ struct net_device *br_dev;
+ struct net_device *lsync_dev;
+ struct net_device *dst_dev;
+ unsigned long event;
+ unsigned char addr[ETH_ALEN];
+};
+
+static const struct net_device_ops qeth_l2_netdev_ops;
+
+static bool qeth_l2_must_learn(struct net_device *netdev,
+ struct net_device *dstdev)
+{
+ struct qeth_priv *priv;
+
+ priv = netdev_priv(netdev);
+ return (netdev != dstdev &&
+ (priv->brport_features & BR_LEARNING_SYNC) &&
+ !(br_port_flag_is_set(netdev, BR_ISOLATED) &&
+ br_port_flag_is_set(dstdev, BR_ISOLATED)) &&
+ netdev->netdev_ops == &qeth_l2_netdev_ops);
+}
+
+/**
+ * qeth_l2_br2dev_worker() - update local MACs
+ * @work: bridge to device FDB update
+ *
+ * Update local MACs of a learning_sync bridgeport so it can receive
+ * messages for a destination port.
+ * In case of an isolated learning_sync port, also update its isolated
+ * siblings.
+ */
+static void qeth_l2_br2dev_worker(struct work_struct *work)
+{
+ struct qeth_l2_br2dev_event_work *br2dev_event_work =
+ container_of(work, struct qeth_l2_br2dev_event_work, work);
+ struct net_device *lsyncdev = br2dev_event_work->lsync_dev;
+ struct net_device *dstdev = br2dev_event_work->dst_dev;
+ struct net_device *brdev = br2dev_event_work->br_dev;
+ unsigned long event = br2dev_event_work->event;
+ unsigned char *addr = br2dev_event_work->addr;
+ struct qeth_card *card = lsyncdev->ml_priv;
+ struct net_device *lowerdev;
+ struct list_head *iter;
+ int err = 0;
+
+ kfree(br2dev_event_work);
+ QETH_CARD_TEXT_(card, 4, "b2dw%04x", event);
+ QETH_CARD_TEXT_(card, 4, "ma%012lx", ether_addr_to_u64(addr));
+
+ rcu_read_lock();
+ /* Verify preconditions are still valid: */
+ if (!netif_is_bridge_port(lsyncdev) ||
+ brdev != netdev_master_upper_dev_get_rcu(lsyncdev))
+ goto unlock;
+ if (!qeth_l2_must_learn(lsyncdev, dstdev))
+ goto unlock;
+
+ if (br_port_flag_is_set(lsyncdev, BR_ISOLATED)) {
+ /* Update lsyncdev and its isolated sibling(s): */
+ iter = &brdev->adj_list.lower;
+ lowerdev = netdev_next_lower_dev_rcu(brdev, &iter);
+ while (lowerdev) {
+ if (br_port_flag_is_set(lowerdev, BR_ISOLATED)) {
+ switch (event) {
+ case SWITCHDEV_FDB_ADD_TO_DEVICE:
+ err = dev_uc_add(lowerdev, addr);
+ break;
+ case SWITCHDEV_FDB_DEL_TO_DEVICE:
+ err = dev_uc_del(lowerdev, addr);
+ break;
+ default:
+ break;
+ }
+ if (err) {
+ QETH_CARD_TEXT(card, 2, "b2derris");
+ QETH_CARD_TEXT_(card, 2,
+ "err%02x%03d", event,
+ lowerdev->ifindex);
+ }
+ }
+ lowerdev = netdev_next_lower_dev_rcu(brdev, &iter);
+ }
+ } else {
+ switch (event) {
+ case SWITCHDEV_FDB_ADD_TO_DEVICE:
+ err = dev_uc_add(lsyncdev, addr);
+ break;
+ case SWITCHDEV_FDB_DEL_TO_DEVICE:
+ err = dev_uc_del(lsyncdev, addr);
+ break;
+ default:
+ break;
+ }
+ if (err)
+ QETH_CARD_TEXT_(card, 2, "b2derr%02x", event);
+ }
+
+unlock:
+ rcu_read_unlock();
+ dev_put(brdev);
+ dev_put(lsyncdev);
+ dev_put(dstdev);
+}
+
+static int qeth_l2_br2dev_queue_work(struct net_device *brdev,
+ struct net_device *lsyncdev,
+ struct net_device *dstdev,
+ unsigned long event,
+ const unsigned char *addr)
+{
+ struct qeth_l2_br2dev_event_work *worker_data;
+ struct qeth_card *card;
+
+ worker_data = kzalloc(sizeof(*worker_data), GFP_ATOMIC);
+ if (!worker_data)
+ return -ENOMEM;
+ INIT_WORK(&worker_data->work, qeth_l2_br2dev_worker);
+ worker_data->br_dev = brdev;
+ worker_data->lsync_dev = lsyncdev;
+ worker_data->dst_dev = dstdev;
+ worker_data->event = event;
+ ether_addr_copy(worker_data->addr, addr);
+
+ card = lsyncdev->ml_priv;
+ /* Take a reference on the sw port devices and the bridge */
+ dev_hold(brdev);
+ dev_hold(lsyncdev);
+ dev_hold(dstdev);
+ queue_work(card->event_wq, &worker_data->work);
+ return 0;
+}
+
+/* Called under rtnl_lock */
+static int qeth_l2_switchdev_event(struct notifier_block *unused,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dstdev, *brdev, *lowerdev;
+ struct switchdev_notifier_fdb_info *fdb_info;
+ struct switchdev_notifier_info *info = ptr;
+ struct list_head *iter;
+ struct qeth_card *card;
+ int rc;
+
+ if (!(event == SWITCHDEV_FDB_ADD_TO_DEVICE ||
+ event == SWITCHDEV_FDB_DEL_TO_DEVICE))
+ return NOTIFY_DONE;
+
+ dstdev = switchdev_notifier_info_to_dev(info);
+ brdev = netdev_master_upper_dev_get_rcu(dstdev);
+ if (!brdev || !netif_is_bridge_master(brdev))
+ return NOTIFY_DONE;
+ fdb_info = container_of(info,
+ struct switchdev_notifier_fdb_info,
+ info);
+ iter = &brdev->adj_list.lower;
+ lowerdev = netdev_next_lower_dev_rcu(brdev, &iter);
+ while (lowerdev) {
+ if (qeth_l2_must_learn(lowerdev, dstdev)) {
+ card = lowerdev->ml_priv;
+ QETH_CARD_TEXT_(card, 4, "b2dqw%03x", event);
+ rc = qeth_l2_br2dev_queue_work(brdev, lowerdev,
+ dstdev, event,
+ fdb_info->addr);
+ if (rc) {
+ QETH_CARD_TEXT(card, 2, "b2dqwerr");
+ return NOTIFY_BAD;
+ }
+ }
+ lowerdev = netdev_next_lower_dev_rcu(brdev, &iter);
+ }
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block qeth_l2_sw_notifier = {
+ .notifier_call = qeth_l2_switchdev_event,
+};
+
+static refcount_t qeth_l2_switchdev_notify_refcnt;
+
+/* Called under rtnl_lock */
+static void qeth_l2_br2dev_get(void)
+{
+ int rc;
+
+ if (!refcount_inc_not_zero(&qeth_l2_switchdev_notify_refcnt)) {
+ rc = register_switchdev_notifier(&qeth_l2_sw_notifier);
+ if (rc) {
+ QETH_DBF_MESSAGE(2,
+ "failed to register qeth_l2_sw_notifier: %d\n",
+ rc);
+ } else {
+ refcount_set(&qeth_l2_switchdev_notify_refcnt, 1);
+ QETH_DBF_MESSAGE(2, "qeth_l2_sw_notifier registered\n");
+ }
+ }
+ QETH_DBF_TEXT_(SETUP, 2, "b2d+%04d",
+ qeth_l2_switchdev_notify_refcnt.refs.counter);
+}
+
+/* Called under rtnl_lock */
+static void qeth_l2_br2dev_put(void)
+{
+ int rc;
+
+ if (refcount_dec_and_test(&qeth_l2_switchdev_notify_refcnt)) {
+ rc = unregister_switchdev_notifier(&qeth_l2_sw_notifier);
+ if (rc) {
+ QETH_DBF_MESSAGE(2,
+ "failed to unregister qeth_l2_sw_notifier: %d\n",
+ rc);
+ } else {
+ QETH_DBF_MESSAGE(2,
+ "qeth_l2_sw_notifier unregistered\n");
+ }
+ }
+ QETH_DBF_TEXT_(SETUP, 2, "b2d-%04d",
+ qeth_l2_switchdev_notify_refcnt.refs.counter);
+}
+
static int qeth_l2_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
struct net_device *dev, u32 filter_mask,
int nlflags)
} else if (enable) {
qeth_l2_set_pnso_mode(card, QETH_PNSO_ADDR_INFO);
rc = qeth_l2_dev2br_an_set(card, true);
- if (rc)
+ if (rc) {
qeth_l2_set_pnso_mode(card, QETH_PNSO_NONE);
- else
+ } else {
priv->brport_features |= BR_LEARNING_SYNC;
+ qeth_l2_br2dev_get();
+ }
} else {
rc = qeth_l2_dev2br_an_set(card, false);
if (!rc) {
qeth_l2_set_pnso_mode(card, QETH_PNSO_NONE);
priv->brport_features ^= BR_LEARNING_SYNC;
qeth_l2_dev2br_fdb_flush(card);
+ qeth_l2_br2dev_put();
}
}
mutex_unlock(&card->sbp_lock);
static void qeth_l2_remove_device(struct ccwgroup_device *gdev)
{
struct qeth_card *card = dev_get_drvdata(&gdev->dev);
+ struct qeth_priv *priv;
if (gdev->dev.type != &qeth_l2_devtype)
device_remove_groups(&gdev->dev, qeth_l2_attr_groups);
qeth_set_offline(card, card->discipline, false);
cancel_work_sync(&card->close_dev_work);
- if (card->dev->reg_state == NETREG_REGISTERED)
+ if (card->dev->reg_state == NETREG_REGISTERED) {
+ priv = netdev_priv(card->dev);
+ if (priv->brport_features & BR_LEARNING_SYNC) {
+ rtnl_lock();
+ qeth_l2_br2dev_put();
+ rtnl_unlock();
+ }
unregister_netdev(card->dev);
+ }
}
static int qeth_l2_set_online(struct qeth_card *card, bool carrier_ok)
static int __init qeth_l2_init(void)
{
pr_info("register layer 2 discipline\n");
+ refcount_set(&qeth_l2_switchdev_notify_refcnt, 0);
return 0;
}
//#endif
clear_bit(SCpnt->device->id * 8 +
(u8)(SCpnt->device->lun & 0x7), host->busyluns);
+ fallthrough;
/*
* We found the command, and cleared it out. Either
case IS_COMPLETE:
break;
}
+ break;
default:
break;
if (!h->ctlr)
err = SCSI_DH_RES_TEMP_UNAVAIL;
else {
- list_add_rcu(&h->node, &h->ctlr->dh_list);
h->sdev = sdev;
+ list_add_rcu(&h->node, &h->ctlr->dh_list);
}
spin_unlock(&list_lock);
err = SCSI_DH_OK;
spin_lock(&list_lock);
if (h->ctlr) {
list_del_rcu(&h->node);
- h->sdev = NULL;
kref_put(&h->ctlr->kref, release_controller);
}
spin_unlock(&list_lock);
sdev->handler_data = NULL;
+ synchronize_rcu();
kfree(h);
}
for (i = 0; i < size; ++i) {
struct ibmvfc_event *evt = &pool->events[i];
+ /*
+ * evt->active states
+ * 1 = in flight
+ * 0 = being completed
+ * -1 = free/freed
+ */
+ atomic_set(&evt->active, -1);
atomic_set(&evt->free, 1);
evt->crq.valid = 0x80;
evt->crq.ioba = cpu_to_be64(pool->iu_token + (sizeof(*evt->xfer_iu) * i));
BUG_ON(!ibmvfc_valid_event(pool, evt));
BUG_ON(atomic_inc_return(&evt->free) != 1);
+ BUG_ON(atomic_dec_and_test(&evt->active));
spin_lock_irqsave(&evt->queue->l_lock, flags);
list_add_tail(&evt->queue_list, &evt->queue->free);
**/
static void ibmvfc_fail_request(struct ibmvfc_event *evt, int error_code)
{
+ /*
+ * Anything we are failing should still be active. Otherwise, it
+ * implies we already got a response for the command and are doing
+ * something bad like double completing it.
+ */
+ BUG_ON(!atomic_dec_and_test(&evt->active));
if (evt->cmnd) {
evt->cmnd->result = (error_code << 16);
evt->done = ibmvfc_scsi_eh_done;
evt->done(evt);
} else {
+ atomic_set(&evt->active, 1);
spin_unlock_irqrestore(&evt->queue->l_lock, flags);
ibmvfc_trc_start(evt);
}
return;
}
- if (unlikely(atomic_read(&evt->free))) {
+ if (unlikely(atomic_dec_if_positive(&evt->active))) {
dev_err(vhost->dev, "Received duplicate correlation_token 0x%08llx!\n",
crq->ioba);
return;
return;
}
- if (unlikely(atomic_read(&evt->free))) {
+ if (unlikely(atomic_dec_if_positive(&evt->active))) {
dev_err(vhost->dev, "Received duplicate correlation_token 0x%08llx!\n",
crq->ioba);
return;
struct ibmvfc_target *tgt;
struct scsi_cmnd *cmnd;
atomic_t free;
+ atomic_t active;
union ibmvfc_iu *xfer_iu;
void (*done)(struct ibmvfc_event *evt);
void (*_done)(struct ibmvfc_event *evt);
mimd_t mimd;
uint32_t adapno;
int iterator;
-
+ bool is_found;
if (copy_from_user(&mimd, umimd, sizeof(mimd_t))) {
*rval = -EFAULT;
adapter = NULL;
iterator = 0;
+ is_found = false;
list_for_each_entry(adapter, &adapters_list_g, list) {
- if (iterator++ == adapno) break;
+ if (iterator++ == adapno) {
+ is_found = true;
+ break;
+ }
}
- if (!adapter) {
+ if (!is_found) {
*rval = -ENODEV;
return NULL;
}
uint32_t adapno;
int iterator;
mraid_mmadp_t* adapter;
+ bool is_found;
/*
* When the kioc returns from driver, make sure it still doesn't
iterator = 0;
adapter = NULL;
adapno = kioc->adapno;
+ is_found = false;
con_log(CL_ANN, ( KERN_WARNING "megaraid cmm: completed "
"ioctl that was timedout before\n"));
list_for_each_entry(adapter, &adapters_list_g, list) {
- if (iterator++ == adapno) break;
+ if (iterator++ == adapno) {
+ is_found = true;
+ break;
+ }
}
kioc->timedout = 0;
- if (adapter) {
+ if (is_found)
mraid_mm_dealloc_kioc( adapter, kioc );
- }
+
}
else {
wake_up(&wait_q);
}
/**
- * _base_free_irq - free irq
+ * mpt3sas_base_free_irq - free irq
* @ioc: per adapter object
*
* Freeing respective reply_queue from the list.
*/
-static void
-_base_free_irq(struct MPT3SAS_ADAPTER *ioc)
+void
+mpt3sas_base_free_irq(struct MPT3SAS_ADAPTER *ioc)
{
struct adapter_reply_queue *reply_q, *next;
}
/**
- * _base_disable_msix - disables msix
+ * mpt3sas_base_disable_msix - disables msix
* @ioc: per adapter object
*
*/
-static void
-_base_disable_msix(struct MPT3SAS_ADAPTER *ioc)
+void
+mpt3sas_base_disable_msix(struct MPT3SAS_ADAPTER *ioc)
{
if (!ioc->msix_enable)
return;
for (i = 0; i < ioc->reply_queue_count; i++) {
r = _base_request_irq(ioc, i);
if (r) {
- _base_free_irq(ioc);
- _base_disable_msix(ioc);
+ mpt3sas_base_free_irq(ioc);
+ mpt3sas_base_disable_msix(ioc);
goto try_ioapic;
}
}
dexitprintk(ioc, ioc_info(ioc, "%s\n", __func__));
- _base_free_irq(ioc);
- _base_disable_msix(ioc);
+ mpt3sas_base_free_irq(ioc);
+ mpt3sas_base_disable_msix(ioc);
kfree(ioc->replyPostRegisterIndex);
ioc->replyPostRegisterIndex = NULL;
}
/**
- * _base_make_ioc_ready - put controller in READY state
+ * mpt3sas_base_make_ioc_ready - put controller in READY state
* @ioc: per adapter object
* @type: FORCE_BIG_HAMMER or SOFT_RESET
*
* Return: 0 for success, non-zero for failure.
*/
-static int
-_base_make_ioc_ready(struct MPT3SAS_ADAPTER *ioc, enum reset_type type)
+int
+mpt3sas_base_make_ioc_ready(struct MPT3SAS_ADAPTER *ioc, enum reset_type type)
{
u32 ioc_state;
int rc;
if (ioc->chip_phys && ioc->chip) {
mpt3sas_base_mask_interrupts(ioc);
ioc->shost_recovery = 1;
- _base_make_ioc_ready(ioc, SOFT_RESET);
+ mpt3sas_base_make_ioc_ready(ioc, SOFT_RESET);
ioc->shost_recovery = 0;
}
ioc->build_sg_mpi = &_base_build_sg;
ioc->build_zero_len_sge_mpi = &_base_build_zero_len_sge;
- r = _base_make_ioc_ready(ioc, SOFT_RESET);
+ r = mpt3sas_base_make_ioc_ready(ioc, SOFT_RESET);
if (r)
goto out_free_resources;
_base_pre_reset_handler(ioc);
mpt3sas_wait_for_commands_to_complete(ioc);
mpt3sas_base_mask_interrupts(ioc);
- r = _base_make_ioc_ready(ioc, type);
+ r = mpt3sas_base_make_ioc_ready(ioc, type);
if (r)
goto out;
_base_clear_outstanding_commands(ioc);
status, mpi_request, sz); } while (0)
int mpt3sas_wait_for_ioc(struct MPT3SAS_ADAPTER *ioc, int wait_count);
+int
+mpt3sas_base_make_ioc_ready(struct MPT3SAS_ADAPTER *ioc, enum reset_type type);
+void mpt3sas_base_free_irq(struct MPT3SAS_ADAPTER *ioc);
+void mpt3sas_base_disable_msix(struct MPT3SAS_ADAPTER *ioc);
/* scsih shared API */
struct scsi_cmnd *mpt3sas_scsih_scsi_lookup_get(struct MPT3SAS_ADAPTER *ioc,
_scsih_ir_shutdown(ioc);
_scsih_nvme_shutdown(ioc);
- mpt3sas_base_detach(ioc);
+ mpt3sas_base_mask_interrupts(ioc);
+ ioc->shost_recovery = 1;
+ mpt3sas_base_make_ioc_ready(ioc, SOFT_RESET);
+ ioc->shost_recovery = 0;
+ mpt3sas_base_free_irq(ioc);
+ mpt3sas_base_disable_msix(ioc);
}
void pm8001_task_done(struct sas_task *task)
{
- if (!del_timer(&task->slow_task->timer))
- return;
+ del_timer(&task->slow_task->timer);
complete(&task->slow_task->completion);
}
{
struct sas_task_slow *slow = from_timer(slow, t, timer);
struct sas_task *task = slow->task;
+ unsigned long flags;
- task->task_state_flags |= SAS_TASK_STATE_ABORTED;
- complete(&task->slow_task->completion);
+ spin_lock_irqsave(&task->task_state_lock, flags);
+ if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) {
+ task->task_state_flags |= SAS_TASK_STATE_ABORTED;
+ complete(&task->slow_task->completion);
+ }
+ spin_unlock_irqrestore(&task->task_state_lock, flags);
}
#define PM8001_TASK_TIMEOUT 20
}
res = -TMF_RESP_FUNC_FAILED;
/* Even TMF timed out, return direct. */
- if ((task->task_state_flags & SAS_TASK_STATE_ABORTED)) {
- if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) {
- pm8001_dbg(pm8001_ha, FAIL,
- "TMF task[%x]timeout.\n",
- tmf->tmf);
- goto ex_err;
- }
+ if (task->task_state_flags & SAS_TASK_STATE_ABORTED) {
+ pm8001_dbg(pm8001_ha, FAIL, "TMF task[%x]timeout.\n",
+ tmf->tmf);
+ goto ex_err;
}
if (task->task_status.resp == SAS_TASK_COMPLETE &&
wait_for_completion(&task->slow_task->completion);
res = TMF_RESP_FUNC_FAILED;
/* Even TMF timed out, return direct. */
- if ((task->task_state_flags & SAS_TASK_STATE_ABORTED)) {
- if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) {
- pm8001_dbg(pm8001_ha, FAIL,
- "TMF task timeout.\n");
- goto ex_err;
- }
+ if (task->task_state_flags & SAS_TASK_STATE_ABORTED) {
+ pm8001_dbg(pm8001_ha, FAIL, "TMF task timeout.\n");
+ goto ex_err;
}
if (task->task_status.resp == SAS_TASK_COMPLETE &&
error = shost->hostt->target_alloc(starget);
if(error) {
- dev_printk(KERN_ERR, dev, "target allocation failed, error %d\n", error);
+ if (error != -ENXIO)
+ dev_err(dev, "target allocation failed, error %d\n", error);
/* don't want scsi_target_reap to do the final
* put because it will be under the host lock */
scsi_target_destroy(starget);
mutex_lock(&sdev->state_mutex);
ret = scsi_device_set_state(sdev, state);
/*
- * If the device state changes to SDEV_RUNNING, we need to run
- * the queue to avoid I/O hang.
+ * If the device state changes to SDEV_RUNNING, we need to
+ * rescan the device to revalidate it, and run the queue to
+ * avoid I/O hang.
*/
- if (ret == 0 && state == SDEV_RUNNING)
+ if (ret == 0 && state == SDEV_RUNNING) {
+ scsi_rescan_device(dev);
blk_mq_run_hw_queues(sdev->request_queue, true);
+ }
mutex_unlock(&sdev->state_mutex);
return ret == 0 ? count : -EINVAL;
struct device *dev = container_of(kobj, struct device, kobj);
struct iscsi_iface *iface = iscsi_dev_to_iface(dev);
struct iscsi_transport *t = iface->transport;
- int param;
- int param_type;
+ int param = -1;
if (attr == &dev_attr_iface_enabled.attr)
param = ISCSI_NET_PARAM_IFACE_ENABLE;
- else if (attr == &dev_attr_iface_vlan_id.attr)
- param = ISCSI_NET_PARAM_VLAN_ID;
- else if (attr == &dev_attr_iface_vlan_priority.attr)
- param = ISCSI_NET_PARAM_VLAN_PRIORITY;
- else if (attr == &dev_attr_iface_vlan_enabled.attr)
- param = ISCSI_NET_PARAM_VLAN_ENABLED;
- else if (attr == &dev_attr_iface_mtu.attr)
- param = ISCSI_NET_PARAM_MTU;
- else if (attr == &dev_attr_iface_port.attr)
- param = ISCSI_NET_PARAM_PORT;
- else if (attr == &dev_attr_iface_ipaddress_state.attr)
- param = ISCSI_NET_PARAM_IPADDR_STATE;
- else if (attr == &dev_attr_iface_delayed_ack_en.attr)
- param = ISCSI_NET_PARAM_DELAYED_ACK_EN;
- else if (attr == &dev_attr_iface_tcp_nagle_disable.attr)
- param = ISCSI_NET_PARAM_TCP_NAGLE_DISABLE;
- else if (attr == &dev_attr_iface_tcp_wsf_disable.attr)
- param = ISCSI_NET_PARAM_TCP_WSF_DISABLE;
- else if (attr == &dev_attr_iface_tcp_wsf.attr)
- param = ISCSI_NET_PARAM_TCP_WSF;
- else if (attr == &dev_attr_iface_tcp_timer_scale.attr)
- param = ISCSI_NET_PARAM_TCP_TIMER_SCALE;
- else if (attr == &dev_attr_iface_tcp_timestamp_en.attr)
- param = ISCSI_NET_PARAM_TCP_TIMESTAMP_EN;
- else if (attr == &dev_attr_iface_cache_id.attr)
- param = ISCSI_NET_PARAM_CACHE_ID;
- else if (attr == &dev_attr_iface_redirect_en.attr)
- param = ISCSI_NET_PARAM_REDIRECT_EN;
else if (attr == &dev_attr_iface_def_taskmgmt_tmo.attr)
param = ISCSI_IFACE_PARAM_DEF_TASKMGMT_TMO;
else if (attr == &dev_attr_iface_header_digest.attr)
param = ISCSI_IFACE_PARAM_STRICT_LOGIN_COMP_EN;
else if (attr == &dev_attr_iface_initiator_name.attr)
param = ISCSI_IFACE_PARAM_INITIATOR_NAME;
+
+ if (param != -1)
+ return t->attr_is_visible(ISCSI_IFACE_PARAM, param);
+
+ if (attr == &dev_attr_iface_vlan_id.attr)
+ param = ISCSI_NET_PARAM_VLAN_ID;
+ else if (attr == &dev_attr_iface_vlan_priority.attr)
+ param = ISCSI_NET_PARAM_VLAN_PRIORITY;
+ else if (attr == &dev_attr_iface_vlan_enabled.attr)
+ param = ISCSI_NET_PARAM_VLAN_ENABLED;
+ else if (attr == &dev_attr_iface_mtu.attr)
+ param = ISCSI_NET_PARAM_MTU;
+ else if (attr == &dev_attr_iface_port.attr)
+ param = ISCSI_NET_PARAM_PORT;
+ else if (attr == &dev_attr_iface_ipaddress_state.attr)
+ param = ISCSI_NET_PARAM_IPADDR_STATE;
+ else if (attr == &dev_attr_iface_delayed_ack_en.attr)
+ param = ISCSI_NET_PARAM_DELAYED_ACK_EN;
+ else if (attr == &dev_attr_iface_tcp_nagle_disable.attr)
+ param = ISCSI_NET_PARAM_TCP_NAGLE_DISABLE;
+ else if (attr == &dev_attr_iface_tcp_wsf_disable.attr)
+ param = ISCSI_NET_PARAM_TCP_WSF_DISABLE;
+ else if (attr == &dev_attr_iface_tcp_wsf.attr)
+ param = ISCSI_NET_PARAM_TCP_WSF;
+ else if (attr == &dev_attr_iface_tcp_timer_scale.attr)
+ param = ISCSI_NET_PARAM_TCP_TIMER_SCALE;
+ else if (attr == &dev_attr_iface_tcp_timestamp_en.attr)
+ param = ISCSI_NET_PARAM_TCP_TIMESTAMP_EN;
+ else if (attr == &dev_attr_iface_cache_id.attr)
+ param = ISCSI_NET_PARAM_CACHE_ID;
+ else if (attr == &dev_attr_iface_redirect_en.attr)
+ param = ISCSI_NET_PARAM_REDIRECT_EN;
else if (iface->iface_type == ISCSI_IFACE_TYPE_IPV4) {
if (attr == &dev_attr_ipv4_iface_ipaddress.attr)
param = ISCSI_NET_PARAM_IPV4_ADDR;
return 0;
}
- switch (param) {
- case ISCSI_IFACE_PARAM_DEF_TASKMGMT_TMO:
- case ISCSI_IFACE_PARAM_HDRDGST_EN:
- case ISCSI_IFACE_PARAM_DATADGST_EN:
- case ISCSI_IFACE_PARAM_IMM_DATA_EN:
- case ISCSI_IFACE_PARAM_INITIAL_R2T_EN:
- case ISCSI_IFACE_PARAM_DATASEQ_INORDER_EN:
- case ISCSI_IFACE_PARAM_PDU_INORDER_EN:
- case ISCSI_IFACE_PARAM_ERL:
- case ISCSI_IFACE_PARAM_MAX_RECV_DLENGTH:
- case ISCSI_IFACE_PARAM_FIRST_BURST:
- case ISCSI_IFACE_PARAM_MAX_R2T:
- case ISCSI_IFACE_PARAM_MAX_BURST:
- case ISCSI_IFACE_PARAM_CHAP_AUTH_EN:
- case ISCSI_IFACE_PARAM_BIDI_CHAP_EN:
- case ISCSI_IFACE_PARAM_DISCOVERY_AUTH_OPTIONAL:
- case ISCSI_IFACE_PARAM_DISCOVERY_LOGOUT_EN:
- case ISCSI_IFACE_PARAM_STRICT_LOGIN_COMP_EN:
- case ISCSI_IFACE_PARAM_INITIATOR_NAME:
- param_type = ISCSI_IFACE_PARAM;
- break;
- default:
- param_type = ISCSI_NET_PARAM;
- }
-
- return t->attr_is_visible(param_type, param);
+ return t->attr_is_visible(ISCSI_NET_PARAM, param);
}
static struct attribute *iscsi_iface_attrs[] = {
else if (med->media_event_code == 2)
return DISK_EVENT_MEDIA_CHANGE;
else if (med->media_event_code == 3)
- return DISK_EVENT_EJECT_REQUEST;
+ return DISK_EVENT_MEDIA_CHANGE;
return 0;
}
static int cards_found;
int err;
- devlink = devlink_alloc(&qlge_devlink_ops, sizeof(struct qlge_adapter));
+ devlink = devlink_alloc(&qlge_devlink_ops, sizeof(struct qlge_adapter),
+ &pdev->dev);
if (!devlink)
return -ENOMEM;
goto netdev_free;
}
- err = devlink_register(devlink, &pdev->dev);
+ err = devlink_register(devlink);
if (err)
goto netdev_free;
#include "target_core_alua.h"
static sense_reason_t
-sbc_check_prot(struct se_device *, struct se_cmd *, unsigned char *, u32, bool);
+sbc_check_prot(struct se_device *, struct se_cmd *, unsigned char, u32, bool);
static sense_reason_t sbc_execute_unmap(struct se_cmd *cmd);
static sense_reason_t
}
static sense_reason_t
-sbc_setup_write_same(struct se_cmd *cmd, unsigned char *flags, struct sbc_ops *ops)
+sbc_setup_write_same(struct se_cmd *cmd, unsigned char flags, struct sbc_ops *ops)
{
struct se_device *dev = cmd->se_dev;
sector_t end_lba = dev->transport->get_blocks(dev) + 1;
unsigned int sectors = sbc_get_write_same_sectors(cmd);
sense_reason_t ret;
- if ((flags[0] & 0x04) || (flags[0] & 0x02)) {
+ if ((flags & 0x04) || (flags & 0x02)) {
pr_err("WRITE_SAME PBDATA and LBDATA"
" bits not supported for Block Discard"
" Emulation\n");
}
/* We always have ANC_SUP == 0 so setting ANCHOR is always an error */
- if (flags[0] & 0x10) {
+ if (flags & 0x10) {
pr_warn("WRITE SAME with ANCHOR not supported\n");
return TCM_INVALID_CDB_FIELD;
}
* Special case for WRITE_SAME w/ UNMAP=1 that ends up getting
* translated into block discard requests within backend code.
*/
- if (flags[0] & 0x08) {
+ if (flags & 0x08) {
if (!ops->execute_unmap)
return TCM_UNSUPPORTED_SCSI_OPCODE;
if (!ops->execute_write_same)
return TCM_UNSUPPORTED_SCSI_OPCODE;
- ret = sbc_check_prot(dev, cmd, &cmd->t_task_cdb[0], sectors, true);
+ ret = sbc_check_prot(dev, cmd, flags >> 5, sectors, true);
if (ret)
return ret;
}
static sense_reason_t
-sbc_check_prot(struct se_device *dev, struct se_cmd *cmd, unsigned char *cdb,
+sbc_check_prot(struct se_device *dev, struct se_cmd *cmd, unsigned char protect,
u32 sectors, bool is_write)
{
- u8 protect = cdb[1] >> 5;
int sp_ops = cmd->se_sess->sup_prot_ops;
int pi_prot_type = dev->dev_attrib.pi_prot_type;
bool fabric_prot = false;
fallthrough;
default:
pr_err("Unable to determine pi_prot_type for CDB: 0x%02x "
- "PROTECT: 0x%02x\n", cdb[0], protect);
+ "PROTECT: 0x%02x\n", cmd->t_task_cdb[0], protect);
return TCM_INVALID_CDB_FIELD;
}
if (sbc_check_dpofua(dev, cmd, cdb))
return TCM_INVALID_CDB_FIELD;
- ret = sbc_check_prot(dev, cmd, cdb, sectors, false);
+ ret = sbc_check_prot(dev, cmd, cdb[1] >> 5, sectors, false);
if (ret)
return ret;
if (sbc_check_dpofua(dev, cmd, cdb))
return TCM_INVALID_CDB_FIELD;
- ret = sbc_check_prot(dev, cmd, cdb, sectors, false);
+ ret = sbc_check_prot(dev, cmd, cdb[1] >> 5, sectors, false);
if (ret)
return ret;
if (sbc_check_dpofua(dev, cmd, cdb))
return TCM_INVALID_CDB_FIELD;
- ret = sbc_check_prot(dev, cmd, cdb, sectors, false);
+ ret = sbc_check_prot(dev, cmd, cdb[1] >> 5, sectors, false);
if (ret)
return ret;
if (sbc_check_dpofua(dev, cmd, cdb))
return TCM_INVALID_CDB_FIELD;
- ret = sbc_check_prot(dev, cmd, cdb, sectors, true);
+ ret = sbc_check_prot(dev, cmd, cdb[1] >> 5, sectors, true);
if (ret)
return ret;
if (sbc_check_dpofua(dev, cmd, cdb))
return TCM_INVALID_CDB_FIELD;
- ret = sbc_check_prot(dev, cmd, cdb, sectors, true);
+ ret = sbc_check_prot(dev, cmd, cdb[1] >> 5, sectors, true);
if (ret)
return ret;
if (sbc_check_dpofua(dev, cmd, cdb))
return TCM_INVALID_CDB_FIELD;
- ret = sbc_check_prot(dev, cmd, cdb, sectors, true);
+ ret = sbc_check_prot(dev, cmd, cdb[1] >> 5, sectors, true);
if (ret)
return ret;
size = sbc_get_size(cmd, 1);
cmd->t_task_lba = get_unaligned_be64(&cdb[12]);
- ret = sbc_setup_write_same(cmd, &cdb[10], ops);
+ ret = sbc_setup_write_same(cmd, cdb[10], ops);
if (ret)
return ret;
break;
size = sbc_get_size(cmd, 1);
cmd->t_task_lba = get_unaligned_be64(&cdb[2]);
- ret = sbc_setup_write_same(cmd, &cdb[1], ops);
+ ret = sbc_setup_write_same(cmd, cdb[1], ops);
if (ret)
return ret;
break;
* Follow sbcr26 with WRITE_SAME (10) and check for the existence
* of byte 1 bit 3 UNMAP instead of original reserved field
*/
- ret = sbc_setup_write_same(cmd, &cdb[1], ops);
+ ret = sbc_setup_write_same(cmd, cdb[1], ops);
if (ret)
return ret;
break;
INIT_WORK(&cmd->work, success ? target_complete_ok_work :
target_complete_failure_work);
- if (wwn->cmd_compl_affinity == SE_COMPL_AFFINITY_CPUID)
+ if (!wwn || wwn->cmd_compl_affinity == SE_COMPL_AFFINITY_CPUID)
cpu = cmd->cpuid;
else
cpu = wwn->cmd_compl_affinity;
};
/* --- WWAN framework integration --- */
-#ifdef CONFIG_WWAN
+#ifdef CONFIG_WWAN_CORE
static int wdm_wwan_port_start(struct wwan_port *port)
{
struct wdm_device *desc = wwan_port_get_drvdata(port);
/* inbuf has been copied, it is safe to check for outstanding data */
schedule_work(&desc->service_outs_intr);
}
-#else /* CONFIG_WWAN */
+#else /* CONFIG_WWAN_CORE */
static void wdm_wwan_init(struct wdm_device *desc) {}
static void wdm_wwan_deinit(struct wdm_device *desc) {}
static void wdm_wwan_rx(struct wdm_device *desc, int length) {}
-#endif /* CONFIG_WWAN */
+#endif /* CONFIG_WWAN_CORE */
/* --- error handling --- */
static void wdm_rxwork(struct work_struct *work)
"wIndex=%04x wLength=%04x\n",
ctrl->bRequestType, ctrl->bRequest, ctrl->wValue,
ctrl->wIndex, ctrl->wLength);
- if (ctrl->bRequestType & 0x80) {
+ if ((ctrl->bRequestType & USB_DIR_IN) && ctrl->wLength) {
pipe = usb_rcvctrlpipe(dev, 0);
snoop_urb(dev, NULL, pipe, ctrl->wLength, tmo, SUBMIT, NULL, 0);
#define USB_TP_TRANSMISSION_DELAY 40 /* ns */
#define USB_TP_TRANSMISSION_DELAY_MAX 65535 /* ns */
+#define USB_PING_RESPONSE_TIME 400 /* ns */
/* Protect struct usb_device->state and ->children members
* Note: Both are also protected by ->dev.sem, except that ->state can
}
/*
- * Set the Maximum Exit Latency (MEL) for the host to initiate a transition from
- * either U1 or U2.
+ * Set the Maximum Exit Latency (MEL) for the host to wakup up the path from
+ * U1/U2, send a PING to the device and receive a PING_RESPONSE.
+ * See USB 3.1 section C.1.5.2
*/
static void usb_set_lpm_mel(struct usb_device *udev,
struct usb3_lpm_parameters *udev_lpm_params,
unsigned int hub_exit_latency)
{
unsigned int total_mel;
- unsigned int device_mel;
- unsigned int hub_mel;
/*
- * Calculate the time it takes to transition all links from the roothub
- * to the parent hub into U0. The parent hub must then decode the
- * packet (hub header decode latency) to figure out which port it was
- * bound for.
- *
- * The Hub Header decode latency is expressed in 0.1us intervals (0x1
- * means 0.1us). Multiply that by 100 to get nanoseconds.
+ * tMEL1. time to transition path from host to device into U0.
+ * MEL for parent already contains the delay up to parent, so only add
+ * the exit latency for the last link (pick the slower exit latency),
+ * and the hub header decode latency. See USB 3.1 section C 2.2.1
+ * Store MEL in nanoseconds
*/
total_mel = hub_lpm_params->mel +
- (hub->descriptor->u.ss.bHubHdrDecLat * 100);
+ max(udev_exit_latency, hub_exit_latency) * 1000 +
+ hub->descriptor->u.ss.bHubHdrDecLat * 100;
/*
- * How long will it take to transition the downstream hub's port into
- * U0? The greater of either the hub exit latency or the device exit
- * latency.
- *
- * The BOS U1/U2 exit latencies are expressed in 1us intervals.
- * Multiply that by 1000 to get nanoseconds.
+ * tMEL2. Time to submit PING packet. Sum of tTPTransmissionDelay for
+ * each link + wHubDelay for each hub. Add only for last link.
+ * tMEL4, the time for PING_RESPONSE to traverse upstream is similar.
+ * Multiply by 2 to include it as well.
*/
- device_mel = udev_exit_latency * 1000;
- hub_mel = hub_exit_latency * 1000;
- if (device_mel > hub_mel)
- total_mel += device_mel;
- else
- total_mel += hub_mel;
+ total_mel += (__le16_to_cpu(hub->descriptor->u.ss.wHubDelay) +
+ USB_TP_TRANSMISSION_DELAY) * 2;
+
+ /*
+ * tMEL3, tPingResponse. Time taken by device to generate PING_RESPONSE
+ * after receiving PING. Also add 2100ns as stated in USB 3.1 C 1.5.2.4
+ * to cover the delay if the PING_RESPONSE is queued behind a Max Packet
+ * Size DP.
+ * Note these delays should be added only once for the entire path, so
+ * add them to the MEL of the device connected to the roothub.
+ */
+ if (!hub->hdev->parent)
+ total_mel += USB_PING_RESPONSE_TIME + 2100;
udev_lpm_params->mel = total_mel;
}
return 0;
}
+/*
+ * Don't allow device intiated U1/U2 if the system exit latency + one bus
+ * interval is greater than the minimum service interval of any active
+ * periodic endpoint. See USB 3.2 section 9.4.9
+ */
+static bool usb_device_may_initiate_lpm(struct usb_device *udev,
+ enum usb3_link_state state)
+{
+ unsigned int sel; /* us */
+ int i, j;
+
+ if (state == USB3_LPM_U1)
+ sel = DIV_ROUND_UP(udev->u1_params.sel, 1000);
+ else if (state == USB3_LPM_U2)
+ sel = DIV_ROUND_UP(udev->u2_params.sel, 1000);
+ else
+ return false;
+
+ for (i = 0; i < udev->actconfig->desc.bNumInterfaces; i++) {
+ struct usb_interface *intf;
+ struct usb_endpoint_descriptor *desc;
+ unsigned int interval;
+
+ intf = udev->actconfig->interface[i];
+ if (!intf)
+ continue;
+
+ for (j = 0; j < intf->cur_altsetting->desc.bNumEndpoints; j++) {
+ desc = &intf->cur_altsetting->endpoint[j].desc;
+
+ if (usb_endpoint_xfer_int(desc) ||
+ usb_endpoint_xfer_isoc(desc)) {
+ interval = (1 << (desc->bInterval - 1)) * 125;
+ if (sel + 125 > interval)
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
/*
* Enable the hub-initiated U1/U2 idle timeouts, and enable device-initiated
* U1/U2 entry.
* U1/U2_ENABLE
*/
if (udev->actconfig &&
- usb_set_device_initiated_lpm(udev, state, true) == 0) {
- if (state == USB3_LPM_U1)
- udev->usb3_lpm_u1_enabled = 1;
- else if (state == USB3_LPM_U2)
- udev->usb3_lpm_u2_enabled = 1;
- } else {
- /* Don't request U1/U2 entry if the device
- * cannot transition to U1/U2.
- */
- usb_set_lpm_timeout(udev, state, 0);
- hcd->driver->disable_usb3_lpm_timeout(hcd, udev, state);
+ usb_device_may_initiate_lpm(udev, state)) {
+ if (usb_set_device_initiated_lpm(udev, state, true)) {
+ /*
+ * Request to enable device initiated U1/U2 failed,
+ * better to turn off lpm in this case.
+ */
+ usb_set_lpm_timeout(udev, state, 0);
+ hcd->driver->disable_usb3_lpm_timeout(hcd, udev, state);
+ return;
+ }
}
-}
+ if (state == USB3_LPM_U1)
+ udev->usb3_lpm_u1_enabled = 1;
+ else if (state == USB3_LPM_U2)
+ udev->usb3_lpm_u2_enabled = 1;
+}
/*
* Disable the hub-initiated U1/U2 idle timeouts, and disable device-initiated
* U1/U2 entry.
/* DJI CineSSD */
{ USB_DEVICE(0x2ca3, 0x0031), .driver_info = USB_QUIRK_NO_LPM },
- /* Fibocom L850-GL LTE Modem */
- { USB_DEVICE(0x2cb7, 0x0007), .driver_info =
- USB_QUIRK_IGNORE_REMOTE_WAKEUP },
-
/* INTEL VALUE SSD */
{ USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME },
* 0 - No (default)
* 1 - Partial power down
* 2 - Hibernation
+ * @no_clock_gating: Specifies whether to avoid clock gating feature.
+ * 0 - No (use clock gating)
+ * 1 - Yes (avoid it)
* @lpm: Enable LPM support.
* 0 - No
* 1 - Yes
#define DWC2_POWER_DOWN_PARAM_NONE 0
#define DWC2_POWER_DOWN_PARAM_PARTIAL 1
#define DWC2_POWER_DOWN_PARAM_HIBERNATION 2
+ bool no_clock_gating;
bool lpm;
bool lpm_clock_gating;
* If neither hibernation nor partial power down are supported,
* clock gating is used to save power.
*/
- dwc2_gadget_enter_clock_gating(hsotg);
+ if (!hsotg->params.no_clock_gating)
+ dwc2_gadget_enter_clock_gating(hsotg);
}
/*
return;
}
- /* Zlp for all endpoints, for ep0 only in DATA IN stage */
+ /* Zlp for all endpoints in non DDMA, for ep0 only in DATA IN stage */
if (hs_ep->send_zlp) {
- dwc2_hsotg_program_zlp(hsotg, hs_ep);
hs_ep->send_zlp = 0;
- /* transfer will be completed on next complete interrupt */
- return;
+ if (!using_desc_dma(hsotg)) {
+ dwc2_hsotg_program_zlp(hsotg, hs_ep);
+ /* transfer will be completed on next complete interrupt */
+ return;
+ }
}
if (hs_ep->index == 0 && hsotg->ep0_state == DWC2_EP0_DATA_IN) {
__func__);
}
} else {
+ /* Mask GINTSTS_GOUTNAKEFF interrupt */
+ dwc2_hsotg_disable_gsint(hsotg, GINTSTS_GOUTNAKEFF);
+
if (!(dwc2_readl(hsotg, GINTSTS) & GINTSTS_GOUTNAKEFF))
dwc2_set_bit(hsotg, DCTL, DCTL_SGOUTNAK);
+ if (!using_dma(hsotg)) {
+ /* Wait for GINTSTS_RXFLVL interrupt */
+ if (dwc2_hsotg_wait_bit_set(hsotg, GINTSTS,
+ GINTSTS_RXFLVL, 100)) {
+ dev_warn(hsotg->dev, "%s: timeout GINTSTS.RXFLVL\n",
+ __func__);
+ } else {
+ /*
+ * Pop GLOBAL OUT NAK status packet from RxFIFO
+ * to assert GOUTNAKEFF interrupt
+ */
+ dwc2_readl(hsotg, GRXSTSP);
+ }
+ }
+
/* Wait for global nak to take effect */
if (dwc2_hsotg_wait_bit_set(hsotg, GINTSTS,
GINTSTS_GOUTNAKEFF, 100))
epctl = dwc2_readl(hs, epreg);
if (value) {
+ /* Unmask GOUTNAKEFF interrupt */
+ dwc2_hsotg_en_gsint(hs, GINTSTS_GOUTNAKEFF);
+
if (!(dwc2_readl(hs, GINTSTS) & GINTSTS_GOUTNAKEFF))
dwc2_set_bit(hs, DCTL, DCTL_SGOUTNAK);
// STALL bit will be set in GOUTNAKEFF interrupt handler
* If not hibernation nor partial power down are supported,
* clock gating is used to save power.
*/
- dwc2_host_enter_clock_gating(hsotg);
+ if (!hsotg->params.no_clock_gating)
+ dwc2_host_enter_clock_gating(hsotg);
break;
}
* If not hibernation nor partial power down are supported,
* clock gating is used to save power.
*/
- dwc2_host_enter_clock_gating(hsotg);
+ if (!hsotg->params.no_clock_gating)
+ dwc2_host_enter_clock_gating(hsotg);
/* After entering suspend, hardware is not accessible */
clear_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags);
struct dwc2_core_params *p = &hsotg->params;
p->power_down = DWC2_POWER_DOWN_PARAM_NONE;
+ p->no_clock_gating = true;
p->phy_utmi_width = 8;
}
unsigned dis_metastability_quirk:1;
unsigned dis_split_quirk:1;
+ unsigned async_callbacks:1;
u16 imod_interval;
};
static int dwc3_ep0_delegate_req(struct dwc3 *dwc, struct usb_ctrlrequest *ctrl)
{
- int ret;
+ int ret = -EINVAL;
- spin_unlock(&dwc->lock);
- ret = dwc->gadget_driver->setup(dwc->gadget, ctrl);
- spin_lock(&dwc->lock);
+ if (dwc->async_callbacks) {
+ spin_unlock(&dwc->lock);
+ ret = dwc->gadget_driver->setup(dwc->gadget, ctrl);
+ spin_lock(&dwc->lock);
+ }
return ret;
}
return ret;
}
+static void dwc3_gadget_async_callbacks(struct usb_gadget *g, bool enable)
+{
+ struct dwc3 *dwc = gadget_to_dwc(g);
+ unsigned long flags;
+
+ spin_lock_irqsave(&dwc->lock, flags);
+ dwc->async_callbacks = enable;
+ spin_unlock_irqrestore(&dwc->lock, flags);
+}
+
static const struct usb_gadget_ops dwc3_gadget_ops = {
.get_frame = dwc3_gadget_get_frame,
.wakeup = dwc3_gadget_wakeup,
.udc_set_ssp_rate = dwc3_gadget_set_ssp_rate,
.get_config_params = dwc3_gadget_config_params,
.vbus_draw = dwc3_gadget_vbus_draw,
+ .udc_async_callbacks = dwc3_gadget_async_callbacks,
};
/* -------------------------------------------------------------------------- */
static void dwc3_disconnect_gadget(struct dwc3 *dwc)
{
- if (dwc->gadget_driver && dwc->gadget_driver->disconnect) {
+ if (dwc->async_callbacks && dwc->gadget_driver->disconnect) {
spin_unlock(&dwc->lock);
dwc->gadget_driver->disconnect(dwc->gadget);
spin_lock(&dwc->lock);
static void dwc3_suspend_gadget(struct dwc3 *dwc)
{
- if (dwc->gadget_driver && dwc->gadget_driver->suspend) {
+ if (dwc->async_callbacks && dwc->gadget_driver->suspend) {
spin_unlock(&dwc->lock);
dwc->gadget_driver->suspend(dwc->gadget);
spin_lock(&dwc->lock);
static void dwc3_resume_gadget(struct dwc3 *dwc)
{
- if (dwc->gadget_driver && dwc->gadget_driver->resume) {
+ if (dwc->async_callbacks && dwc->gadget_driver->resume) {
spin_unlock(&dwc->lock);
dwc->gadget_driver->resume(dwc->gadget);
spin_lock(&dwc->lock);
if (!dwc->gadget_driver)
return;
- if (dwc->gadget->speed != USB_SPEED_UNKNOWN) {
+ if (dwc->async_callbacks && dwc->gadget->speed != USB_SPEED_UNKNOWN) {
spin_unlock(&dwc->lock);
usb_gadget_udc_reset(dwc->gadget, dwc->gadget_driver);
spin_lock(&dwc->lock);
* implemented.
*/
- if (dwc->gadget_driver && dwc->gadget_driver->resume) {
+ if (dwc->async_callbacks && dwc->gadget_driver->resume) {
spin_unlock(&dwc->lock);
dwc->gadget_driver->resume(dwc->gadget);
spin_lock(&dwc->lock);
struct gs_port *port;
mutex_lock(&ports[port_num].lock);
- if (WARN_ON(!ports[port_num].port)) {
+ if (!ports[port_num].port) {
mutex_unlock(&ports[port_num].lock);
return;
}
return 0;
free_eps:
+ pm_runtime_disable(&pdev->dev);
tegra_xudc_free_eps(xudc);
free_event_ring:
tegra_xudc_free_event_ring(xudc);
static irqreturn_t ehci_irq (struct usb_hcd *hcd)
{
struct ehci_hcd *ehci = hcd_to_ehci (hcd);
- u32 status, masked_status, pcd_status = 0, cmd;
+ u32 status, current_status, masked_status, pcd_status = 0;
+ u32 cmd;
int bh;
spin_lock(&ehci->lock);
- status = ehci_readl(ehci, &ehci->regs->status);
+ status = 0;
+ current_status = ehci_readl(ehci, &ehci->regs->status);
+restart:
/* e.g. cardbus physical eject */
- if (status == ~(u32) 0) {
+ if (current_status == ~(u32) 0) {
ehci_dbg (ehci, "device removed\n");
goto dead;
}
+ status |= current_status;
/*
* We don't use STS_FLR, but some controllers don't like it to
* remain on, so mask it out along with the other status bits.
*/
- masked_status = status & (INTR_MASK | STS_FLR);
+ masked_status = current_status & (INTR_MASK | STS_FLR);
/* Shared IRQ? */
if (!masked_status || unlikely(ehci->rh_state == EHCI_RH_HALTED)) {
/* clear (just) interrupts */
ehci_writel(ehci, masked_status, &ehci->regs->status);
+
+ /* For edge interrupts, don't race with an interrupt bit being raised */
+ current_status = ehci_readl(ehci, &ehci->regs->status);
+ if (current_status & INTR_MASK)
+ goto restart;
+
cmd = ehci_readl(ehci, &ehci->regs->command);
bh = 0;
*/
struct urb *curr_urb;
enum scheduling_pass sched_pass;
- struct usb_device *loaded_dev; /* dev that's loaded into the chip */
- int loaded_epnum; /* epnum whose toggles are loaded */
int urb_done; /* > 0 -> no errors, < 0: errno */
size_t curr_len;
u8 hien;
* Caller must NOT hold HCD spinlock.
*/
static void
-max3421_set_address(struct usb_hcd *hcd, struct usb_device *dev, int epnum,
- int force_toggles)
+max3421_set_address(struct usb_hcd *hcd, struct usb_device *dev, int epnum)
{
- struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd);
- int old_epnum, same_ep, rcvtog, sndtog;
- struct usb_device *old_dev;
+ int rcvtog, sndtog;
u8 hctl;
- old_dev = max3421_hcd->loaded_dev;
- old_epnum = max3421_hcd->loaded_epnum;
-
- same_ep = (dev == old_dev && epnum == old_epnum);
- if (same_ep && !force_toggles)
- return;
-
- if (old_dev && !same_ep) {
- /* save the old end-points toggles: */
- u8 hrsl = spi_rd8(hcd, MAX3421_REG_HRSL);
-
- rcvtog = (hrsl >> MAX3421_HRSL_RCVTOGRD_BIT) & 1;
- sndtog = (hrsl >> MAX3421_HRSL_SNDTOGRD_BIT) & 1;
-
- /* no locking: HCD (i.e., we) own toggles, don't we? */
- usb_settoggle(old_dev, old_epnum, 0, rcvtog);
- usb_settoggle(old_dev, old_epnum, 1, sndtog);
- }
/* setup new endpoint's toggle bits: */
rcvtog = usb_gettoggle(dev, epnum, 0);
sndtog = usb_gettoggle(dev, epnum, 1);
hctl = (BIT(rcvtog + MAX3421_HCTL_RCVTOG0_BIT) |
BIT(sndtog + MAX3421_HCTL_SNDTOG0_BIT));
- max3421_hcd->loaded_epnum = epnum;
spi_wr8(hcd, MAX3421_REG_HCTL, hctl);
/*
* address-assignment so it's best to just always load the
* address whenever the end-point changed/was forced.
*/
- max3421_hcd->loaded_dev = dev;
spi_wr8(hcd, MAX3421_REG_PERADDR, dev->devnum);
}
struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd);
struct urb *urb, *curr_urb = NULL;
struct max3421_ep *max3421_ep;
- int epnum, force_toggles = 0;
+ int epnum;
struct usb_host_endpoint *ep;
struct list_head *pos;
unsigned long flags;
usb_settoggle(urb->dev, epnum, 0, 1);
usb_settoggle(urb->dev, epnum, 1, 1);
max3421_ep->pkt_state = PKT_STATE_SETUP;
- force_toggles = 1;
} else
max3421_ep->pkt_state = PKT_STATE_TRANSFER;
}
spin_unlock_irqrestore(&max3421_hcd->lock, flags);
max3421_ep->last_active = max3421_hcd->frame_number;
- max3421_set_address(hcd, urb->dev, epnum, force_toggles);
+ max3421_set_address(hcd, urb->dev, epnum);
max3421_set_speed(hcd, urb->dev);
max3421_next_transfer(hcd, 0);
return 1;
status = 0;
urb = max3421_hcd->curr_urb;
if (urb) {
+ /* save the old end-points toggles: */
+ u8 hrsl = spi_rd8(hcd, MAX3421_REG_HRSL);
+ int rcvtog = (hrsl >> MAX3421_HRSL_RCVTOGRD_BIT) & 1;
+ int sndtog = (hrsl >> MAX3421_HRSL_SNDTOGRD_BIT) & 1;
+ int epnum = usb_endpoint_num(&urb->ep->desc);
+
+ /* no locking: HCD (i.e., we) own toggles, don't we? */
+ usb_settoggle(urb->dev, epnum, 0, rcvtog);
+ usb_settoggle(urb->dev, epnum, 1, sndtog);
+
max3421_hcd->curr_urb = NULL;
spin_lock_irqsave(&max3421_hcd->lock, flags);
usb_hcd_unlink_urb_from_ep(hcd, urb);
* Inform the usbcore about resume-in-progress by returning
* a non-zero value even if there are no status changes.
*/
+ spin_lock_irqsave(&xhci->lock, flags);
+
status = bus_state->resuming_ports;
mask = PORT_CSC | PORT_PEC | PORT_OCC | PORT_PLC | PORT_WRC | PORT_CEC;
- spin_lock_irqsave(&xhci->lock, flags);
/* For each port, did anything change? If so, set that bit in buf. */
for (i = 0; i < max_ports; i++) {
temp = readl(ports[i]->addr);
return 0;
case RENESAS_ROM_STATUS_NO_RESULT: /* No result yet */
- dev_dbg(&pdev->dev, "Unknown ROM status ...\n");
- break;
+ return 0;
case RENESAS_ROM_STATUS_ERROR: /* Error State */
default: /* All other states are marked as "Reserved states" */
u8 fw_state;
int err;
- /*
- * Only if device has ROM and loaded FW we can skip loading and
- * return success. Otherwise (even unknown state), attempt to load FW.
- */
- if (renesas_check_rom(pdev) && !renesas_check_rom_state(pdev))
- return 0;
+ /* Check if device has ROM and loaded, if so skip everything */
+ err = renesas_check_rom(pdev);
+ if (err) { /* we have rom */
+ err = renesas_check_rom_state(pdev);
+ if (!err)
+ return err;
+ }
/*
* Test if the device is actually needing the firmware. As most
{ /* end: all zeroes */ }
};
MODULE_DEVICE_TABLE(pci, pci_ids);
+
+/*
+ * Without CONFIG_USB_XHCI_PCI_RENESAS renesas_xhci_check_request_fw() won't
+ * load firmware, so don't encumber the xhci-pci driver with it.
+ */
+#if IS_ENABLED(CONFIG_USB_XHCI_PCI_RENESAS)
MODULE_FIRMWARE("renesas_usb_fw.mem");
+#endif
/* pci driver glue; this is a "new style" PCI driver module */
static struct pci_driver xhci_pci_driver = {
list_for_each_entry(usb_phy, &phy_list, head) {
if (usb_phy->dev == dev)
- break;
+ return usb_phy;
}
- return usb_phy;
+ return NULL;
}
static void usb_phy_set_default_current(struct usb_phy *usb_phy)
struct usb_phy *usb_phy;
char uchger_state[50] = { 0 };
char uchger_type[50] = { 0 };
+ unsigned long flags;
+ spin_lock_irqsave(&phy_lock, flags);
usb_phy = __device_to_usb_phy(dev);
+ spin_unlock_irqrestore(&phy_lock, flags);
+
+ if (!usb_phy)
+ return -ENODEV;
snprintf(uchger_state, ARRAY_SIZE(uchger_state),
"USB_CHARGER_STATE=%s", usb_chger_state[usb_phy->chg_state]);
#define usbhsf_dma_map(p) __usbhsf_dma_map_ctrl(p, 1)
#define usbhsf_dma_unmap(p) __usbhsf_dma_map_ctrl(p, 0)
static int __usbhsf_dma_map_ctrl(struct usbhs_pkt *pkt, int map);
+static void usbhsf_tx_irq_ctrl(struct usbhs_pipe *pipe, int enable);
+static void usbhsf_rx_irq_ctrl(struct usbhs_pipe *pipe, int enable);
struct usbhs_pkt *usbhs_pkt_pop(struct usbhs_pipe *pipe, struct usbhs_pkt *pkt)
{
struct usbhs_priv *priv = usbhs_pipe_to_priv(pipe);
if (chan) {
dmaengine_terminate_all(chan);
usbhsf_dma_unmap(pkt);
+ } else {
+ if (usbhs_pipe_is_dir_in(pipe))
+ usbhsf_rx_irq_ctrl(pipe, 0);
+ else
+ usbhsf_tx_irq_ctrl(pipe, 0);
}
usbhs_pipe_clear_without_sequence(pipe, 0, 0);
{ USB_DEVICE(0x10C4, 0x89A4) }, /* CESINEL FTBC Flexible Thyristor Bridge Controller */
{ USB_DEVICE(0x10C4, 0x89FB) }, /* Qivicon ZigBee USB Radio Stick */
{ USB_DEVICE(0x10C4, 0x8A2A) }, /* HubZ dual ZigBee and Z-Wave dongle */
+ { USB_DEVICE(0x10C4, 0x8A5B) }, /* CEL EM3588 ZigBee USB Stick */
{ USB_DEVICE(0x10C4, 0x8A5E) }, /* CEL EM3588 ZigBee USB Stick Long Range */
{ USB_DEVICE(0x10C4, 0x8B34) }, /* Qivicon ZigBee USB Radio Stick */
{ USB_DEVICE(0x10C4, 0xEA60) }, /* Silicon Labs factory default */
{ USB_DEVICE(0x1901, 0x0194) }, /* GE Healthcare Remote Alarm Box */
{ USB_DEVICE(0x1901, 0x0195) }, /* GE B850/B650/B450 CP2104 DP UART interface */
{ USB_DEVICE(0x1901, 0x0196) }, /* GE B850 CP2105 DP UART interface */
- { USB_DEVICE(0x1901, 0x0197) }, /* GE CS1000 Display serial interface */
- { USB_DEVICE(0x1901, 0x0198) }, /* GE CS1000 M.2 Key E serial interface */
+ { USB_DEVICE(0x1901, 0x0197) }, /* GE CS1000 M.2 Key E serial interface */
+ { USB_DEVICE(0x1901, 0x0198) }, /* GE CS1000 Display serial interface */
{ USB_DEVICE(0x199B, 0xBA30) }, /* LORD WSDA-200-USB */
{ USB_DEVICE(0x19CF, 0x3000) }, /* Parrot NMEA GPS Flight Recorder */
{ USB_DEVICE(0x1ADB, 0x0001) }, /* Schweitzer Engineering C662 Cable */
#define QUECTEL_PRODUCT_UC15 0x9090
/* These u-blox products use Qualcomm's vendor ID */
#define UBLOX_PRODUCT_R410M 0x90b2
+#define UBLOX_PRODUCT_R6XX 0x90fa
/* These Yuga products use Qualcomm's vendor ID */
#define YUGA_PRODUCT_CLM920_NC5 0x9625
/* u-blox products using Qualcomm vendor ID */
{ USB_DEVICE(QUALCOMM_VENDOR_ID, UBLOX_PRODUCT_R410M),
.driver_info = RSVD(1) | RSVD(3) },
+ { USB_DEVICE(QUALCOMM_VENDOR_ID, UBLOX_PRODUCT_R6XX),
+ .driver_info = RSVD(3) },
/* Quectel products using Quectel vendor ID */
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC21, 0xff, 0xff, 0xff),
.driver_info = NUMEP2 },
USB_SC_DEVICE, USB_PR_DEVICE, NULL,
US_FL_NO_REPORT_OPCODES | US_FL_NO_SAME),
+/* Reported-by: Julian Sikorski <belegdol@gmail.com> */
+UNUSUAL_DEV(0x059f, 0x1061, 0x0000, 0x9999,
+ "LaCie",
+ "Rugged USB3-FW",
+ USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+ US_FL_IGNORE_UAS),
+
/*
* Apricorn USB3 dongle sometimes returns "USBSUSBSUSBS" in response to SCSI
* commands in UAS mode. Observed with the 1.28 firmware; are there others?
if (!fwnode)
return -ENODEV;
+ /*
+ * This fwnode has a "compatible" property, but is never populated as a
+ * struct device. Instead we simply parse it to read the properties.
+ * This it breaks fw_devlink=on. To maintain backward compatibility
+ * with existing DT files, we work around this by deleting any
+ * fwnode_links to/from this fwnode.
+ */
+ fw_devlink_purge_absent_suppliers(fwnode);
+
/*
* When both VDD and VSYS power supplies are present, the low power
* supply VSYS is selected when VSYS voltage is above 3.1 V.
typec_set_pwr_opmode(chip->port, chip->pwr_opmode);
if (client->irq) {
- ret = stusb160x_irq_init(chip, client->irq);
- if (ret)
- goto port_unregister;
-
chip->role_sw = fwnode_usb_role_switch_get(fwnode);
if (IS_ERR(chip->role_sw)) {
ret = PTR_ERR(chip->role_sw);
ret);
goto port_unregister;
}
+
+ ret = stusb160x_irq_init(chip, client->irq);
+ if (ret)
+ goto role_sw_put;
} else {
/*
* If Source or Dual power role, need to enable VDD supply
return 0;
+role_sw_put:
+ if (chip->role_sw)
+ usb_role_switch_put(chip->role_sw);
port_unregister:
typec_unregister_port(chip->port);
all_reg_disable:
if (!fwnode)
return -ENODEV;
+ /*
+ * This fwnode has a "compatible" property, but is never populated as a
+ * struct device. Instead we simply parse it to read the properties.
+ * This breaks fw_devlink=on. To maintain backward compatibility
+ * with existing DT files, we work around this by deleting any
+ * fwnode_links to/from this fwnode.
+ */
+ fw_devlink_purge_absent_suppliers(fwnode);
+
tps->role_sw = fwnode_usb_role_switch_get(fwnode);
if (IS_ERR(tps->role_sw)) {
ret = PTR_ERR(tps->role_sw);
cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
- MLX5_SET(cqc, cqc, c_eqn, eqn);
+ MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);
err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
with v4 shared libraries freely available from Compaq. If you're
going to use shared libraries from Tru64 version 5.0 or later, say N.
-config BINFMT_EM86
- tristate "Kernel support for Linux/Intel ELF binaries"
- depends on ALPHA
- help
- Say Y here if you want to be able to execute Linux/Intel ELF
- binaries just like native Alpha binaries on your Alpha machine. For
- this to work, you need to have the emulator /usr/bin/em86 in place.
-
- You can get the same functionality by saying N here and saying Y to
- "Kernel support for MISC binaries".
-
- You may answer M to compile the emulation support as a module and
- later load the module when you want to use a Linux/Intel binary. The
- module will be called binfmt_em86. If unsure, say Y.
-
config BINFMT_MISC
tristate "Kernel support for MISC binaries"
help
obj-$(CONFIG_FS_VERITY) += verity/
obj-$(CONFIG_FILE_LOCKING) += locks.o
obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o
-obj-$(CONFIG_BINFMT_EM86) += binfmt_em86.o
obj-$(CONFIG_BINFMT_MISC) += binfmt_misc.o
obj-$(CONFIG_BINFMT_SCRIPT) += binfmt_script.o
obj-$(CONFIG_BINFMT_ELF) += binfmt_elf.o
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/fs/binfmt_em86.c
- *
- * Based on linux/fs/binfmt_script.c
- * Copyright (C) 1996 Martin von Löwis
- * original #!-checking implemented by tytso.
- *
- * em86 changes Copyright (C) 1997 Jim Paradis
- */
-
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/binfmts.h>
-#include <linux/elf.h>
-#include <linux/init.h>
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/errno.h>
-
-
-#define EM86_INTERP "/usr/bin/em86"
-#define EM86_I_NAME "em86"
-
-static int load_em86(struct linux_binprm *bprm)
-{
- const char *i_name, *i_arg;
- char *interp;
- struct file * file;
- int retval;
- struct elfhdr elf_ex;
-
- /* Make sure this is a Linux/Intel ELF executable... */
- elf_ex = *((struct elfhdr *)bprm->buf);
-
- if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
- return -ENOEXEC;
-
- /* First of all, some simple consistency checks */
- if ((elf_ex.e_type != ET_EXEC && elf_ex.e_type != ET_DYN) ||
- (!((elf_ex.e_machine == EM_386) || (elf_ex.e_machine == EM_486))) ||
- !bprm->file->f_op->mmap) {
- return -ENOEXEC;
- }
-
- /* Need to be able to load the file after exec */
- if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE)
- return -ENOENT;
-
- /* Unlike in the script case, we don't have to do any hairy
- * parsing to find our interpreter... it's hardcoded!
- */
- interp = EM86_INTERP;
- i_name = EM86_I_NAME;
- i_arg = NULL; /* We reserve the right to add an arg later */
-
- /*
- * Splice in (1) the interpreter's name for argv[0]
- * (2) (optional) argument to interpreter
- * (3) filename of emulated file (replace argv[0])
- *
- * This is done in reverse order, because of how the
- * user environment and arguments are stored.
- */
- remove_arg_zero(bprm);
- retval = copy_string_kernel(bprm->filename, bprm);
- if (retval < 0) return retval;
- bprm->argc++;
- if (i_arg) {
- retval = copy_string_kernel(i_arg, bprm);
- if (retval < 0) return retval;
- bprm->argc++;
- }
- retval = copy_string_kernel(i_name, bprm);
- if (retval < 0) return retval;
- bprm->argc++;
-
- /*
- * OK, now restart the process with the interpreter's inode.
- * Note that we use open_exec() as the name is now in kernel
- * space, and we don't need to copy it.
- */
- file = open_exec(interp);
- if (IS_ERR(file))
- return PTR_ERR(file);
-
- bprm->interpreter = file;
- return 0;
-}
-
-static struct linux_binfmt em86_format = {
- .module = THIS_MODULE,
- .load_binary = load_em86,
-};
-
-static int __init init_em86_binfmt(void)
-{
- register_binfmt(&em86_format);
- return 0;
-}
-
-static void __exit exit_em86_binfmt(void)
-{
- unregister_binfmt(&em86_format);
-}
-
-core_initcall(init_em86_binfmt);
-module_exit(exit_em86_binfmt);
-MODULE_LICENSE("GPL");
free_percpu(bdev->bd_stats);
kfree(bdev->bd_meta_info);
+ if (!bdev_is_partition(bdev))
+ kfree(bdev->bd_disk);
kmem_cache_free(bdev_cachep, BDEV_I(inode));
}
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 time_seq, struct ulist **roots,
- bool ignore_offset)
+ bool ignore_offset, bool skip_commit_root_sem)
{
int ret;
- if (!trans)
+ if (!trans && !skip_commit_root_sem)
down_read(&fs_info->commit_root_sem);
ret = btrfs_find_all_roots_safe(trans, fs_info, bytenr,
time_seq, roots, ignore_offset);
- if (!trans)
+ if (!trans && !skip_commit_root_sem)
up_read(&fs_info->commit_root_sem);
return ret;
}
const u64 *extent_item_pos, bool ignore_offset);
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
- u64 time_seq, struct ulist **roots, bool ignore_offset);
+ u64 time_seq, struct ulist **roots, bool ignore_offset,
+ bool skip_commit_root_sem);
char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
u32 name_len, unsigned long name_off,
struct extent_buffer *eb_in, u64 parent,
btrfs_record_physical_zoned(inode, cb->start, bio);
btrfs_writepage_endio_finish_ordered(BTRFS_I(inode), NULL,
cb->start, cb->start + cb->len - 1,
- bio->bi_status == BLK_STS_OK);
+ !cb->errors);
end_compressed_writeback(inode, cb);
/* note, our inode could be gone now */
kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
if (qrecord_inserted)
- btrfs_qgroup_trace_extent_post(fs_info, record);
+ btrfs_qgroup_trace_extent_post(trans, record);
return 0;
}
if (qrecord_inserted)
- return btrfs_qgroup_trace_extent_post(fs_info, record);
+ return btrfs_qgroup_trace_extent_post(trans, record);
return 0;
}
static void csum_tree_block(struct extent_buffer *buf, u8 *result)
{
struct btrfs_fs_info *fs_info = buf->fs_info;
- const int num_pages = fs_info->nodesize >> PAGE_SHIFT;
+ const int num_pages = num_extent_pages(buf);
const int first_page_part = min_t(u32, PAGE_SIZE, fs_info->nodesize);
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
char *kaddr;
mutex_lock(&fs_info->fs_devices->device_list_mutex);
devices = &fs_info->fs_devices->devices;
list_for_each_entry(device, devices, dev_list) {
+ if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
+ continue;
+
ret = btrfs_trim_free_extents(device, &group_trimmed);
if (ret) {
dev_failed++;
goto out;
}
- if (ordered_extent->disk)
+ if (ordered_extent->bdev)
btrfs_rewrite_logical_zoned(ordered_extent);
btrfs_free_io_failure_record(inode, start, end);
entry->truncated_len = (u64)-1;
entry->qgroup_rsv = ret;
entry->physical = (u64)-1;
- entry->disk = NULL;
- entry->partno = (u8)-1;
ASSERT(type == BTRFS_ORDERED_REGULAR ||
type == BTRFS_ORDERED_NOCOW ||
* command in a workqueue context
*/
u64 physical;
- struct gendisk *disk;
- u8 partno;
+ struct block_device *bdev;
};
/*
return 0;
}
-int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
+int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans,
struct btrfs_qgroup_extent_record *qrecord)
{
struct ulist *old_root;
u64 bytenr = qrecord->bytenr;
int ret;
- ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root, false);
+ /*
+ * We are always called in a context where we are already holding a
+ * transaction handle. Often we are called when adding a data delayed
+ * reference from btrfs_truncate_inode_items() (truncating or unlinking),
+ * in which case we will be holding a write lock on extent buffer from a
+ * subvolume tree. In this case we can't allow btrfs_find_all_roots() to
+ * acquire fs_info->commit_root_sem, because that is a higher level lock
+ * that must be acquired before locking any extent buffers.
+ *
+ * So we want btrfs_find_all_roots() to not acquire the commit_root_sem
+ * but we can't pass it a non-NULL transaction handle, because otherwise
+ * it would not use commit roots and would lock extent buffers, causing
+ * a deadlock if it ends up trying to read lock the same extent buffer
+ * that was previously write locked at btrfs_truncate_inode_items().
+ *
+ * So pass a NULL transaction handle to btrfs_find_all_roots() and
+ * explicitly tell it to not acquire the commit_root_sem - if we are
+ * holding a transaction handle we don't need its protection.
+ */
+ ASSERT(trans != NULL);
+
+ ret = btrfs_find_all_roots(NULL, trans->fs_info, bytenr, 0, &old_root,
+ false, true);
if (ret < 0) {
- fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
- btrfs_warn(fs_info,
+ trans->fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+ btrfs_warn(trans->fs_info,
"error accounting new delayed refs extent (err code: %d), quota inconsistent",
ret);
return 0;
kfree(record);
return 0;
}
- return btrfs_qgroup_trace_extent_post(fs_info, record);
+ return btrfs_qgroup_trace_extent_post(trans, record);
}
int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
/* Search commit root to find old_roots */
ret = btrfs_find_all_roots(NULL, fs_info,
record->bytenr, 0,
- &record->old_roots, false);
+ &record->old_roots, false, false);
if (ret < 0)
goto cleanup;
}
* current root. It's safe inside commit_transaction().
*/
ret = btrfs_find_all_roots(trans, fs_info,
- record->bytenr, BTRFS_SEQ_LAST, &new_roots, false);
+ record->bytenr, BTRFS_SEQ_LAST, &new_roots, false, false);
if (ret < 0)
goto cleanup;
if (qgroup_to_skip) {
num_bytes = found.offset;
ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0,
- &roots, false);
+ &roots, false, false);
if (ret < 0)
goto out;
/* For rescan, just pass old_roots as NULL */
* using current root, then we can move all expensive backref walk out of
* transaction committing, but not now as qgroup accounting will be wrong again.
*/
-int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
+int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans,
struct btrfs_qgroup_extent_record *qrecord);
/*
* quota.
*/
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
- false);
+ false, false);
if (ret) {
ulist_free(old_roots);
test_err("couldn't find old roots: %d", ret);
return ret;
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
- false);
+ false, false);
if (ret) {
ulist_free(old_roots);
ulist_free(new_roots);
new_roots = NULL;
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
- false);
+ false, false);
if (ret) {
ulist_free(old_roots);
test_err("couldn't find old roots: %d", ret);
return -EINVAL;
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
- false);
+ false, false);
if (ret) {
ulist_free(old_roots);
ulist_free(new_roots);
}
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
- false);
+ false, false);
if (ret) {
ulist_free(old_roots);
test_err("couldn't find old roots: %d", ret);
return ret;
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
- false);
+ false, false);
if (ret) {
ulist_free(old_roots);
ulist_free(new_roots);
}
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
- false);
+ false, false);
if (ret) {
ulist_free(old_roots);
test_err("couldn't find old roots: %d", ret);
return ret;
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
- false);
+ false, false);
if (ret) {
ulist_free(old_roots);
ulist_free(new_roots);
}
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
- false);
+ false, false);
if (ret) {
ulist_free(old_roots);
test_err("couldn't find old roots: %d", ret);
return ret;
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
- false);
+ false, false);
if (ret) {
ulist_free(old_roots);
ulist_free(new_roots);
spin_lock(&inode->lock);
inode->logged_trans = trans->transid;
/*
- * Don't update last_log_commit if we logged that an inode exists
- * after it was loaded to memory (full_sync bit set).
- * This is to prevent data loss when we do a write to the inode,
- * then the inode gets evicted after all delalloc was flushed,
- * then we log it exists (due to a rename for example) and then
- * fsync it. This last fsync would do nothing (not logging the
- * extents previously written).
+ * Don't update last_log_commit if we logged that an inode exists.
+ * We do this for two reasons:
+ *
+ * 1) We might have had buffered writes to this inode that were
+ * flushed and had their ordered extents completed in this
+ * transaction, but we did not previously log the inode with
+ * LOG_INODE_ALL. Later the inode was evicted and after that
+ * it was loaded again and this LOG_INODE_EXISTS log operation
+ * happened. We must make sure that if an explicit fsync against
+ * the inode is performed later, it logs the new extents, an
+ * updated inode item, etc, and syncs the log. The same logic
+ * applies to direct IO writes instead of buffered writes.
+ *
+ * 2) When we log the inode with LOG_INODE_EXISTS, its inode item
+ * is logged with an i_size of 0 or whatever value was logged
+ * before. If later the i_size of the inode is increased by a
+ * truncate operation, the log is synced through an fsync of
+ * some other inode and then finally an explicit fsync against
+ * this inode is made, we must make sure this fsync logs the
+ * inode with the new i_size, the hole between old i_size and
+ * the new i_size, and syncs the log.
*/
- if (inode_only != LOG_INODE_EXISTS ||
- !test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags))
+ if (inode_only != LOG_INODE_EXISTS)
inode->last_log_commit = inode->last_sub_trans;
spin_unlock(&inode->lock);
}
* if this inode hasn't been logged and directory we're renaming it
* from hasn't been logged, we don't need to log it
*/
- if (inode->logged_trans < trans->transid &&
- (!old_dir || old_dir->logged_trans < trans->transid))
+ if (!inode_logged(trans, inode) &&
+ (!old_dir || !inode_logged(trans, old_dir)))
return;
/*
if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
list_del_init(&device->dev_alloc_list);
clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
+ fs_devices->rw_devices--;
}
list_del_init(&device->dev_list);
fs_devices->num_devices--;
return;
ordered->physical = physical;
- ordered->disk = bio->bi_bdev->bd_disk;
- ordered->partno = bio->bi_bdev->bd_partno;
+ ordered->bdev = bio->bi_bdev;
btrfs_put_ordered_extent(ordered);
}
struct extent_map_tree *em_tree;
struct extent_map *em;
struct btrfs_ordered_sum *sum;
- struct block_device *bdev;
u64 orig_logical = ordered->disk_bytenr;
u64 *logical = NULL;
int nr, stripe_len;
/* Zoned devices should not have partitions. So, we can assume it is 0 */
- ASSERT(ordered->partno == 0);
- bdev = bdgrab(ordered->disk->part0);
- if (WARN_ON(!bdev))
+ ASSERT(!bdev_is_partition(ordered->bdev));
+ if (WARN_ON(!ordered->bdev))
return;
- if (WARN_ON(btrfs_rmap_block(fs_info, orig_logical, bdev,
+ if (WARN_ON(btrfs_rmap_block(fs_info, orig_logical, ordered->bdev,
ordered->physical, &logical, &nr,
&stripe_len)))
goto out;
out:
kfree(logical);
- bdput(bdev);
}
bool btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
break;
case CEPH_MDS_SESSION_CLOSING:
/* Should never reach this when we're unmounting */
- WARN_ON_ONCE(true);
+ WARN_ON_ONCE(s->s_ttl);
fallthrough;
case CEPH_MDS_SESSION_NEW:
case CEPH_MDS_SESSION_RESTARTING:
InformationLevel) - 4;
offset = param_offset + params;
- /* Setup pointer to Request Data (inode type) */
- pRqD = (struct unlink_psx_rq *)(((char *)&pSMB->hdr.Protocol) + offset);
+ /* Setup pointer to Request Data (inode type).
+ * Note that SMB offsets are from the beginning of SMB which is 4 bytes
+ * in, after RFC1001 field
+ */
+ pRqD = (struct unlink_psx_rq *)((char *)(pSMB) + offset + 4);
pRqD->type = cpu_to_le16(type);
pSMB->ParameterOffset = cpu_to_le16(param_offset);
pSMB->DataOffset = cpu_to_le16(offset);
param_offset = offsetof(struct smb_com_transaction2_spi_req,
InformationLevel) - 4;
offset = param_offset + params;
- pdata = (OPEN_PSX_REQ *)(((char *)&pSMB->hdr.Protocol) + offset);
+ /* SMB offsets are from the beginning of SMB which is 4 bytes in, after RFC1001 field */
+ pdata = (OPEN_PSX_REQ *)((char *)(pSMB) + offset + 4);
pdata->Level = cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC);
pdata->Permissions = cpu_to_le64(mode);
pdata->PosixOpenFlags = cpu_to_le32(posix_flags);
#ifdef CONFIG_CIFS_DFS_UPCALL
struct super_block *sb = NULL;
struct cifs_sb_info *cifs_sb = NULL;
- struct dfs_cache_tgt_list tgt_list = {0};
+ struct dfs_cache_tgt_list tgt_list = DFS_CACHE_TGT_LIST_INIT(tgt_list);
struct dfs_cache_tgt_iterator *tgt_it = NULL;
#endif
{
int rc;
char *npath = NULL;
- struct dfs_cache_tgt_list tgt_list = {0};
+ struct dfs_cache_tgt_list tgt_list = DFS_CACHE_TGT_LIST_INIT(tgt_list);
struct dfs_cache_tgt_iterator *tgt_it = NULL;
struct smb3_fs_context tmp_ctx = {NULL};
#include "cifs_debug.h"
#include "cifs_unicode.h"
#include "smb2glob.h"
+#include "dns_resolve.h"
#include "dfs_cache.h"
err_free_it:
list_for_each_entry_safe(it, nit, head, it_list) {
+ list_del(&it->it_list);
kfree(it->it_name);
kfree(it);
}
return 0;
}
+static bool target_share_equal(struct TCP_Server_Info *server, const char *s1, const char *s2)
+{
+ char unc[sizeof("\\\\") + SERVER_NAME_LENGTH] = {0};
+ const char *host;
+ size_t hostlen;
+ char *ip = NULL;
+ struct sockaddr sa;
+ bool match;
+ int rc;
+
+ if (strcasecmp(s1, s2))
+ return false;
+
+ /*
+ * Resolve share's hostname and check if server address matches. Otherwise just ignore it
+ * as we could not have upcall to resolve hostname or failed to convert ip address.
+ */
+ match = true;
+ extract_unc_hostname(s1, &host, &hostlen);
+ scnprintf(unc, sizeof(unc), "\\\\%.*s", (int)hostlen, host);
+
+ rc = dns_resolve_server_name_to_ip(unc, &ip, NULL);
+ if (rc < 0) {
+ cifs_dbg(FYI, "%s: could not resolve %.*s. assuming server address matches.\n",
+ __func__, (int)hostlen, host);
+ return true;
+ }
+
+ if (!cifs_convert_address(&sa, ip, strlen(ip))) {
+ cifs_dbg(VFS, "%s: failed to convert address \'%s\'. skip address matching.\n",
+ __func__, ip);
+ } else {
+ mutex_lock(&server->srv_mutex);
+ match = cifs_match_ipaddr((struct sockaddr *)&server->dstaddr, &sa);
+ mutex_unlock(&server->srv_mutex);
+ }
+
+ kfree(ip);
+ return match;
+}
+
+/*
+ * Mark dfs tcon for reconnecting when the currently connected tcon does not match any of the new
+ * target shares in @refs.
+ */
+static void mark_for_reconnect_if_needed(struct cifs_tcon *tcon, struct dfs_cache_tgt_list *tl,
+ const struct dfs_info3_param *refs, int numrefs)
+{
+ struct dfs_cache_tgt_iterator *it;
+ int i;
+
+ for (it = dfs_cache_get_tgt_iterator(tl); it; it = dfs_cache_get_next_tgt(tl, it)) {
+ for (i = 0; i < numrefs; i++) {
+ if (target_share_equal(tcon->ses->server, dfs_cache_get_tgt_name(it),
+ refs[i].node_name))
+ return;
+ }
+ }
+
+ cifs_dbg(FYI, "%s: no cached or matched targets. mark dfs share for reconnect.\n", __func__);
+ for (i = 0; i < tcon->ses->chan_count; i++) {
+ spin_lock(&GlobalMid_Lock);
+ if (tcon->ses->chans[i].server->tcpStatus != CifsExiting)
+ tcon->ses->chans[i].server->tcpStatus = CifsNeedReconnect;
+ spin_unlock(&GlobalMid_Lock);
+ }
+}
+
+/* Refresh dfs referral of tcon and mark it for reconnect if needed */
+static int refresh_tcon(struct cifs_ses **sessions, struct cifs_tcon *tcon, bool force_refresh)
+{
+ const char *path = tcon->dfs_path + 1;
+ struct cifs_ses *ses;
+ struct cache_entry *ce;
+ struct dfs_info3_param *refs = NULL;
+ int numrefs = 0;
+ bool needs_refresh = false;
+ struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl);
+ int rc = 0;
+ unsigned int xid;
+
+ ses = find_ipc_from_server_path(sessions, path);
+ if (IS_ERR(ses)) {
+ cifs_dbg(FYI, "%s: could not find ipc session\n", __func__);
+ return PTR_ERR(ses);
+ }
+
+ down_read(&htable_rw_lock);
+ ce = lookup_cache_entry(path);
+ needs_refresh = force_refresh || IS_ERR(ce) || cache_entry_expired(ce);
+ if (!IS_ERR(ce)) {
+ rc = get_targets(ce, &tl);
+ if (rc)
+ cifs_dbg(FYI, "%s: could not get dfs targets: %d\n", __func__, rc);
+ }
+ up_read(&htable_rw_lock);
+
+ if (!needs_refresh) {
+ rc = 0;
+ goto out;
+ }
+
+ xid = get_xid();
+ rc = get_dfs_referral(xid, ses, path, &refs, &numrefs);
+ free_xid(xid);
+
+ /* Create or update a cache entry with the new referral */
+ if (!rc) {
+ dump_refs(refs, numrefs);
+
+ down_write(&htable_rw_lock);
+ ce = lookup_cache_entry(path);
+ if (IS_ERR(ce))
+ add_cache_entry_locked(refs, numrefs);
+ else if (force_refresh || cache_entry_expired(ce))
+ update_cache_entry_locked(ce, refs, numrefs);
+ up_write(&htable_rw_lock);
+
+ mark_for_reconnect_if_needed(tcon, &tl, refs, numrefs);
+ }
+
+out:
+ dfs_cache_free_tgts(&tl);
+ free_dfs_info_array(refs, numrefs);
+ return rc;
+}
+
+/**
+ * dfs_cache_remount_fs - remount a DFS share
+ *
+ * Reconfigure dfs mount by forcing a new DFS referral and if the currently cached targets do not
+ * match any of the new targets, mark it for reconnect.
+ *
+ * @cifs_sb: cifs superblock.
+ *
+ * Return zero if remounted, otherwise non-zero.
+ */
+int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb)
+{
+ struct cifs_tcon *tcon;
+ struct mount_group *mg;
+ struct cifs_ses *sessions[CACHE_MAX_ENTRIES + 1] = {NULL};
+ int rc;
+
+ if (!cifs_sb || !cifs_sb->master_tlink)
+ return -EINVAL;
+
+ tcon = cifs_sb_master_tcon(cifs_sb);
+ if (!tcon->dfs_path) {
+ cifs_dbg(FYI, "%s: not a dfs tcon\n", __func__);
+ return 0;
+ }
+
+ if (uuid_is_null(&cifs_sb->dfs_mount_id)) {
+ cifs_dbg(FYI, "%s: tcon has no dfs mount group id\n", __func__);
+ return -EINVAL;
+ }
+
+ mutex_lock(&mount_group_list_lock);
+ mg = find_mount_group_locked(&cifs_sb->dfs_mount_id);
+ if (IS_ERR(mg)) {
+ mutex_unlock(&mount_group_list_lock);
+ cifs_dbg(FYI, "%s: tcon has ipc session to refresh referral\n", __func__);
+ return PTR_ERR(mg);
+ }
+ kref_get(&mg->refcount);
+ mutex_unlock(&mount_group_list_lock);
+
+ spin_lock(&mg->lock);
+ memcpy(&sessions, mg->sessions, mg->num_sessions * sizeof(mg->sessions[0]));
+ spin_unlock(&mg->lock);
+
+ /*
+ * After reconnecting to a different server, unique ids won't match anymore, so we disable
+ * serverino. This prevents dentry revalidation to think the dentry are stale (ESTALE).
+ */
+ cifs_autodisable_serverino(cifs_sb);
+ /*
+ * Force the use of prefix path to support failover on DFS paths that resolve to targets
+ * that have different prefix paths.
+ */
+ cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH;
+ rc = refresh_tcon(sessions, tcon, true);
+
+ kref_put(&mg->refcount, mount_group_release);
+ return rc;
+}
+
/*
* Refresh all active dfs mounts regardless of whether they are in cache or not.
* (cache can be cleared)
struct cifs_ses *ses;
struct cifs_tcon *tcon, *ntcon;
struct list_head tcons;
- unsigned int xid;
INIT_LIST_HEAD(&tcons);
spin_unlock(&cifs_tcp_ses_lock);
list_for_each_entry_safe(tcon, ntcon, &tcons, ulist) {
- const char *path = tcon->dfs_path + 1;
- struct cache_entry *ce;
- struct dfs_info3_param *refs = NULL;
- int numrefs = 0;
- bool needs_refresh = false;
- int rc = 0;
-
list_del_init(&tcon->ulist);
-
- ses = find_ipc_from_server_path(sessions, path);
- if (IS_ERR(ses))
- goto next_tcon;
-
- down_read(&htable_rw_lock);
- ce = lookup_cache_entry(path);
- needs_refresh = IS_ERR(ce) || cache_entry_expired(ce);
- up_read(&htable_rw_lock);
-
- if (!needs_refresh)
- goto next_tcon;
-
- xid = get_xid();
- rc = get_dfs_referral(xid, ses, path, &refs, &numrefs);
- free_xid(xid);
-
- /* Create or update a cache entry with the new referral */
- if (!rc) {
- down_write(&htable_rw_lock);
- ce = lookup_cache_entry(path);
- if (IS_ERR(ce))
- add_cache_entry_locked(refs, numrefs);
- else if (cache_entry_expired(ce))
- update_cache_entry_locked(ce, refs, numrefs);
- up_write(&htable_rw_lock);
- }
-
-next_tcon:
- free_dfs_info_array(refs, numrefs);
+ refresh_tcon(sessions, tcon, false);
cifs_put_tcon(tcon);
}
}
#include <linux/uuid.h>
#include "cifsglob.h"
+#define DFS_CACHE_TGT_LIST_INIT(var) { .tl_numtgts = 0, .tl_list = LIST_HEAD_INIT((var).tl_list), }
+
struct dfs_cache_tgt_list {
int tl_numtgts;
struct list_head tl_list;
void dfs_cache_put_refsrv_sessions(const uuid_t *mount_id);
void dfs_cache_add_refsrv_session(const uuid_t *mount_id, struct cifs_ses *ses);
char *dfs_cache_canonical_path(const char *path, const struct nls_table *cp, int remap);
+int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb);
static inline struct dfs_cache_tgt_iterator *
dfs_cache_get_next_tgt(struct dfs_cache_tgt_list *tl,
static int cifs_readpage(struct file *file, struct page *page)
{
- loff_t offset = (loff_t)page->index << PAGE_SHIFT;
+ loff_t offset = page_file_offset(page);
int rc = -EACCES;
unsigned int xid;
#include <linux/magic.h>
#include <linux/security.h>
#include <net/net_namespace.h>
+#ifdef CONFIG_CIFS_DFS_UPCALL
+#include "dfs_cache.h"
+#endif
*/
#include <linux/ctype.h>
smb3_cleanup_fs_context_contents(cifs_sb->ctx);
rc = smb3_fs_context_dup(cifs_sb->ctx, ctx);
smb3_update_mnt_flags(cifs_sb);
+#ifdef CONFIG_CIFS_DFS_UPCALL
+ if (!rc)
+ rc = dfs_cache_remount_fs(cifs_sb);
+#endif
return rc;
}
ctx->cred_uid = uid;
ctx->cruid_specified = true;
break;
+ case Opt_backupuid:
+ uid = make_kuid(current_user_ns(), result.uint_32);
+ if (!uid_valid(uid))
+ goto cifs_parse_mount_err;
+ ctx->backupuid = uid;
+ ctx->backupuid_specified = true;
+ break;
case Opt_backupgid:
gid = make_kgid(current_user_ns(), result.uint_32);
if (!gid_valid(gid))
{
struct cifs_io_parms io_parms = {0};
int nbytes;
+ int rc = 0;
struct kvec iov[2];
io_parms.netfid = cfile->fid.netfid;
io_parms.tcon = tcon;
io_parms.persistent_fid = cfile->fid.persistent_fid;
io_parms.volatile_fid = cfile->fid.volatile_fid;
- io_parms.offset = off;
- io_parms.length = len;
- /* iov[0] is reserved for smb header */
- iov[1].iov_base = buf;
- iov[1].iov_len = io_parms.length;
- return SMB2_write(xid, &io_parms, &nbytes, iov, 1);
+ while (len) {
+ io_parms.offset = off;
+ io_parms.length = len;
+ if (io_parms.length > SMB2_MAX_BUFFER_SIZE)
+ io_parms.length = SMB2_MAX_BUFFER_SIZE;
+ /* iov[0] is reserved for smb header */
+ iov[1].iov_base = buf;
+ iov[1].iov_len = io_parms.length;
+ rc = SMB2_write(xid, &io_parms, &nbytes, iov, 1);
+ if (rc)
+ break;
+ if (nbytes > len)
+ return -EINVAL;
+ buf += nbytes;
+ off += nbytes;
+ len -= nbytes;
+ }
+ return rc;
}
static int smb3_simple_fallocate_range(unsigned int xid,
(char **)&out_data, &out_data_len);
if (rc)
goto out;
- /*
- * It is already all allocated
- */
- if (out_data_len == 0)
- goto out;
buf = kzalloc(1024 * 1024, GFP_KERNEL);
if (buf == NULL) {
goto out;
}
+ if (keep_size == true) {
+ /*
+ * We can not preallocate pages beyond the end of the file
+ * in SMB2
+ */
+ if (off >= i_size_read(inode)) {
+ rc = 0;
+ goto out;
+ }
+ /*
+ * For fallocates that are partially beyond the end of file,
+ * clamp len so we only fallocate up to the end of file.
+ */
+ if (off + len > i_size_read(inode)) {
+ len = i_size_read(inode) - off;
+ }
+ }
+
if ((keep_size == true) || (i_size_read(inode) >= off + len)) {
/*
* At this point, we are trying to fallocate an internal
return err;
}
-static bool ext2_check_page(struct page *page, int quiet)
+static bool ext2_check_page(struct page *page, int quiet, char *kaddr)
{
struct inode *dir = page->mapping->host;
struct super_block *sb = dir->i_sb;
unsigned chunk_size = ext2_chunk_size(dir);
- char *kaddr = page_address(page);
u32 max_inumber = le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count);
unsigned offs, rec_len;
unsigned limit = PAGE_SIZE;
if (!IS_ERR(page)) {
*page_addr = kmap_local_page(page);
if (unlikely(!PageChecked(page))) {
- if (PageError(page) || !ext2_check_page(page, quiet))
+ if (PageError(page) || !ext2_check_page(page, quiet,
+ *page_addr))
goto fail;
}
}
* ext2_delete_entry deletes a directory entry by merging it with the
* previous entry. Page is up-to-date.
*/
-int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page )
+int ext2_delete_entry (struct ext2_dir_entry_2 *dir, struct page *page,
+ char *kaddr)
{
struct inode *inode = page->mapping->host;
- char *kaddr = page_address(page);
unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1);
unsigned to = ((char *)dir - kaddr) +
ext2_rec_len_from_disk(dir->rec_len);
de = ext2_next_entry(de);
}
if (pde)
- from = (char*)pde - (char*)page_address(page);
+ from = (char *)pde - kaddr;
pos = page_offset(page) + from;
lock_page(page);
err = ext2_prepare_chunk(page, pos, to - from);
extern int ext2_make_empty(struct inode *, struct inode *);
extern struct ext2_dir_entry_2 *ext2_find_entry(struct inode *, const struct qstr *,
struct page **, void **res_page_addr);
-extern int ext2_delete_entry (struct ext2_dir_entry_2 *, struct page *);
+extern int ext2_delete_entry(struct ext2_dir_entry_2 *dir, struct page *page,
+ char *kaddr);
extern int ext2_empty_dir (struct inode *);
extern struct ext2_dir_entry_2 *ext2_dotdot(struct inode *dir, struct page **p, void **pa);
extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, void *,
goto out;
}
- err = ext2_delete_entry (de, page);
+ err = ext2_delete_entry (de, page, page_addr);
ext2_put_page(page, page_addr);
if (err)
goto out;
old_inode->i_ctime = current_time(old_inode);
mark_inode_dirty(old_inode);
- ext2_delete_entry(old_de, old_page);
+ ext2_delete_entry(old_de, old_page, old_page_addr);
if (dir_de) {
if (old_dir != new_dir)
*/
smp_mb();
+ if (IS_DAX(inode))
+ return false;
+
/* while holding I_WB_SWITCH, no one else can update the association */
spin_lock(&inode->i_lock);
if (!(inode->i_sb->s_flags & SB_ACTIVE) ||
static const struct fs_parameter_spec hugetlb_fs_parameters[] = {
fsparam_u32 ("gid", Opt_gid),
fsparam_string("min_size", Opt_min_size),
- fsparam_u32 ("mode", Opt_mode),
+ fsparam_u32oct("mode", Opt_mode),
fsparam_string("nr_inodes", Opt_nr_inodes),
fsparam_string("pagesize", Opt_pagesize),
fsparam_string("size", Opt_size),
*/
extern const struct fs_context_operations legacy_fs_context_ops;
extern int parse_monolithic_mount_data(struct fs_context *, void *);
-extern void fc_drop_locked(struct fs_context *);
extern void vfs_clean_context(struct fs_context *fc);
extern int finish_clean_context(struct fs_context *fc);
int work_flags;
unsigned long flags;
- if (test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state)) {
+ /*
+ * If io-wq is exiting for this task, or if the request has explicitly
+ * been marked as one that should not get executed, cancel it here.
+ */
+ if (test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state) ||
+ (work->flags & IO_WQ_WORK_CANCEL)) {
io_run_cancel(work, wqe);
return;
}
{
struct io_kiocb *cur;
- io_for_each_link(cur, req)
- io_prep_async_work(cur);
+ if (req->flags & REQ_F_LINK_TIMEOUT) {
+ struct io_ring_ctx *ctx = req->ctx;
+
+ spin_lock_irq(&ctx->completion_lock);
+ io_for_each_link(cur, req)
+ io_prep_async_work(cur);
+ spin_unlock_irq(&ctx->completion_lock);
+ } else {
+ io_for_each_link(cur, req)
+ io_prep_async_work(cur);
+ }
}
static void io_queue_async_work(struct io_kiocb *req)
/* init ->work of the whole link before punting */
io_prep_async_link(req);
+
+ /*
+ * Not expected to happen, but if we do have a bug where this _can_
+ * happen, catch it here and ensure the request is marked as
+ * canceled. That will make io-wq go through the usual work cancel
+ * procedure rather than attempt to run this request (or create a new
+ * worker for it).
+ */
+ if (WARN_ON_ONCE(!same_thread_group(req->task, current)))
+ req->work.flags |= IO_WQ_WORK_CANCEL;
+
trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req,
&req->work, req->flags);
io_wq_enqueue(tctx->io_wq, &req->work);
node = next;
}
if (wq_list_empty(&tctx->task_list)) {
+ spin_lock_irq(&tctx->task_lock);
clear_bit(0, &tctx->task_state);
- if (wq_list_empty(&tctx->task_list))
+ if (wq_list_empty(&tctx->task_list)) {
+ spin_unlock_irq(&tctx->task_lock);
break;
+ }
+ spin_unlock_irq(&tctx->task_lock);
/* another tctx_task_work() is enqueued, yield */
if (test_and_set_bit(0, &tctx->task_state))
break;
io_req_task_work_add(req);
}
+static void io_req_task_queue_reissue(struct io_kiocb *req)
+{
+ req->io_task_work.func = io_queue_async_work;
+ io_req_task_work_add(req);
+}
+
static inline void io_queue_next(struct io_kiocb *req)
{
struct io_kiocb *nxt = io_req_find_next(req);
* Find and free completed poll iocbs
*/
static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
- struct list_head *done)
+ struct list_head *done, bool resubmit)
{
struct req_batch rb;
struct io_kiocb *req;
req = list_first_entry(done, struct io_kiocb, inflight_entry);
list_del(&req->inflight_entry);
- if (READ_ONCE(req->result) == -EAGAIN &&
+ if (READ_ONCE(req->result) == -EAGAIN && resubmit &&
!(req->flags & REQ_F_DONT_REISSUE)) {
req->iopoll_completed = 0;
req_ref_get(req);
- io_queue_async_work(req);
+ io_req_task_queue_reissue(req);
continue;
}
}
static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
- long min)
+ long min, bool resubmit)
{
struct io_kiocb *req, *tmp;
LIST_HEAD(done);
}
if (!list_empty(&done))
- io_iopoll_complete(ctx, nr_events, &done);
+ io_iopoll_complete(ctx, nr_events, &done, resubmit);
return ret;
}
while (!list_empty(&ctx->iopoll_list)) {
unsigned int nr_events = 0;
- io_do_iopoll(ctx, &nr_events, 0);
+ io_do_iopoll(ctx, &nr_events, 0, false);
/* let it sleep and repeat later if can't complete a request */
if (nr_events == 0)
list_empty(&ctx->iopoll_list))
break;
}
- ret = io_do_iopoll(ctx, &nr_events, min);
+ ret = io_do_iopoll(ctx, &nr_events, min, true);
} while (!ret && nr_events < min && !need_resched());
out:
mutex_unlock(&ctx->uring_lock);
*/
if (percpu_ref_is_dying(&ctx->refs))
return false;
+ /*
+ * Play it safe and assume not safe to re-import and reissue if we're
+ * not in the original thread group (or in task context).
+ */
+ if (!same_thread_group(req->task, current) || !in_task())
+ return false;
return true;
}
#else
req->flags &= ~REQ_F_REISSUE;
if (io_resubmit_prep(req)) {
req_ref_get(req);
- io_queue_async_work(req);
+ io_req_task_queue_reissue(req);
} else {
int cflags = 0;
struct io_poll_table {
struct poll_table_struct pt;
struct io_kiocb *req;
+ int nr_entries;
int error;
};
if (req->poll.events & EPOLLONESHOT)
flags = 0;
if (!io_cqring_fill_event(ctx, req->user_data, error, flags)) {
- io_poll_remove_waitqs(req);
req->poll.done = true;
flags = 0;
}
done = io_poll_complete(req, req->result);
if (done) {
+ io_poll_remove_double(req);
hash_del(&req->hash_node);
} else {
req->result = 0;
struct io_kiocb *req = pt->req;
/*
- * If poll->head is already set, it's because the file being polled
- * uses multiple waitqueues for poll handling (eg one for read, one
- * for write). Setup a separate io_poll_iocb if this happens.
+ * The file being polled uses multiple waitqueues for poll handling
+ * (e.g. one for read, one for write). Setup a separate io_poll_iocb
+ * if this happens.
*/
- if (unlikely(poll->head)) {
+ if (unlikely(pt->nr_entries)) {
struct io_poll_iocb *poll_one = poll;
/* already have a 2nd entry, fail a third attempt */
*poll_ptr = poll;
}
- pt->error = 0;
+ pt->nr_entries++;
poll->head = head;
if (poll->events & EPOLLEXCLUSIVE)
ipt->pt._key = mask;
ipt->req = req;
- ipt->error = -EINVAL;
+ ipt->error = 0;
+ ipt->nr_entries = 0;
mask = vfs_poll(req->file, &ipt->pt) & poll->events;
+ if (unlikely(!ipt->nr_entries) && !ipt->error)
+ ipt->error = -EINVAL;
spin_lock_irq(&ctx->completion_lock);
+ if (ipt->error || (mask && (poll->events & EPOLLONESHOT)))
+ io_poll_remove_double(req);
if (likely(poll->head)) {
spin_lock(&poll->head->lock);
if (unlikely(list_empty(&poll->wait.entry))) {
ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask,
io_async_wake);
if (ret || ipt.error) {
- io_poll_remove_double(req);
spin_unlock_irq(&ctx->completion_lock);
if (ret)
return IO_APOLL_READY;
mutex_lock(&ctx->uring_lock);
if (!list_empty(&ctx->iopoll_list))
- io_do_iopoll(ctx, &nr_events, 0);
+ io_do_iopoll(ctx, &nr_events, 0, true);
/*
* Don't submit if refs are dying, good for io_uring_register(),
struct io_wq_data data;
unsigned int concurrency;
+ mutex_lock(&ctx->uring_lock);
hash = ctx->hash_map;
if (!hash) {
hash = kzalloc(sizeof(*hash), GFP_KERNEL);
- if (!hash)
+ if (!hash) {
+ mutex_unlock(&ctx->uring_lock);
return ERR_PTR(-ENOMEM);
+ }
refcount_set(&hash->refs, 1);
init_waitqueue_head(&hash->wait);
ctx->hash_map = hash;
}
+ mutex_unlock(&ctx->uring_lock);
data.hash = hash;
data.task = task;
f = fdget(p->wq_fd);
if (!f.file)
return -ENXIO;
- fdput(f);
- if (f.file->f_op != &io_uring_fops)
+ if (f.file->f_op != &io_uring_fops) {
+ fdput(f);
return -EINVAL;
+ }
+ fdput(f);
}
if (ctx->flags & IORING_SETUP_SQPOLL) {
struct task_struct *tsk;
}
}
+/*
+ * zero out partial blocks of one cluster.
+ *
+ * start: file offset where zero starts, will be made upper block aligned.
+ * len: it will be trimmed to the end of current cluster if "start + len"
+ * is bigger than it.
+ */
+static int ocfs2_zeroout_partial_cluster(struct inode *inode,
+ u64 start, u64 len)
+{
+ int ret;
+ u64 start_block, end_block, nr_blocks;
+ u64 p_block, offset;
+ u32 cluster, p_cluster, nr_clusters;
+ struct super_block *sb = inode->i_sb;
+ u64 end = ocfs2_align_bytes_to_clusters(sb, start);
+
+ if (start + len < end)
+ end = start + len;
+
+ start_block = ocfs2_blocks_for_bytes(sb, start);
+ end_block = ocfs2_blocks_for_bytes(sb, end);
+ nr_blocks = end_block - start_block;
+ if (!nr_blocks)
+ return 0;
+
+ cluster = ocfs2_bytes_to_clusters(sb, start);
+ ret = ocfs2_get_clusters(inode, cluster, &p_cluster,
+ &nr_clusters, NULL);
+ if (ret)
+ return ret;
+ if (!p_cluster)
+ return 0;
+
+ offset = start_block - ocfs2_clusters_to_blocks(sb, cluster);
+ p_block = ocfs2_clusters_to_blocks(sb, p_cluster) + offset;
+ return sb_issue_zeroout(sb, p_block, nr_blocks, GFP_NOFS);
+}
+
static int ocfs2_zero_partial_clusters(struct inode *inode,
u64 start, u64 len)
{
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
unsigned int csize = osb->s_clustersize;
handle_t *handle;
+ loff_t isize = i_size_read(inode);
/*
* The "start" and "end" values are NOT necessarily part of
if ((start & (csize - 1)) == 0 && (end & (csize - 1)) == 0)
goto out;
+ /* No page cache for EOF blocks, issue zero out to disk. */
+ if (end > isize) {
+ /*
+ * zeroout eof blocks in last cluster starting from
+ * "isize" even "start" > "isize" because it is
+ * complicated to zeroout just at "start" as "start"
+ * may be not aligned with block size, buffer write
+ * would be required to do that, but out of eof buffer
+ * write is not supported.
+ */
+ ret = ocfs2_zeroout_partial_cluster(inode, isize,
+ end - isize);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+ if (start >= isize)
+ goto out;
+ end = isize;
+ }
handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
return ret;
}
-/*
- * zero out partial blocks of one cluster.
- *
- * start: file offset where zero starts, will be made upper block aligned.
- * len: it will be trimmed to the end of current cluster if "start + len"
- * is bigger than it.
- */
-static int ocfs2_zeroout_partial_cluster(struct inode *inode,
- u64 start, u64 len)
-{
- int ret;
- u64 start_block, end_block, nr_blocks;
- u64 p_block, offset;
- u32 cluster, p_cluster, nr_clusters;
- struct super_block *sb = inode->i_sb;
- u64 end = ocfs2_align_bytes_to_clusters(sb, start);
-
- if (start + len < end)
- end = start + len;
-
- start_block = ocfs2_blocks_for_bytes(sb, start);
- end_block = ocfs2_blocks_for_bytes(sb, end);
- nr_blocks = end_block - start_block;
- if (!nr_blocks)
- return 0;
-
- cluster = ocfs2_bytes_to_clusters(sb, start);
- ret = ocfs2_get_clusters(inode, cluster, &p_cluster,
- &nr_clusters, NULL);
- if (ret)
- return ret;
- if (!p_cluster)
- return 0;
-
- offset = start_block - ocfs2_clusters_to_blocks(sb, cluster);
- p_block = ocfs2_clusters_to_blocks(sb, p_cluster) + offset;
- return sb_issue_zeroout(sb, p_block, nr_blocks, GFP_NOFS);
-}
-
/*
* Parts of this function taken from xfs_change_file_space()
*/
goto out_inode_unlock;
}
- orig_isize = i_size_read(inode);
switch (sr->l_whence) {
case 0: /*SEEK_SET*/
break;
sr->l_start += f_pos;
break;
case 2: /*SEEK_END*/
- sr->l_start += orig_isize;
+ sr->l_start += i_size_read(inode);
break;
default:
ret = -EINVAL;
ret = -EINVAL;
}
+ orig_isize = i_size_read(inode);
/* zeroout eof blocks in the cluster. */
if (!ret && change_size && orig_isize < size) {
ret = ocfs2_zeroout_partial_cluster(inode, orig_isize,
#include "internal.h"
+/*
+ * New pipe buffers will be restricted to this size while the user is exceeding
+ * their pipe buffer quota. The general pipe use case needs at least two
+ * buffers: one for data yet to be read, and one for new data. If this is less
+ * than two, then a write to a non-empty pipe may block even if the pipe is not
+ * full. This can occur with GNU make jobserver or similar uses of pipes as
+ * semaphores: multiple processes may be waiting to write tokens back to the
+ * pipe before reading tokens: https://lore.kernel.org/lkml/1628086770.5rn8p04n6j.none@localhost/.
+ *
+ * Users can reduce their pipe buffers with F_SETPIPE_SZ below this at their
+ * own risk, namely: pipe writes to non-full pipes may block until the pipe is
+ * emptied.
+ */
+#define PIPE_MIN_DEF_BUFFERS 2
+
/*
* The max size that a non-root user is allowed to grow the pipe. Can
* be set by root in /proc/sys/fs/pipe-max-size
#endif
/*
- * Only wake up if the pipe started out empty, since
- * otherwise there should be no readers waiting.
+ * Epoll nonsensically wants a wakeup whether the pipe
+ * was already empty or not.
*
* If it wasn't empty we try to merge new data into
* the last buffer.
*
* That naturally merges small writes, but it also
- * page-aligs the rest of the writes for large writes
+ * page-aligns the rest of the writes for large writes
* spanning multiple pages.
*/
head = pipe->head;
- was_empty = pipe_empty(head, pipe->tail);
+ was_empty = true;
chars = total_len & (PAGE_SIZE-1);
- if (chars && !was_empty) {
+ if (chars && !pipe_empty(head, pipe->tail)) {
unsigned int mask = pipe->ring_size - 1;
struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask];
int offset = buf->offset + buf->len;
user_bufs = account_pipe_buffers(user, 0, pipe_bufs);
if (too_many_pipe_buffers_soft(user_bufs) && pipe_is_unprivileged_user()) {
- user_bufs = account_pipe_buffers(user, pipe_bufs, 1);
- pipe_bufs = 1;
+ user_bufs = account_pipe_buffers(user, pipe_bufs, PIPE_MIN_DEF_BUFFERS);
+ pipe_bufs = PIPE_MIN_DEF_BUFFERS;
}
if (too_many_pipe_buffers_hard(user_bufs) && pipe_is_unprivileged_user())
search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
}
+static int has_valid_deh_location(struct buffer_head *bh, struct item_head *ih)
+{
+ struct reiserfs_de_head *deh;
+ int i;
+
+ deh = B_I_DEH(bh, ih);
+ for (i = 0; i < ih_entry_count(ih); i++) {
+ if (deh_location(&deh[i]) > ih_item_len(ih)) {
+ reiserfs_warning(NULL, "reiserfs-5094",
+ "directory entry location seems wrong %h",
+ &deh[i]);
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
{
struct block_head *blkh;
"(second one): %h", ih);
return 0;
}
- if (is_direntry_le_ih(ih) && (ih_item_len(ih) < (ih_entry_count(ih) * IH_SIZE))) {
- reiserfs_warning(NULL, "reiserfs-5093",
- "item entry count seems wrong %h",
- ih);
- return 0;
+ if (is_direntry_le_ih(ih)) {
+ if (ih_item_len(ih) < (ih_entry_count(ih) * IH_SIZE)) {
+ reiserfs_warning(NULL, "reiserfs-5093",
+ "item entry count seems wrong %h",
+ ih);
+ return 0;
+ }
+ return has_valid_deh_location(bh, ih);
}
prev_location = ih_location(ih);
}
unlock_new_inode(root_inode);
}
+ if (!S_ISDIR(root_inode->i_mode) || !inode_get_bytes(root_inode) ||
+ !root_inode->i_size) {
+ SWARN(silent, s, "", "corrupt root inode, run fsck");
+ iput(root_inode);
+ errval = -EUCLEAN;
+ goto error;
+ }
+
s->s_root = d_make_root(root_inode);
if (!s->s_root)
goto error;
}
static __always_inline int validate_range(struct mm_struct *mm,
- __u64 *start, __u64 len)
+ __u64 start, __u64 len)
{
__u64 task_size = mm->task_size;
- *start = untagged_addr(*start);
-
- if (*start & ~PAGE_MASK)
+ if (start & ~PAGE_MASK)
return -EINVAL;
if (len & ~PAGE_MASK)
return -EINVAL;
if (!len)
return -EINVAL;
- if (*start < mmap_min_addr)
+ if (start < mmap_min_addr)
return -EINVAL;
- if (*start >= task_size)
+ if (start >= task_size)
return -EINVAL;
- if (len > task_size - *start)
+ if (len > task_size - start)
return -EINVAL;
return 0;
}
vm_flags |= VM_UFFD_MINOR;
}
- ret = validate_range(mm, &uffdio_register.range.start,
+ ret = validate_range(mm, uffdio_register.range.start,
uffdio_register.range.len);
if (ret)
goto out;
if (copy_from_user(&uffdio_unregister, buf, sizeof(uffdio_unregister)))
goto out;
- ret = validate_range(mm, &uffdio_unregister.start,
+ ret = validate_range(mm, uffdio_unregister.start,
uffdio_unregister.len);
if (ret)
goto out;
if (copy_from_user(&uffdio_wake, buf, sizeof(uffdio_wake)))
goto out;
- ret = validate_range(ctx->mm, &uffdio_wake.start, uffdio_wake.len);
+ ret = validate_range(ctx->mm, uffdio_wake.start, uffdio_wake.len);
if (ret)
goto out;
sizeof(uffdio_copy)-sizeof(__s64)))
goto out;
- ret = validate_range(ctx->mm, &uffdio_copy.dst, uffdio_copy.len);
+ ret = validate_range(ctx->mm, uffdio_copy.dst, uffdio_copy.len);
if (ret)
goto out;
/*
sizeof(uffdio_zeropage)-sizeof(__s64)))
goto out;
- ret = validate_range(ctx->mm, &uffdio_zeropage.range.start,
+ ret = validate_range(ctx->mm, uffdio_zeropage.range.start,
uffdio_zeropage.range.len);
if (ret)
goto out;
sizeof(struct uffdio_writeprotect)))
return -EFAULT;
- ret = validate_range(ctx->mm, &uffdio_wp.range.start,
+ ret = validate_range(ctx->mm, uffdio_wp.range.start,
uffdio_wp.range.len);
if (ret)
return ret;
sizeof(uffdio_continue) - (sizeof(__s64))))
goto out;
- ret = validate_range(ctx->mm, &uffdio_continue.range.start,
+ ret = validate_range(ctx->mm, uffdio_continue.range.start,
uffdio_continue.range.len);
if (ret)
goto out;
/* start of the extended dinode, writable fields */
uint32_t di_crc; /* CRC of the inode */
uint64_t di_changecount; /* number of attribute changes */
- xfs_lsn_t di_lsn; /* flush sequence */
+
+ /*
+ * The LSN we write to this field during formatting is not a reflection
+ * of the current on-disk LSN. It should never be used for recovery
+ * sequencing, nor should it be recovered into the on-disk inode at all.
+ * See xlog_recover_inode_commit_pass2() and xfs_log_dinode_to_disk()
+ * for details.
+ */
+ xfs_lsn_t di_lsn;
+
uint64_t di_flags2; /* more random flags */
uint32_t di_cowextsize; /* basic cow extent size for file */
uint8_t di_pad2[12]; /* more padding for future expansion */
static xfs_lsn_t
xlog_recover_get_buf_lsn(
struct xfs_mount *mp,
- struct xfs_buf *bp)
+ struct xfs_buf *bp,
+ struct xfs_buf_log_format *buf_f)
{
uint32_t magic32;
uint16_t magic16;
void *blk = bp->b_addr;
uuid_t *uuid;
xfs_lsn_t lsn = -1;
+ uint16_t blft;
/* v4 filesystems always recover immediately */
if (!xfs_sb_version_hascrc(&mp->m_sb))
goto recover_immediately;
+ /*
+ * realtime bitmap and summary file blocks do not have magic numbers or
+ * UUIDs, so we must recover them immediately.
+ */
+ blft = xfs_blft_from_flags(buf_f);
+ if (blft == XFS_BLFT_RTBITMAP_BUF || blft == XFS_BLFT_RTSUMMARY_BUF)
+ goto recover_immediately;
+
magic32 = be32_to_cpu(*(__be32 *)blk);
switch (magic32) {
case XFS_ABTB_CRC_MAGIC:
switch (magicda) {
case XFS_DIR3_LEAF1_MAGIC:
case XFS_DIR3_LEAFN_MAGIC:
+ case XFS_ATTR3_LEAF_MAGIC:
case XFS_DA3_NODE_MAGIC:
lsn = be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn);
uuid = &((struct xfs_da3_blkinfo *)blk)->uuid;
* the verifier will be reset to match whatever recover turns that
* buffer into.
*/
- lsn = xlog_recover_get_buf_lsn(mp, bp);
+ lsn = xlog_recover_get_buf_lsn(mp, bp, buf_f);
if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
trace_xfs_log_recover_buf_skip(log, buf_f);
xlog_recover_validate_buf_type(mp, bp, buf_f, NULLCOMMITLSN);
STATIC void
xfs_log_dinode_to_disk(
struct xfs_log_dinode *from,
- struct xfs_dinode *to)
+ struct xfs_dinode *to,
+ xfs_lsn_t lsn)
{
to->di_magic = cpu_to_be16(from->di_magic);
to->di_mode = cpu_to_be16(from->di_mode);
to->di_flags2 = cpu_to_be64(from->di_flags2);
to->di_cowextsize = cpu_to_be32(from->di_cowextsize);
to->di_ino = cpu_to_be64(from->di_ino);
- to->di_lsn = cpu_to_be64(from->di_lsn);
+ to->di_lsn = cpu_to_be64(lsn);
memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
uuid_copy(&to->di_uuid, &from->di_uuid);
to->di_flushiter = 0;
}
/*
- * If the inode has an LSN in it, recover the inode only if it's less
- * than the lsn of the transaction we are replaying. Note: we still
- * need to replay an owner change even though the inode is more recent
- * than the transaction as there is no guarantee that all the btree
- * blocks are more recent than this transaction, too.
+ * If the inode has an LSN in it, recover the inode only if the on-disk
+ * inode's LSN is older than the lsn of the transaction we are
+ * replaying. We can have multiple checkpoints with the same start LSN,
+ * so the current LSN being equal to the on-disk LSN doesn't necessarily
+ * mean that the on-disk inode is more recent than the change being
+ * replayed.
+ *
+ * We must check the current_lsn against the on-disk inode
+ * here because the we can't trust the log dinode to contain a valid LSN
+ * (see comment below before replaying the log dinode for details).
+ *
+ * Note: we still need to replay an owner change even though the inode
+ * is more recent than the transaction as there is no guarantee that all
+ * the btree blocks are more recent than this transaction, too.
*/
if (dip->di_version >= 3) {
xfs_lsn_t lsn = be64_to_cpu(dip->di_lsn);
- if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
+ if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) > 0) {
trace_xfs_log_recover_inode_skip(log, in_f);
error = 0;
goto out_owner_change;
goto out_release;
}
- /* recover the log dinode inode into the on disk inode */
- xfs_log_dinode_to_disk(ldip, dip);
+ /*
+ * Recover the log dinode inode into the on disk inode.
+ *
+ * The LSN in the log dinode is garbage - it can be zero or reflect
+ * stale in-memory runtime state that isn't coherent with the changes
+ * logged in this transaction or the changes written to the on-disk
+ * inode. Hence we write the current lSN into the inode because that
+ * matches what xfs_iflush() would write inode the inode when flushing
+ * the changes in this transaction.
+ */
+ xfs_log_dinode_to_disk(ldip, dip, current_lsn);
fields = in_f->ilf_fields;
if (fields & XFS_ILOG_DEV)
STATIC void
xlog_verify_tail_lsn(
struct xlog *log,
- struct xlog_in_core *iclog,
- xfs_lsn_t tail_lsn);
+ struct xlog_in_core *iclog);
#else
#define xlog_verify_dest_ptr(a,b)
#define xlog_verify_grant_tail(a)
#define xlog_verify_iclog(a,b,c)
-#define xlog_verify_tail_lsn(a,b,c)
+#define xlog_verify_tail_lsn(a,b)
#endif
STATIC int
return error;
}
-static bool
-__xlog_state_release_iclog(
- struct xlog *log,
- struct xlog_in_core *iclog)
-{
- lockdep_assert_held(&log->l_icloglock);
-
- if (iclog->ic_state == XLOG_STATE_WANT_SYNC) {
- /* update tail before writing to iclog */
- xfs_lsn_t tail_lsn = xlog_assign_tail_lsn(log->l_mp);
-
- iclog->ic_state = XLOG_STATE_SYNCING;
- iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn);
- xlog_verify_tail_lsn(log, iclog, tail_lsn);
- /* cycle incremented when incrementing curr_block */
- trace_xlog_iclog_syncing(iclog, _RET_IP_);
- return true;
- }
-
- ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
- return false;
-}
-
/*
* Flush iclog to disk if this is the last reference to the given iclog and the
* it is in the WANT_SYNC state.
+ *
+ * If the caller passes in a non-zero @old_tail_lsn and the current log tail
+ * does not match, there may be metadata on disk that must be persisted before
+ * this iclog is written. To satisfy that requirement, set the
+ * XLOG_ICL_NEED_FLUSH flag as a condition for writing this iclog with the new
+ * log tail value.
+ *
+ * If XLOG_ICL_NEED_FUA is already set on the iclog, we need to ensure that the
+ * log tail is updated correctly. NEED_FUA indicates that the iclog will be
+ * written to stable storage, and implies that a commit record is contained
+ * within the iclog. We need to ensure that the log tail does not move beyond
+ * the tail that the first commit record in the iclog ordered against, otherwise
+ * correct recovery of that checkpoint becomes dependent on future operations
+ * performed on this iclog.
+ *
+ * Hence if NEED_FUA is set and the current iclog tail lsn is empty, write the
+ * current tail into iclog. Once the iclog tail is set, future operations must
+ * not modify it, otherwise they potentially violate ordering constraints for
+ * the checkpoint commit that wrote the initial tail lsn value. The tail lsn in
+ * the iclog will get zeroed on activation of the iclog after sync, so we
+ * always capture the tail lsn on the iclog on the first NEED_FUA release
+ * regardless of the number of active reference counts on this iclog.
*/
+
int
xlog_state_release_iclog(
struct xlog *log,
- struct xlog_in_core *iclog)
+ struct xlog_in_core *iclog,
+ xfs_lsn_t old_tail_lsn)
{
+ xfs_lsn_t tail_lsn;
lockdep_assert_held(&log->l_icloglock);
trace_xlog_iclog_release(iclog, _RET_IP_);
if (iclog->ic_state == XLOG_STATE_IOERROR)
return -EIO;
- if (atomic_dec_and_test(&iclog->ic_refcnt) &&
- __xlog_state_release_iclog(log, iclog)) {
- spin_unlock(&log->l_icloglock);
- xlog_sync(log, iclog);
- spin_lock(&log->l_icloglock);
+ /*
+ * Grabbing the current log tail needs to be atomic w.r.t. the writing
+ * of the tail LSN into the iclog so we guarantee that the log tail does
+ * not move between deciding if a cache flush is required and writing
+ * the LSN into the iclog below.
+ */
+ if (old_tail_lsn || iclog->ic_state == XLOG_STATE_WANT_SYNC) {
+ tail_lsn = xlog_assign_tail_lsn(log->l_mp);
+
+ if (old_tail_lsn && tail_lsn != old_tail_lsn)
+ iclog->ic_flags |= XLOG_ICL_NEED_FLUSH;
+
+ if ((iclog->ic_flags & XLOG_ICL_NEED_FUA) &&
+ !iclog->ic_header.h_tail_lsn)
+ iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn);
}
+ if (!atomic_dec_and_test(&iclog->ic_refcnt))
+ return 0;
+
+ if (iclog->ic_state != XLOG_STATE_WANT_SYNC) {
+ ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
+ return 0;
+ }
+
+ iclog->ic_state = XLOG_STATE_SYNCING;
+ if (!iclog->ic_header.h_tail_lsn)
+ iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn);
+ xlog_verify_tail_lsn(log, iclog);
+ trace_xlog_iclog_syncing(iclog, _RET_IP_);
+
+ spin_unlock(&log->l_icloglock);
+ xlog_sync(log, iclog);
+ spin_lock(&log->l_icloglock);
return 0;
}
xfs_log_unmount(mp);
}
+/*
+ * Flush out the iclog to disk ensuring that device caches are flushed and
+ * the iclog hits stable storage before any completion waiters are woken.
+ */
+static inline int
+xlog_force_iclog(
+ struct xlog_in_core *iclog)
+{
+ atomic_inc(&iclog->ic_refcnt);
+ iclog->ic_flags |= XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA;
+ if (iclog->ic_state == XLOG_STATE_ACTIVE)
+ xlog_state_switch_iclogs(iclog->ic_log, iclog, 0);
+ return xlog_state_release_iclog(iclog->ic_log, iclog, 0);
+}
+
/*
* Wait for the iclog and all prior iclogs to be written disk as required by the
* log force state machine. Waiting on ic_force_wait ensures iclog completions
/* account for space used by record data */
ticket->t_curr_res -= sizeof(ulf);
- /*
- * For external log devices, we need to flush the data device cache
- * first to ensure all metadata writeback is on stable storage before we
- * stamp the tail LSN into the unmount record.
- */
- if (log->l_targ != log->l_mp->m_ddev_targp)
- blkdev_issue_flush(log->l_targ->bt_bdev);
return xlog_write(log, &vec, ticket, NULL, NULL, XLOG_UNMOUNT_TRANS);
}
spin_lock(&log->l_icloglock);
iclog = log->l_iclog;
- atomic_inc(&iclog->ic_refcnt);
- if (iclog->ic_state == XLOG_STATE_ACTIVE)
- xlog_state_switch_iclogs(log, iclog, 0);
- else
- ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC ||
- iclog->ic_state == XLOG_STATE_IOERROR);
- /*
- * Ensure the journal is fully flushed and on stable storage once the
- * iclog containing the unmount record is written.
- */
- iclog->ic_flags |= (XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA);
- error = xlog_state_release_iclog(log, iclog);
+ error = xlog_force_iclog(iclog);
xlog_wait_on_iclog(iclog);
if (tic) {
* metadata writeback and causing priority inversions.
*/
iclog->ic_bio.bi_opf = REQ_OP_WRITE | REQ_META | REQ_SYNC | REQ_IDLE;
- if (iclog->ic_flags & XLOG_ICL_NEED_FLUSH)
+ if (iclog->ic_flags & XLOG_ICL_NEED_FLUSH) {
iclog->ic_bio.bi_opf |= REQ_PREFLUSH;
+ /*
+ * For external log devices, we also need to flush the data
+ * device cache first to ensure all metadata writeback covered
+ * by the LSN in this iclog is on stable storage. This is slow,
+ * but it *must* complete before we issue the external log IO.
+ */
+ if (log->l_targ != log->l_mp->m_ddev_targp)
+ blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev);
+ }
if (iclog->ic_flags & XLOG_ICL_NEED_FUA)
iclog->ic_bio.bi_opf |= REQ_FUA;
+
iclog->ic_flags &= ~(XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA);
if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count)) {
return 0;
release_iclog:
- error = xlog_state_release_iclog(log, iclog);
+ error = xlog_state_release_iclog(log, iclog, 0);
spin_unlock(&log->l_icloglock);
return error;
}
ASSERT(optype & XLOG_COMMIT_TRANS);
*commit_iclog = iclog;
} else {
- error = xlog_state_release_iclog(log, iclog);
+ error = xlog_state_release_iclog(log, iclog, 0);
}
spin_unlock(&log->l_icloglock);
memset(iclog->ic_header.h_cycle_data, 0,
sizeof(iclog->ic_header.h_cycle_data));
iclog->ic_header.h_lsn = 0;
+ iclog->ic_header.h_tail_lsn = 0;
}
/*
* reference to the iclog.
*/
if (!atomic_add_unless(&iclog->ic_refcnt, -1, 1))
- error = xlog_state_release_iclog(log, iclog);
+ error = xlog_state_release_iclog(log, iclog, 0);
spin_unlock(&log->l_icloglock);
if (error)
return error;
log->l_iclog = iclog->ic_next;
}
+/*
+ * Force the iclog to disk and check if the iclog has been completed before
+ * xlog_force_iclog() returns. This can happen on synchronous (e.g.
+ * pmem) or fast async storage because we drop the icloglock to issue the IO.
+ * If completion has already occurred, tell the caller so that it can avoid an
+ * unnecessary wait on the iclog.
+ */
+static int
+xlog_force_and_check_iclog(
+ struct xlog_in_core *iclog,
+ bool *completed)
+{
+ xfs_lsn_t lsn = be64_to_cpu(iclog->ic_header.h_lsn);
+ int error;
+
+ *completed = false;
+ error = xlog_force_iclog(iclog);
+ if (error)
+ return error;
+
+ /*
+ * If the iclog has already been completed and reused the header LSN
+ * will have been rewritten by completion
+ */
+ if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn)
+ *completed = true;
+ return 0;
+}
+
/*
* Write out all data in the in-core log as of this exact moment in time.
*
{
struct xlog *log = mp->m_log;
struct xlog_in_core *iclog;
- xfs_lsn_t lsn;
XFS_STATS_INC(mp, xs_log_force);
trace_xfs_log_force(mp, 0, _RET_IP_);
iclog = iclog->ic_prev;
} else if (iclog->ic_state == XLOG_STATE_ACTIVE) {
if (atomic_read(&iclog->ic_refcnt) == 0) {
- /*
- * We are the only one with access to this iclog.
- *
- * Flush it out now. There should be a roundoff of zero
- * to show that someone has already taken care of the
- * roundoff from the previous sync.
- */
- atomic_inc(&iclog->ic_refcnt);
- lsn = be64_to_cpu(iclog->ic_header.h_lsn);
- xlog_state_switch_iclogs(log, iclog, 0);
- if (xlog_state_release_iclog(log, iclog))
+ /* We have exclusive access to this iclog. */
+ bool completed;
+
+ if (xlog_force_and_check_iclog(iclog, &completed))
goto out_error;
- if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn)
+ if (completed)
goto out_unlock;
} else {
/*
- * Someone else is writing to this iclog.
- *
- * Use its call to flush out the data. However, the
- * other thread may not force out this LR, so we mark
- * it WANT_SYNC.
+ * Someone else is still writing to this iclog, so we
+ * need to ensure that when they release the iclog it
+ * gets synced immediately as we may be waiting on it.
*/
xlog_state_switch_iclogs(log, iclog, 0);
}
- } else {
- /*
- * If the head iclog is not active nor dirty, we just attach
- * ourselves to the head and go to sleep if necessary.
- */
- ;
}
+ /*
+ * The iclog we are about to wait on may contain the checkpoint pushed
+ * by the above xlog_cil_force() call, but it may not have been pushed
+ * to disk yet. Like the ACTIVE case above, we need to make sure caches
+ * are flushed when this iclog is written.
+ */
+ if (iclog->ic_state == XLOG_STATE_WANT_SYNC)
+ iclog->ic_flags |= XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA;
+
if (flags & XFS_LOG_SYNC)
return xlog_wait_on_iclog(iclog);
out_unlock:
bool already_slept)
{
struct xlog_in_core *iclog;
+ bool completed;
spin_lock(&log->l_icloglock);
iclog = log->l_iclog;
goto out_unlock;
}
- if (iclog->ic_state == XLOG_STATE_ACTIVE) {
+ switch (iclog->ic_state) {
+ case XLOG_STATE_ACTIVE:
/*
* We sleep here if we haven't already slept (e.g. this is the
* first time we've looked at the correct iclog buf) and the
&log->l_icloglock);
return -EAGAIN;
}
- atomic_inc(&iclog->ic_refcnt);
- xlog_state_switch_iclogs(log, iclog, 0);
- if (xlog_state_release_iclog(log, iclog))
+ if (xlog_force_and_check_iclog(iclog, &completed))
goto out_error;
if (log_flushed)
*log_flushed = 1;
+ if (completed)
+ goto out_unlock;
+ break;
+ case XLOG_STATE_WANT_SYNC:
+ /*
+ * This iclog may contain the checkpoint pushed by the
+ * xlog_cil_force_seq() call, but there are other writers still
+ * accessing it so it hasn't been pushed to disk yet. Like the
+ * ACTIVE case above, we need to make sure caches are flushed
+ * when this iclog is written.
+ */
+ iclog->ic_flags |= XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA;
+ break;
+ default:
+ /*
+ * The entire checkpoint was written by the CIL force and is on
+ * its way to disk already. It will be stable when it
+ * completes, so we don't need to manipulate caches here at all.
+ * We just need to wait for completion if necessary.
+ */
+ break;
}
if (flags & XFS_LOG_SYNC)
STATIC void
xlog_verify_tail_lsn(
struct xlog *log,
- struct xlog_in_core *iclog,
- xfs_lsn_t tail_lsn)
+ struct xlog_in_core *iclog)
{
- int blocks;
+ xfs_lsn_t tail_lsn = be64_to_cpu(iclog->ic_header.h_tail_lsn);
+ int blocks;
if (CYCLE_LSN(tail_lsn) == log->l_prev_cycle) {
blocks =
struct xfs_trans_header thdr;
struct xfs_log_iovec lhdr;
struct xfs_log_vec lvhdr = { NULL };
+ xfs_lsn_t preflush_tail_lsn;
xfs_lsn_t commit_lsn;
- xfs_lsn_t push_seq;
+ xfs_csn_t push_seq;
struct bio bio;
DECLARE_COMPLETION_ONSTACK(bdev_flush);
* because we hold the flush lock exclusively. Hence we can now issue
* a cache flush to ensure all the completed metadata in the journal we
* are about to overwrite is on stable storage.
+ *
+ * Because we are issuing this cache flush before we've written the
+ * tail lsn to the iclog, we can have metadata IO completions move the
+ * tail forwards between the completion of this flush and the iclog
+ * being written. In this case, we need to re-issue the cache flush
+ * before the iclog write. To detect whether the log tail moves, sample
+ * the tail LSN *before* we issue the flush.
*/
+ preflush_tail_lsn = atomic64_read(&log->l_tail_lsn);
xfs_flush_bdev_async(&bio, log->l_mp->m_ddev_targp->bt_bdev,
&bdev_flush);
* storage.
*/
commit_iclog->ic_flags |= XLOG_ICL_NEED_FUA;
- xlog_state_release_iclog(log, commit_iclog);
+ xlog_state_release_iclog(log, commit_iclog, preflush_tail_lsn);
spin_unlock(&log->l_icloglock);
return;
{ XLOG_STATE_DIRTY, "XLOG_STATE_DIRTY" }, \
{ XLOG_STATE_IOERROR, "XLOG_STATE_IOERROR" }
+/*
+ * In core log flags
+ */
+#define XLOG_ICL_NEED_FLUSH (1 << 0) /* iclog needs REQ_PREFLUSH */
+#define XLOG_ICL_NEED_FUA (1 << 1) /* iclog needs REQ_FUA */
+
+#define XLOG_ICL_STRINGS \
+ { XLOG_ICL_NEED_FLUSH, "XLOG_ICL_NEED_FLUSH" }, \
+ { XLOG_ICL_NEED_FUA, "XLOG_ICL_NEED_FUA" }
+
/*
* Log ticket flags
#define XLOG_COVER_OPS 5
-#define XLOG_ICL_NEED_FLUSH (1 << 0) /* iclog needs REQ_PREFLUSH */
-#define XLOG_ICL_NEED_FUA (1 << 1) /* iclog needs REQ_FUA */
-
/* Ticket reservation region accounting */
#define XLOG_TIC_LEN_MAX 15
void xfs_log_ticket_ungrant(struct xlog *log, struct xlog_ticket *ticket);
void xfs_log_ticket_regrant(struct xlog *log, struct xlog_ticket *ticket);
-int xlog_state_release_iclog(struct xlog *log, struct xlog_in_core *iclog);
+int xlog_state_release_iclog(struct xlog *log, struct xlog_in_core *iclog,
+ xfs_lsn_t log_tail_lsn);
/*
* When we crack an atomic LSN, we sample it first so that the value will not
__field(uint32_t, state)
__field(int32_t, refcount)
__field(uint32_t, offset)
+ __field(uint32_t, flags)
__field(unsigned long long, lsn)
__field(unsigned long, caller_ip)
),
__entry->state = iclog->ic_state;
__entry->refcount = atomic_read(&iclog->ic_refcnt);
__entry->offset = iclog->ic_offset;
+ __entry->flags = iclog->ic_flags;
__entry->lsn = be64_to_cpu(iclog->ic_header.h_lsn);
__entry->caller_ip = caller_ip;
),
- TP_printk("dev %d:%d state %s refcnt %d offset %u lsn 0x%llx caller %pS",
+ TP_printk("dev %d:%d state %s refcnt %d offset %u lsn 0x%llx flags %s caller %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__print_symbolic(__entry->state, XLOG_STATE_STRINGS),
__entry->refcount,
__entry->offset,
__entry->lsn,
+ __print_flags(__entry->flags, "|", XLOG_ICL_STRINGS),
(char *)__entry->caller_ip)
);
* @hrv: Hardware Revision of the device, pass -1 to not check _HRV
*
* The caller is responsible for invoking acpi_dev_put() on the returned device.
- *
- * FIXME: Due to above requirement there is a window that may invalidate @adev
- * and next iteration will use a dangling pointer, e.g. in the case of a
- * hotplug event. That said, the caller should ensure that this will never
- * happen.
*/
#define for_each_acpi_dev_match(adev, hid, uid, hrv) \
for (adev = acpi_dev_get_first_match_dev(hid, uid, hrv); \
static inline void acpi_dev_put(struct acpi_device *adev)
{
- put_device(&adev->dev);
+ if (adev)
+ put_device(&adev->dev);
}
struct acpi_device *acpi_bus_get_acpi_device(acpi_handle handle);
unsigned long arg);
#define DRM_IOCTL_NR(n) _IOC_NR(n)
+#define DRM_IOCTL_TYPE(n) _IOC_TYPE(n)
#define DRM_MAJOR 226
/**
* Maximum number of blkcg policies allowed to be registered concurrently.
* Defined here to simplify include dependency.
*/
-#define BLKCG_MAX_POLS 5
+#define BLKCG_MAX_POLS 6
typedef void (rq_end_io_fn)(struct request *, blk_status_t);
extern struct static_key_false cgroup_bpf_enabled_key[MAX_BPF_ATTACH_TYPE];
#define cgroup_bpf_enabled(type) static_branch_unlikely(&cgroup_bpf_enabled_key[type])
-#define BPF_CGROUP_STORAGE_NEST_MAX 8
-
-struct bpf_cgroup_storage_info {
- struct task_struct *task;
- struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
-};
-
-/* For each cpu, permit maximum BPF_CGROUP_STORAGE_NEST_MAX number of tasks
- * to use bpf cgroup storage simultaneously.
- */
-DECLARE_PER_CPU(struct bpf_cgroup_storage_info,
- bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
-
#define for_each_cgroup_storage_type(stype) \
for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++)
return BPF_CGROUP_STORAGE_SHARED;
}
-static inline int bpf_cgroup_storage_set(struct bpf_cgroup_storage
- *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
-{
- enum bpf_cgroup_storage_type stype;
- int i, err = 0;
-
- preempt_disable();
- for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
- if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != NULL))
- continue;
-
- this_cpu_write(bpf_cgroup_storage_info[i].task, current);
- for_each_cgroup_storage_type(stype)
- this_cpu_write(bpf_cgroup_storage_info[i].storage[stype],
- storage[stype]);
- goto out;
- }
- err = -EBUSY;
- WARN_ON_ONCE(1);
-
-out:
- preempt_enable();
- return err;
-}
-
-static inline void bpf_cgroup_storage_unset(void)
-{
- int i;
-
- for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
- if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current))
- continue;
-
- this_cpu_write(bpf_cgroup_storage_info[i].task, NULL);
- return;
- }
-}
-
struct bpf_cgroup_storage *
cgroup_storage_lookup(struct bpf_cgroup_storage_map *map,
void *key, bool locked);
return -EINVAL;
}
-static inline int bpf_cgroup_storage_set(
- struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) { return 0; }
-static inline void bpf_cgroup_storage_unset(void) {}
static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux,
struct bpf_map *map) { return 0; }
static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
struct bpf_prog *include_prog,
struct bpf_prog_array **new_array);
+struct bpf_run_ctx {};
+
+struct bpf_cg_run_ctx {
+ struct bpf_run_ctx run_ctx;
+ struct bpf_prog_array_item *prog_item;
+};
+
/* BPF program asks to bypass CAP_NET_BIND_SERVICE in bind. */
#define BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE (1 << 0)
/* BPF program asks to set CN on the packet. */
#define BPF_RET_SET_CN (1 << 0)
-/* For BPF_PROG_RUN_ARRAY_FLAGS and __BPF_PROG_RUN_ARRAY,
- * if bpf_cgroup_storage_set() failed, the rest of programs
- * will not execute. This should be a really rare scenario
- * as it requires BPF_CGROUP_STORAGE_NEST_MAX number of
- * preemptions all between bpf_cgroup_storage_set() and
- * bpf_cgroup_storage_unset() on the same cpu.
- */
#define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags) \
({ \
struct bpf_prog_array_item *_item; \
struct bpf_prog *_prog; \
struct bpf_prog_array *_array; \
+ struct bpf_run_ctx *old_run_ctx; \
+ struct bpf_cg_run_ctx run_ctx; \
u32 _ret = 1; \
u32 func_ret; \
migrate_disable(); \
rcu_read_lock(); \
_array = rcu_dereference(array); \
_item = &_array->items[0]; \
+ old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); \
while ((_prog = READ_ONCE(_item->prog))) { \
- if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage))) \
- break; \
+ run_ctx.prog_item = _item; \
func_ret = func(_prog, ctx); \
_ret &= (func_ret & 1); \
- *(ret_flags) |= (func_ret >> 1); \
- bpf_cgroup_storage_unset(); \
+ *(ret_flags) |= (func_ret >> 1); \
_item++; \
} \
+ bpf_reset_run_ctx(old_run_ctx); \
rcu_read_unlock(); \
migrate_enable(); \
_ret; \
struct bpf_prog_array_item *_item; \
struct bpf_prog *_prog; \
struct bpf_prog_array *_array; \
+ struct bpf_run_ctx *old_run_ctx; \
+ struct bpf_cg_run_ctx run_ctx; \
u32 _ret = 1; \
migrate_disable(); \
rcu_read_lock(); \
if (unlikely(check_non_null && !_array))\
goto _out; \
_item = &_array->items[0]; \
- while ((_prog = READ_ONCE(_item->prog))) { \
- if (!set_cg_storage) { \
- _ret &= func(_prog, ctx); \
- } else { \
- if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage))) \
- break; \
- _ret &= func(_prog, ctx); \
- bpf_cgroup_storage_unset(); \
- } \
+ old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);\
+ while ((_prog = READ_ONCE(_item->prog))) { \
+ run_ctx.prog_item = _item; \
+ _ret &= func(_prog, ctx); \
_item++; \
} \
+ bpf_reset_run_ctx(old_run_ctx); \
_out: \
rcu_read_unlock(); \
migrate_enable(); \
migrate_enable();
}
+static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx)
+{
+ struct bpf_run_ctx *old_ctx;
+
+ old_ctx = current->bpf_ctx;
+ current->bpf_ctx = new_ctx;
+ return old_ctx;
+}
+
+static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
+{
+ current->bpf_ctx = old_ctx;
+}
+
extern const struct file_operations bpf_map_fops;
extern const struct file_operations bpf_prog_fops;
extern const struct file_operations bpf_iter_fops;
struct seq_file *seq);
typedef int (*bpf_iter_fill_link_info_t)(const struct bpf_iter_aux_info *aux,
struct bpf_link_info *info);
+typedef const struct bpf_func_proto *
+(*bpf_iter_get_func_proto_t)(enum bpf_func_id func_id,
+ const struct bpf_prog *prog);
enum bpf_iter_feature {
BPF_ITER_RESCHED = BIT(0),
bpf_iter_detach_target_t detach_target;
bpf_iter_show_fdinfo_t show_fdinfo;
bpf_iter_fill_link_info_t fill_link_info;
+ bpf_iter_get_func_proto_t get_func_proto;
u32 ctx_arg_info_size;
u32 feature;
struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX];
int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info);
void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info);
bool bpf_iter_prog_supported(struct bpf_prog *prog);
+const struct bpf_func_proto *
+bpf_iter_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog);
int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr, struct bpf_prog *prog);
int bpf_iter_new_fd(struct bpf_link *link);
bool bpf_link_is_iter(struct bpf_link *link);
extern const struct bpf_func_proto bpf_task_storage_delete_proto;
extern const struct bpf_func_proto bpf_for_each_map_elem_proto;
extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto;
+extern const struct bpf_func_proto bpf_sk_setsockopt_proto;
+extern const struct bpf_func_proto bpf_sk_getsockopt_proto;
const struct bpf_func_proto *bpf_tracing_func_proto(
enum bpf_func_id func_id, const struct bpf_prog *prog);
BPF_LINK_TYPE(BPF_LINK_TYPE_ITER, iter)
#ifdef CONFIG_NET
BPF_LINK_TYPE(BPF_LINK_TYPE_NETNS, netns)
+BPF_LINK_TYPE(BPF_LINK_TYPE_XDP, xdp)
#endif
};
u64 map_key_state; /* constant (32 bit) key tracking for maps */
int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
- int sanitize_stack_off; /* stack slot to be cleared */
u32 seen; /* this insn was processed by the verifier at env->pass_cnt */
+ bool sanitize_stack_spill; /* subject to Spectre v4 sanitation */
bool zext_dst; /* this insn zero extends dst reg */
u8 alu_state; /* used in combination with alu_limit */
u32 used_map_cnt; /* number of used maps */
u32 used_btf_cnt; /* number of used BTF objects */
u32 id_gen; /* used to generate unique reg IDs */
+ bool explore_alu_limits;
bool allow_ptr_leaks;
bool allow_uninit_stack;
bool allow_ptr_to_map_access;
/* unused opcode to mark call to interpreter with arguments */
#define BPF_CALL_ARGS 0xe0
+/* unused opcode to mark speculation barrier for mitigating
+ * Speculative Store Bypass
+ */
+#define BPF_NOSPEC 0xc0
+
/* As per nm, we expose JITed images as text (code) section for
* kallsyms. That way, tools like perf can find it to match
* addresses.
.off = 0, \
.imm = 0 })
+/* Speculation barrier */
+
+#define BPF_ST_NOSPEC() \
+ ((struct bpf_insn) { \
+ .code = BPF_ST | BPF_NOSPEC, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = 0 })
+
/* Internal classic blocks for direct assignment */
#define __BPF_STMT(CODE, K) \
DECLARE_BPF_DISPATCHER(xdp)
+DECLARE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key);
+
+u32 xdp_master_redirect(struct xdp_buff *xdp);
+
static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
struct xdp_buff *xdp)
{
* under local_bh_disable(), which provides the needed RCU protection
* for accessing map entries.
*/
- return __BPF_PROG_RUN(prog, xdp, BPF_DISPATCHER_FUNC(xdp));
+ u32 act = __BPF_PROG_RUN(prog, xdp, BPF_DISPATCHER_FUNC(xdp));
+
+ if (static_branch_unlikely(&bpf_master_redirect_enabled_key)) {
+ if (act == XDP_TX && netif_is_bond_slave(xdp->rxq->dev))
+ act = xdp_master_redirect(xdp);
+ }
+
+ return act;
}
void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog);
extern void put_fs_context(struct fs_context *fc);
extern int vfs_parse_fs_param_source(struct fs_context *fc,
struct fs_parameter *param);
+extern void fc_drop_locked(struct fs_context *fc);
/*
* sget() wrappers to be called from the ->get_tree() op.
void fsl_mc_free_irqs(struct fsl_mc_device *mc_dev);
-struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev);
+struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev,
+ u16 if_id);
extern struct bus_type fsl_mc_bus_type;
VM_BUG_ON(offset + len > PAGE_SIZE);
memcpy(to + offset, from, len);
+ flush_dcache_page(page);
kunmap_local(to);
}
static inline void memzero_page(struct page *page, size_t offset, size_t len)
{
- char *addr = kmap_atomic(page);
+ char *addr = kmap_local_page(page);
memset(addr + offset, 0, len);
- kunmap_atomic(addr);
+ flush_dcache_page(page);
+ kunmap_local(addr);
}
#endif /* _LINUX_HIGHMEM_H */
}
#endif
-#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_NET_SWITCHDEV)
-
-int switchdev_bridge_port_offload(struct net_device *brport_dev,
- struct net_device *dev, const void *ctx,
- struct notifier_block *atomic_nb,
- struct notifier_block *blocking_nb,
- bool tx_fwd_offload,
- struct netlink_ext_ack *extack);
-void switchdev_bridge_port_unoffload(struct net_device *brport_dev,
- const void *ctx,
- struct notifier_block *atomic_nb,
- struct notifier_block *blocking_nb);
-
-#else
-
-static inline int
-switchdev_bridge_port_offload(struct net_device *brport_dev,
- struct net_device *dev, const void *ctx,
- struct notifier_block *atomic_nb,
- struct notifier_block *blocking_nb,
- bool tx_fwd_offload,
- struct netlink_ext_ack *extack)
-{
- return -EINVAL;
-}
-
-static inline void
-switchdev_bridge_port_unoffload(struct net_device *brport_dev,
- const void *ctx,
- struct notifier_block *atomic_nb,
- struct notifier_block *blocking_nb)
-{
-}
-#endif
-
#endif
__be32 sl_addr[];
};
-#define IP_SFLSIZE(count) (sizeof(struct ip_sf_socklist) + \
- (count) * sizeof(__be32))
-
#define IP_SFBLOCK 10 /* allocate this many at once */
/* ip_mc_socklist is real list now. Speed is not argument;
/* Get the device * from ishtp device instance */
struct device *ishtp_device(struct ishtp_cl_device *cl_device);
+/* wait for IPC resume */
+bool ishtp_wait_resume(struct ishtp_device *dev);
/* Trace interface for clients */
ishtp_print_log ishtp_trace_callback(struct ishtp_cl_device *cl_device);
/* Get device pointer of PCI device for DMA acces */
*/
#define for_each_mem_range(i, p_start, p_end) \
__for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE, \
- MEMBLOCK_NONE, p_start, p_end, NULL)
+ MEMBLOCK_HOTPLUG, p_start, p_end, NULL)
/**
* for_each_mem_range_rev - reverse iterate through memblock areas from
*/
#define for_each_mem_range_rev(i, p_start, p_end) \
__for_each_mem_range_rev(i, &memblock.memory, NULL, NUMA_NO_NODE, \
- MEMBLOCK_NONE, p_start, p_end, NULL)
+ MEMBLOCK_HOTPLUG, p_start, p_end, NULL)
/**
* for_each_reserved_mem_range - iterate over all reserved memblock areas
* host and device execution environments match and
* channels are in a DISABLED state.
* @mhi_dev: Device associated with the channels
+ * @flags: MHI channel flags
*/
-int mhi_prepare_for_transfer(struct mhi_device *mhi_dev);
+int mhi_prepare_for_transfer(struct mhi_device *mhi_dev,
+ unsigned int flags);
+
+/* Automatically allocate and queue inbound buffers */
+#define MHI_CH_INBOUND_ALLOC_BUFS BIT(0)
/**
* mhi_unprepare_from_transfer - Reset UL and DL channels for data transfer.
bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev);
bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev);
bool mlx5_lag_is_active(struct mlx5_core_dev *dev);
+bool mlx5_lag_is_master(struct mlx5_core_dev *dev);
+bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev);
struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev);
u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
struct net_device *slave);
u64 *values,
int num_counters,
size_t *offsets);
+struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev);
struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev);
void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up);
int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type,
REP_LOADED,
};
+enum mlx5_switchdev_event {
+ MLX5_SWITCHDEV_EVENT_PAIR,
+ MLX5_SWITCHDEV_EVENT_UNPAIR,
+};
+
struct mlx5_eswitch_rep;
struct mlx5_eswitch_rep_ops {
int (*load)(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep);
void (*unload)(struct mlx5_eswitch_rep *rep);
void *(*get_proto_dev)(struct mlx5_eswitch_rep *rep);
+ int (*event)(struct mlx5_eswitch *esw,
+ struct mlx5_eswitch_rep *rep,
+ enum mlx5_switchdev_event event,
+ void *data);
};
struct mlx5_eswitch_rep_data {
void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type);
struct mlx5_flow_handle *
mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
+ struct mlx5_eswitch *from_esw,
struct mlx5_eswitch_rep *rep, u32 sqn);
#ifdef CONFIG_MLX5_ESWITCH
u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev);
u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev);
+struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw);
#else /* CONFIG_MLX5_ESWITCH */
return 0;
}
+static inline struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw)
+{
+ return NULL;
+}
+
#endif /* CONFIG_MLX5_ESWITCH */
static inline bool is_mdev_switchdev_mode(struct mlx5_core_dev *dev)
#define MLX5_FS_DEFAULT_FLOW_TAG 0x0
+#define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v)
+
enum {
MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO = 1 << 16,
MLX5_FLOW_CONTEXT_ACTION_ENCRYPT = 1 << 17,
u8 max_geneve_tlv_option_data_len[0x5];
u8 reserved_at_570[0x10];
- u8 reserved_at_580[0x33];
+ u8 reserved_at_580[0xb];
+ u8 log_max_dci_stream_channels[0x5];
+ u8 reserved_at_590[0x3];
+ u8 log_max_dci_errored_streams[0x5];
+ u8 reserved_at_598[0x8];
+
+ u8 reserved_at_5a0[0x13];
u8 log_max_dek[0x5];
u8 reserved_at_5b8[0x4];
u8 mini_cqe_resp_stride_index[0x1];
u8 reserved_at_3c0[0x8];
u8 next_send_psn[0x18];
- u8 reserved_at_3e0[0x8];
+ u8 reserved_at_3e0[0x3];
+ u8 log_num_dci_stream_channels[0x5];
u8 cqn_snd[0x18];
- u8 reserved_at_400[0x8];
+ u8 reserved_at_400[0x3];
+ u8 log_num_dci_errored_streams[0x5];
u8 deth_sqpn[0x18];
u8 reserved_at_420[0x20];
u8 status[0x4];
u8 reserved_at_4[0x2];
u8 dbr_umem_valid[0x1];
- u8 apu_thread_cq[0x1];
+ u8 apu_cq[0x1];
u8 cqe_sz[0x3];
u8 cc[0x1];
u8 reserved_at_c[0x1];
u8 cq_period[0xc];
u8 cq_max_count[0x10];
- u8 reserved_at_a0[0x18];
- u8 c_eqn[0x8];
+ u8 c_eqn_or_apu_element[0x20];
u8 reserved_at_c0[0x3];
u8 log_page_size[0x5];
unsigned long pp_magic;
struct page_pool *pp;
unsigned long _pp_mapping_pad;
- /**
- * @dma_addr: might require a 64-bit value on
- * 32-bit architectures.
- */
- unsigned long dma_addr[2];
+ unsigned long dma_addr;
+ union {
+ /**
+ * dma_addr_upper: might require a 64-bit
+ * value on 32-bit architectures.
+ */
+ unsigned long dma_addr_upper;
+ /**
+ * For frag page support, not supported in
+ * 32-bit architectures with 64-bit DMA.
+ */
+ atomic_long_t pp_frag_count;
+ };
};
struct { /* slab, slob and slub */
union {
};
-/*
- * This structure holds boot-time configured netdevice settings. They
- * are then used in the device probing.
- */
-struct netdev_boot_setup {
- char name[IFNAMSIZ];
- struct ifmap map;
-};
-#define NETDEV_BOOT_SETUP_MAX 8
-
-int __init netdev_boot_setup(char *str);
-
struct gro_list {
struct list_head list;
int count;
* that got dropped are freed/returned via xdp_return_frame().
* Returns negative number, means general error invoking ndo, meaning
* no frames were xmit'ed and core-caller will free all frames.
+ * struct net_device *(*ndo_xdp_get_xmit_slave)(struct net_device *dev,
+ * struct xdp_buff *xdp);
+ * Get the xmit slave of master device based on the xdp_buff.
* int (*ndo_xsk_wakeup)(struct net_device *dev, u32 queue_id, u32 flags);
* This function is used to wake up the softirq, ksoftirqd or kthread
* responsible for sending and/or receiving packets on a specific
int (*ndo_xdp_xmit)(struct net_device *dev, int n,
struct xdp_frame **xdp,
u32 flags);
+ struct net_device * (*ndo_xdp_get_xmit_slave)(struct net_device *dev,
+ struct xdp_buff *xdp);
int (*ndo_xsk_wakeup)(struct net_device *dev,
u32 queue_id, u32 flags);
struct devlink_port * (*ndo_get_devlink_port)(struct net_device *dev);
}
int netdev_boot_setup_check(struct net_device *dev);
-unsigned long netdev_boot_base(const char *prefix, int unit);
struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
const char *hwaddr);
struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type);
return 0;
}
#endif
+int netif_set_real_num_queues(struct net_device *dev,
+ unsigned int txq, unsigned int rxq);
static inline struct netdev_rx_queue *
__netif_get_rx_queue(struct net_device *dev, unsigned int rxq)
int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
int fd, int expected_fd, u32 flags);
int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
+u8 dev_xdp_prog_count(struct net_device *dev);
u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode);
int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
*/
static inline void dev_put(struct net_device *dev)
{
+ if (dev) {
#ifdef CONFIG_PCPU_DEV_REFCNT
- this_cpu_dec(*dev->pcpu_refcnt);
+ this_cpu_dec(*dev->pcpu_refcnt);
#else
- refcount_dec(&dev->dev_refcnt);
+ refcount_dec(&dev->dev_refcnt);
#endif
+ }
}
/**
*/
static inline void dev_hold(struct net_device *dev)
{
+ if (dev) {
#ifdef CONFIG_PCPU_DEV_REFCNT
- this_cpu_inc(*dev->pcpu_refcnt);
+ this_cpu_inc(*dev->pcpu_refcnt);
#else
- refcount_inc(&dev->dev_refcnt);
+ refcount_inc(&dev->dev_refcnt);
#endif
+ }
}
/* Carrier loss detection, dial on demand. The functions netif_carrier_on
struct bio_list;
struct blk_plug;
struct bpf_local_storage;
+struct bpf_run_ctx;
struct capture_control;
struct cfs_rq;
struct fs_struct;
#ifdef CONFIG_BPF_SYSCALL
/* Used by BPF task local storage */
struct bpf_local_storage __rcu *bpf_storage;
+ /* Used for BPF run context */
+ struct bpf_run_ctx *bpf_ctx;
#endif
#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, gfp_t gfp_mask);
struct sk_buff *skb_realloc_headroom(struct sk_buff *skb,
unsigned int headroom);
+struct sk_buff *skb_expand_head(struct sk_buff *skb, unsigned int headroom);
struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom,
int newtailroom, gfp_t priority);
int __must_check skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg,
}
#ifdef CONFIG_PAGE_POOL
-static inline void skb_mark_for_recycle(struct sk_buff *skb, struct page *page,
- struct page_pool *pp)
+static inline void skb_mark_for_recycle(struct sk_buff *skb)
{
skb->pp_recycle = 1;
- page_pool_store_mem_info(page, pp);
}
#endif
return rcu_dereference_sk_user_data(sk);
}
+static inline void sk_psock_set_state(struct sk_psock *psock,
+ enum sk_psock_state_bits bit)
+{
+ set_bit(bit, &psock->state);
+}
+
+static inline void sk_psock_clear_state(struct sk_psock *psock,
+ enum sk_psock_state_bits bit)
+{
+ clear_bit(bit, &psock->state);
+}
+
+static inline bool sk_psock_test_state(const struct sk_psock *psock,
+ enum sk_psock_state_bits bit)
+{
+ return test_bit(bit, &psock->state);
+}
+
+static inline void sock_drop(struct sock *sk, struct sk_buff *skb)
+{
+ sk_drops_add(sk, skb);
+ kfree_skb(skb);
+}
+
+static inline void drop_sk_msg(struct sk_psock *psock, struct sk_msg *msg)
+{
+ if (msg->skb)
+ sock_drop(psock->sk, msg->skb);
+ kfree(msg);
+}
+
static inline void sk_psock_queue_msg(struct sk_psock *psock,
struct sk_msg *msg)
{
spin_lock_bh(&psock->ingress_lock);
- list_add_tail(&msg->list, &psock->ingress_msg);
+ if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
+ list_add_tail(&msg->list, &psock->ingress_msg);
+ else
+ drop_sk_msg(psock, msg);
spin_unlock_bh(&psock->ingress_lock);
}
psock->psock_update_sk_prot(sk, psock, true);
}
-static inline void sk_psock_set_state(struct sk_psock *psock,
- enum sk_psock_state_bits bit)
-{
- set_bit(bit, &psock->state);
-}
-
-static inline void sk_psock_clear_state(struct sk_psock *psock,
- enum sk_psock_state_bits bit)
-{
- clear_bit(bit, &psock->state);
-}
-
-static inline bool sk_psock_test_state(const struct sk_psock *psock,
- enum sk_psock_state_bits bit)
-{
- return test_bit(bit, &psock->state);
-}
-
static inline struct sk_psock *sk_psock_get(struct sock *sk)
{
struct sk_psock *psock;
struct net_device *wd_probe(int unit);
struct net_device *ne_probe(int unit);
struct net_device *fmv18x_probe(int unit);
-struct net_device *i82596_probe(int unit);
struct net_device *ni65_probe(int unit);
struct net_device *sonic_probe(int unit);
struct net_device *smc_init(int unit);
-struct net_device *atarilance_probe(int unit);
-struct net_device *sun3lance_probe(int unit);
-struct net_device *sun3_82586_probe(int unit);
-struct net_device *apne_probe(int unit);
struct net_device *cs89x0_probe(int unit);
-struct net_device *mvme147lance_probe(int unit);
struct net_device *tc515_probe(int unit);
struct net_device *lance_probe(int unit);
struct net_device *cops_probe(int unit);
-struct net_device *ltpc_probe(void);
/* Fibre Channel adapters */
int iph5526_probe(struct net_device *dev);
-
-/* SBNI adapters */
-int sbni_probe(int unit);
#define TCA_ACT_HW_STATS_ANY (TCA_ACT_HW_STATS_IMMEDIATE | \
TCA_ACT_HW_STATS_DELAYED)
+/* Reserve 16 bits for user-space. See TCA_ACT_FLAGS_NO_PERCPU_STATS. */
+#define TCA_ACT_FLAGS_USER_BITS 16
+#define TCA_ACT_FLAGS_USER_MASK 0xffff
+#define TCA_ACT_FLAGS_POLICE (1U << TCA_ACT_FLAGS_USER_BITS)
+#define TCA_ACT_FLAGS_BIND (1U << (TCA_ACT_FLAGS_USER_BITS + 1))
+#define TCA_ACT_FLAGS_REPLACE (1U << (TCA_ACT_FLAGS_USER_BITS + 2))
+#define TCA_ACT_FLAGS_NO_RTNL (1U << (TCA_ACT_FLAGS_USER_BITS + 3))
+
/* Update lastuse only if needed, to avoid dirtying a cache line.
* We use a temp variable to avoid fetching jiffies twice.
*/
void (*cleanup)(struct tc_action *);
int (*lookup)(struct net *net, struct tc_action **a, u32 index);
int (*init)(struct net *net, struct nlattr *nla,
- struct nlattr *est, struct tc_action **act, int ovr,
- int bind, bool rtnl_held, struct tcf_proto *tp,
+ struct nlattr *est, struct tc_action **act,
+ struct tcf_proto *tp,
u32 flags, struct netlink_ext_ack *extack);
int (*walk)(struct net *, struct sk_buff *,
struct netlink_callback *, int,
int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
int nr_actions, struct tcf_result *res);
int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
- struct nlattr *est, char *name, int ovr, int bind,
+ struct nlattr *est,
struct tc_action *actions[], int init_res[], size_t *attr_size,
- bool rtnl_held, struct netlink_ext_ack *extack);
-struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla,
+ u32 flags, struct netlink_ext_ack *extack);
+struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, bool police,
bool rtnl_held,
struct netlink_ext_ack *extack);
struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
struct nlattr *nla, struct nlattr *est,
- char *name, int ovr, int bind,
struct tc_action_ops *a_o, int *init_res,
- bool rtnl_held,
- struct netlink_ext_ack *extack);
+ u32 flags, struct netlink_ext_ack *extack);
int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[], int bind,
int ref, bool terse);
int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int);
struct socket_wq peer_wq;
wait_queue_entry_t peer_wake;
struct scm_stat scm_stat;
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+ struct sk_buff *oob_skb;
+#endif
};
static inline struct unix_sock *unix_sk(const struct sock *sk)
int (*check_irq)(struct platform_device *pdev);
};
+/* exported from ax88796.c for xsurf100.c */
+extern void ax_NS8390_reinit(struct net_device *dev);
+
#endif /* __NET_AX88796_PLAT_H */
void hci_free_dev(struct hci_dev *hdev);
int hci_register_dev(struct hci_dev *hdev);
void hci_unregister_dev(struct hci_dev *hdev);
+void hci_cleanup_dev(struct hci_dev *hdev);
int hci_suspend_dev(struct hci_dev *hdev);
int hci_resume_dev(struct hci_dev *hdev);
int hci_reset_dev(struct hci_dev *hdev);
int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond,
struct slave *slave);
int bond_3ad_set_carrier(struct bonding *bond);
+void bond_3ad_update_lacp_active(struct bonding *bond);
void bond_3ad_update_lacp_rate(struct bonding *bond);
void bond_3ad_update_ad_actor_settings(struct bonding *bond);
int bond_3ad_stats_fill(struct sk_buff *skb, struct bond_3ad_stats *stats);
BOND_OPT_AD_USER_PORT_KEY,
BOND_OPT_NUM_PEER_NOTIF_ALIAS,
BOND_OPT_PEER_NOTIF_DELAY,
+ BOND_OPT_LACP_ACTIVE,
BOND_OPT_LAST
};
int updelay;
int downdelay;
int peer_notif_delay;
+ int lacp_active;
int lacp_fast;
unsigned int min_links;
int ad_select;
/* protecting ipsec_list */
spinlock_t ipsec_lock;
#endif /* CONFIG_XFRM_OFFLOAD */
+ struct bpf_prog *xdp_prog;
};
#define bond_slave_get_rcu(dev) \
} __packed;
struct compat_group_filter {
- __u32 gf_interface;
- struct __kernel_sockaddr_storage gf_group
- __aligned(4);
- __u32 gf_fmode;
- __u32 gf_numsrc;
- struct __kernel_sockaddr_storage gf_slist[1]
- __aligned(4);
+ union {
+ struct {
+ __u32 gf_interface_aux;
+ struct __kernel_sockaddr_storage gf_group_aux
+ __aligned(4);
+ __u32 gf_fmode_aux;
+ __u32 gf_numsrc_aux;
+ struct __kernel_sockaddr_storage gf_slist[1]
+ __aligned(4);
+ } __packed;
+ struct {
+ __u32 gf_interface;
+ struct __kernel_sockaddr_storage gf_group
+ __aligned(4);
+ __u32 gf_fmode;
+ __u32 gf_numsrc;
+ struct __kernel_sockaddr_storage gf_slist_flex[]
+ __aligned(4);
+ } __packed;
+ };
} __packed;
#endif /* NET_COMPAT_H */
*
* Note: @extack can be NULL when port notifier queries the port function.
*/
- int (*port_function_hw_addr_get)(struct devlink *devlink, struct devlink_port *port,
- u8 *hw_addr, int *hw_addr_len,
+ int (*port_function_hw_addr_get)(struct devlink_port *port, u8 *hw_addr,
+ int *hw_addr_len,
struct netlink_ext_ack *extack);
/**
* @port_function_hw_addr_set: Port function's hardware address set function.
* by the devlink port. Driver should return -EOPNOTSUPP if it doesn't support port
* function handling for a particular port.
*/
- int (*port_function_hw_addr_set)(struct devlink *devlink, struct devlink_port *port,
+ int (*port_function_hw_addr_set)(struct devlink_port *port,
const u8 *hw_addr, int hw_addr_len,
struct netlink_ext_ack *extack);
/**
*
* Return: 0 on success, negative value otherwise.
*/
- int (*port_fn_state_get)(struct devlink *devlink,
- struct devlink_port *port,
+ int (*port_fn_state_get)(struct devlink_port *port,
enum devlink_port_fn_state *state,
enum devlink_port_fn_opstate *opstate,
struct netlink_ext_ack *extack);
*
* Return: 0 on success, negative value otherwise.
*/
- int (*port_fn_state_set)(struct devlink *devlink,
- struct devlink_port *port,
+ int (*port_fn_state_set)(struct devlink_port *port,
enum devlink_port_fn_state state,
struct netlink_ext_ack *extack);
* Drivers that operate on real HW must use devlink_alloc() instead.
*/
struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
- size_t priv_size, struct net *net);
+ size_t priv_size, struct net *net,
+ struct device *dev);
static inline struct devlink *devlink_alloc(const struct devlink_ops *ops,
- size_t priv_size)
+ size_t priv_size,
+ struct device *dev)
{
- return devlink_alloc_ns(ops, priv_size, &init_net);
+ return devlink_alloc_ns(ops, priv_size, &init_net, dev);
}
-int devlink_register(struct devlink *devlink, struct device *dev);
+int devlink_register(struct devlink *devlink);
void devlink_unregister(struct devlink *devlink);
void devlink_reload_enable(struct devlink *devlink);
void devlink_reload_disable(struct devlink *devlink);
DSA_TAG_PROTO_SJA1110 = DSA_TAG_PROTO_SJA1110_VALUE,
};
-struct packet_type;
struct dsa_switch;
struct dsa_device_ops {
struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev);
- struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt);
+ struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev);
void (*flow_dissect)(const struct sk_buff *skb, __be16 *proto,
int *offset);
unsigned int needed_headroom;
/* Copies for faster access in master receive hot path */
struct dsa_switch_tree *dst;
- struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt);
+ struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev);
enum {
DSA_PORT_TYPE_UNUSED = 0,
struct device_node *dn;
unsigned int ageing_time;
bool vlan_filtering;
+ /* Managed by DSA on user ports and by drivers on CPU and DSA ports */
+ bool learning;
u8 stp_state;
struct net_device *bridge_dev;
int bridge_num;
int (*port_bridge_flags)(struct dsa_switch *ds, int port,
struct switchdev_brport_flags flags,
struct netlink_ext_ack *extack);
- int (*port_set_mrouter)(struct dsa_switch *ds, int port, bool mrouter,
- struct netlink_ext_ack *extack);
/*
* VLAN support
}
/**
- * flow_action_has_one_action() - check if exactly one action is present
+ * flow_offload_has_one_action() - check if exactly one action is present
* @action: tc filter flow offload action
*
* Returns true if exactly one action is present.
struct in6_addr sl_addr[];
};
-#define IP6_SFLSIZE(count) (sizeof(struct ip6_sf_socklist) + \
- (count) * sizeof(struct in6_addr))
-
#define IP6_SFBLOCK 10 /* allocate this many at once */
struct ipv6_mc_socklist {
____cacheline_aligned_in_smp;
};
+#define inet_lhash2_for_each_icsk_continue(__icsk) \
+ hlist_for_each_entry_continue(__icsk, icsk_listen_portaddr_node)
+
+#define inet_lhash2_for_each_icsk(__icsk, list) \
+ hlist_for_each_entry(__icsk, list, icsk_listen_portaddr_node)
+
#define inet_lhash2_for_each_icsk_rcu(__icsk, list) \
hlist_for_each_entry_rcu(__icsk, list, icsk_listen_portaddr_node)
static inline unsigned int ip6_skb_dst_mtu(struct sk_buff *skb)
{
- int mtu;
+ unsigned int mtu;
struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
inet6_sk(skb->sk) : NULL;
#include <linux/if_ether.h>
/* Lengths of frame formats */
-#define LLC_PDU_LEN_I 4 /* header and 2 control bytes */
-#define LLC_PDU_LEN_S 4
-#define LLC_PDU_LEN_U 3 /* header and 1 control byte */
+#define LLC_PDU_LEN_I 4 /* header and 2 control bytes */
+#define LLC_PDU_LEN_S 4
+#define LLC_PDU_LEN_U 3 /* header and 1 control byte */
+/* header and 1 control byte and XID info */
+#define LLC_PDU_LEN_U_XID (LLC_PDU_LEN_U + sizeof(struct llc_xid_info))
/* Known SAP addresses */
#define LLC_GLOBAL_SAP 0xFF
#define LLC_NULL_SAP 0x00 /* not network-layer visible */
#define LLC_PDU_TYPE_U_MASK 0x03 /* 8-bit control field */
#define LLC_PDU_TYPE_MASK 0x03
-#define LLC_PDU_TYPE_I 0 /* first bit */
-#define LLC_PDU_TYPE_S 1 /* first two bits */
-#define LLC_PDU_TYPE_U 3 /* first two bits */
+#define LLC_PDU_TYPE_I 0 /* first bit */
+#define LLC_PDU_TYPE_S 1 /* first two bits */
+#define LLC_PDU_TYPE_U 3 /* first two bits */
+#define LLC_PDU_TYPE_U_XID 4 /* private type for detecting XID commands */
#define LLC_PDU_TYPE_IS_I(pdu) \
((!(pdu->ctrl_1 & LLC_PDU_TYPE_I_MASK)) ? 1 : 0)
static inline void llc_pdu_header_init(struct sk_buff *skb, u8 type,
u8 ssap, u8 dsap, u8 cr)
{
- const int hlen = type == LLC_PDU_TYPE_U ? 3 : 4;
+ int hlen = 4; /* default value for I and S types */
struct llc_pdu_un *pdu;
+ switch (type) {
+ case LLC_PDU_TYPE_U:
+ hlen = 3;
+ break;
+ case LLC_PDU_TYPE_U_XID:
+ hlen = 6;
+ break;
+ }
+
skb_push(skb, hlen);
skb_reset_network_header(skb);
pdu = llc_pdu_un_hdr(skb);
xid_info->fmt_id = LLC_XID_FMT_ID; /* 0x81 */
xid_info->type = svcs_supported;
xid_info->rw = rx_window << 1; /* size of receive window */
- skb_put(skb, sizeof(struct llc_xid_info));
+
+ /* no need to push/put since llc_pdu_header_init() has already
+ * pushed 3 + 3 bytes
+ */
}
/**
#endif
spinlock_t xfrm_state_lock;
seqcount_spinlock_t xfrm_state_hash_generation;
+ seqcount_spinlock_t xfrm_policy_hash_generation;
spinlock_t xfrm_policy_lock;
struct mutex xfrm_cfg_mutex;
void nci_unregister_device(struct nci_dev *ndev);
int nci_request(struct nci_dev *ndev,
void (*req)(struct nci_dev *ndev,
- unsigned long opt),
- unsigned long opt, __u32 timeout);
+ const void *opt),
+ const void *opt, __u32 timeout);
int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len,
const __u8 *payload);
int nci_core_cmd(struct nci_dev *ndev, __u16 opcode, size_t len,
* Please note DMA-sync-for-CPU is still
* device driver responsibility
*/
-#define PP_FLAG_ALL (PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV)
+#define PP_FLAG_PAGE_FRAG BIT(2) /* for page frag feature */
+#define PP_FLAG_ALL (PP_FLAG_DMA_MAP |\
+ PP_FLAG_DMA_SYNC_DEV |\
+ PP_FLAG_PAGE_FRAG)
/*
* Fast allocation side cache array/stack
unsigned long defer_warn;
u32 pages_state_hold_cnt;
+ unsigned int frag_offset;
+ struct page *frag_page;
+ long frag_users;
/*
* Data structure for allocation side
return page_pool_alloc_pages(pool, gfp);
}
+struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
+ unsigned int size, gfp_t gfp);
+
+static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool,
+ unsigned int *offset,
+ unsigned int size)
+{
+ gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN);
+
+ return page_pool_alloc_frag(pool, offset, size, gfp);
+}
+
/* get the stored dma direction. A driver might decide to treat this locally and
* avoid the extra cache line from page_pool to determine the direction
*/
page_pool_put_full_page(pool, page, true);
}
+#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT \
+ (sizeof(dma_addr_t) > sizeof(unsigned long))
+
static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
{
- dma_addr_t ret = page->dma_addr[0];
- if (sizeof(dma_addr_t) > sizeof(unsigned long))
- ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16;
+ dma_addr_t ret = page->dma_addr;
+
+ if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
+ ret |= (dma_addr_t)page->dma_addr_upper << 16 << 16;
+
return ret;
}
static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
{
- page->dma_addr[0] = addr;
- if (sizeof(dma_addr_t) > sizeof(unsigned long))
- page->dma_addr[1] = upper_32_bits(addr);
+ page->dma_addr = addr;
+ if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
+ page->dma_addr_upper = upper_32_bits(addr);
+}
+
+static inline void page_pool_set_frag_count(struct page *page, long nr)
+{
+ atomic_long_set(&page->pp_frag_count, nr);
+}
+
+static inline long page_pool_atomic_sub_frag_count_return(struct page *page,
+ long nr)
+{
+ long ret;
+
+ /* As suggested by Alexander, atomic_long_read() may cover up the
+ * reference count errors, so avoid calling atomic_long_read() in
+ * the cases of freeing or draining the page_frags, where we would
+ * not expect it to match or that are slowpath anyway.
+ */
+ if (__builtin_constant_p(nr) &&
+ atomic_long_read(&page->pp_frag_count) == nr)
+ return 0;
+
+ ret = atomic_long_sub_return(nr, &page->pp_frag_count);
+ WARN_ON(ret < 0);
+ return ret;
}
static inline bool is_page_pool_compiled_in(void)
spin_unlock_bh(&pool->ring.producer_lock);
}
-/* Store mem_info on struct page and use it while recycling skb frags */
-static inline
-void page_pool_store_mem_info(struct page *page, struct page_pool *pp)
-{
- page->pp = pp;
-}
-
#endif /* _NET_PAGE_POOL_H */
int tcf_exts_validate(struct net *net, struct tcf_proto *tp,
struct nlattr **tb, struct nlattr *rate_tlv,
- struct tcf_exts *exts, bool ovr, bool rtnl_held,
+ struct tcf_exts *exts, u32 flags,
struct netlink_ext_ack *extack);
void tcf_exts_destroy(struct tcf_exts *exts);
void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src);
/**
* struct tcf_pkt_info - packet information
+ *
+ * @ptr: start of the pkt data
+ * @nexthdr: offset of the next header
*/
struct tcf_pkt_info {
unsigned char * ptr;
* @ops: the operations lookup table of the corresponding ematch module
* @datalen: length of the ematch specific configuration data
* @data: ematch specific data
+ * @net: the network namespace
*/
struct tcf_ematch {
struct tcf_ematch_ops * ops;
u32 ext_filter_mask);
int (*validate_link_af)(const struct net_device *dev,
- const struct nlattr *attr);
+ const struct nlattr *attr,
+ struct netlink_ext_ack *extack);
int (*set_link_af)(struct net_device *dev,
const struct nlattr *attr,
struct netlink_ext_ack *extack);
int (*change)(struct net *net, struct sk_buff *,
struct tcf_proto*, unsigned long,
u32 handle, struct nlattr **,
- void **, bool, bool,
+ void **, u32,
struct netlink_ext_ack *);
int (*delete)(struct tcf_proto *tp, void *arg,
bool *last, bool rtnl_held,
} cacc;
struct {
+ __u32 last_rtx_chunks;
__u16 pmtu;
__u16 probe_size;
__u16 probe_high;
void sctp_transport_immediate_rtx(struct sctp_transport *);
void sctp_transport_dst_release(struct sctp_transport *t);
void sctp_transport_dst_confirm(struct sctp_transport *t);
-void sctp_transport_pl_send(struct sctp_transport *t);
-void sctp_transport_pl_recv(struct sctp_transport *t);
+bool sctp_transport_pl_send(struct sctp_transport *t);
+bool sctp_transport_pl_recv(struct sctp_transport *t);
/* This is the structure we use to queue packets as they come into
#include <net/tcp_states.h>
#include <linux/net_tstamp.h>
#include <net/l3mdev.h>
+#include <uapi/linux/socket.h>
/*
* This structure really needs to be cleaned up.
#define RCV_SHUTDOWN 1
#define SEND_SHUTDOWN 2
-#define SOCK_SNDBUF_LOCK 1
-#define SOCK_RCVBUF_LOCK 2
#define SOCK_BINDADDR_LOCK 4
#define SOCK_BINDPORT_LOCK 8
typedef int switchdev_obj_dump_cb_t(struct switchdev_obj *obj);
+struct switchdev_brport {
+ struct net_device *dev;
+ const void *ctx;
+ struct notifier_block *atomic_nb;
+ struct notifier_block *blocking_nb;
+ bool tx_fwd_offload;
+};
+
enum switchdev_notifier_type {
SWITCHDEV_FDB_ADD_TO_BRIDGE = 1,
SWITCHDEV_FDB_DEL_TO_BRIDGE,
SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE,
SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE,
SWITCHDEV_VXLAN_FDB_OFFLOADED,
+
+ SWITCHDEV_BRPORT_OFFLOADED,
+ SWITCHDEV_BRPORT_UNOFFLOADED,
};
struct switchdev_notifier_info {
bool handled;
};
+struct switchdev_notifier_brport_info {
+ struct switchdev_notifier_info info; /* must be first */
+ const struct switchdev_brport brport;
+};
+
static inline struct net_device *
switchdev_notifier_info_to_dev(const struct switchdev_notifier_info *info)
{
#ifdef CONFIG_NET_SWITCHDEV
+int switchdev_bridge_port_offload(struct net_device *brport_dev,
+ struct net_device *dev, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ bool tx_fwd_offload,
+ struct netlink_ext_ack *extack);
+void switchdev_bridge_port_unoffload(struct net_device *brport_dev,
+ const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb);
+
void switchdev_deferred_process(void);
int switchdev_port_attr_set(struct net_device *dev,
const struct switchdev_attr *attr,
struct netlink_ext_ack *extack));
#else
+static inline int
+switchdev_bridge_port_offload(struct net_device *brport_dev,
+ struct net_device *dev, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ bool tx_fwd_offload,
+ struct netlink_ext_ack *extack)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void
+switchdev_bridge_port_unoffload(struct net_device *brport_dev,
+ const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb)
+{
+}
+
static inline void switchdev_deferred_process(void)
{
}
struct seq_net_private p;
enum tcp_seq_states state;
struct sock *syn_wait_sk;
- struct tcp_seq_afinfo *bpf_seq_afinfo;
int bucket, offset, sbucket, num;
loff_t last_pos;
};
/* Do not create a PCM for this DAI link (Backend link) */
unsigned int ignore:1;
+ /* This flag will reorder stop sequence. By enabling this flag
+ * DMA controller stop sequence will be invoked first followed by
+ * CPU DAI driver stop sequence
+ */
+ unsigned int stop_dma_first:1;
+
#ifdef CONFIG_SND_SOC_TOPOLOGY
struct snd_soc_dobj dobj; /* For topology */
#endif
#define SO_NETNS_COOKIE 71
+#define SO_BUF_LOCK 72
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
enum {
J1939_NLA_PAD,
J1939_NLA_BYTES_ACKED,
+ J1939_NLA_TOTAL_SIZE,
+ J1939_NLA_PGN,
+ J1939_NLA_SRC_NAME,
+ J1939_NLA_DEST_NAME,
+ J1939_NLA_SRC_ADDR,
+ J1939_NLA_DEST_ADDR,
};
enum {
J1939_EE_INFO_NONE,
J1939_EE_INFO_TX_ABORT,
+ J1939_EE_INFO_RX_RTS,
+ J1939_EE_INFO_RX_DPO,
+ J1939_EE_INFO_RX_ABORT,
};
struct j1939_filter {
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* SPDX-License-Identifier: LGPL-2.1 WITH Linux-syscall-note */
/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
#ifndef _USR_IDXD_H_
#define _USR_IDXD_H_
IFLA_BOND_AD_ACTOR_SYSTEM,
IFLA_BOND_TLB_DYNAMIC_LB,
IFLA_BOND_PEER_NOTIF_DELAY,
+ IFLA_BOND_AD_LACP_ACTIVE,
__IFLA_BOND_MAX,
};
};
struct ip_msfilter {
- __be32 imsf_multiaddr;
- __be32 imsf_interface;
- __u32 imsf_fmode;
- __u32 imsf_numsrc;
- __be32 imsf_slist[1];
+ union {
+ struct {
+ __be32 imsf_multiaddr_aux;
+ __be32 imsf_interface_aux;
+ __u32 imsf_fmode_aux;
+ __u32 imsf_numsrc_aux;
+ __be32 imsf_slist[1];
+ };
+ struct {
+ __be32 imsf_multiaddr;
+ __be32 imsf_interface;
+ __u32 imsf_fmode;
+ __u32 imsf_numsrc;
+ __be32 imsf_slist_flex[];
+ };
+ };
};
#define IP_MSFILTER_SIZE(numsrc) \
};
struct group_filter {
- __u32 gf_interface; /* interface index */
- struct __kernel_sockaddr_storage gf_group; /* multicast address */
- __u32 gf_fmode; /* filter mode */
- __u32 gf_numsrc; /* number of sources */
- struct __kernel_sockaddr_storage gf_slist[1]; /* interface index */
+ union {
+ struct {
+ __u32 gf_interface_aux; /* interface index */
+ struct __kernel_sockaddr_storage gf_group_aux; /* multicast address */
+ __u32 gf_fmode_aux; /* filter mode */
+ __u32 gf_numsrc_aux; /* number of sources */
+ struct __kernel_sockaddr_storage gf_slist[1]; /* interface index */
+ };
+ struct {
+ __u32 gf_interface; /* interface index */
+ struct __kernel_sockaddr_storage gf_group; /* multicast address */
+ __u32 gf_fmode; /* filter mode */
+ __u32 gf_numsrc; /* number of sources */
+ struct __kernel_sockaddr_storage gf_slist_flex[]; /* interface index */
+ };
+ };
};
#define GROUP_FILTER_SIZE(numsrc) \
__TCA_ACT_MAX
};
+/* See other TCA_ACT_FLAGS_ * flags in include/net/act_api.h. */
#define TCA_ACT_FLAGS_NO_PERCPU_STATS 1 /* Don't use percpu allocator for
* actions stats.
*/
};
};
+#define SOCK_SNDBUF_LOCK 1
+#define SOCK_RCVBUF_LOCK 2
+
+#define SOCK_BUF_LOCK_MASK (SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK)
+
#endif /* _UAPI_LINUX_SOCKET_H */
-/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
+/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB */
/*
* Copyright (c) 2006 - 2021 Intel Corporation. All rights reserved.
* Copyright (c) 2005 Topspin Communications. All rights reserved.
{
ktime_t *calltime = (ktime_t *)data;
- printk(KERN_DEBUG "calling %pS @ %i\n", fn, task_pid_nr(current));
+ printk(KERN_DEBUG "calling %pS @ %i irqs_disabled() %d\n", fn, task_pid_nr(current), irqs_disabled());
*calltime = ktime_get();
}
rettime = ktime_get();
delta = ktime_sub(rettime, *calltime);
duration = (unsigned long long) ktime_to_ns(delta) >> 10;
- printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs\n",
- fn, ret, duration);
+ printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs, irqs_disabled() %d\n",
+ fn, ret, duration, irqs_disabled());
}
static ktime_t initcall_calltime;
return supported;
}
+const struct bpf_func_proto *
+bpf_iter_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+ const struct bpf_iter_target_info *tinfo;
+ const struct bpf_func_proto *fn = NULL;
+
+ mutex_lock(&targets_mutex);
+ list_for_each_entry(tinfo, &targets, list) {
+ if (tinfo->btf_id == prog->aux->attach_btf_id) {
+ const struct bpf_iter_reg *reg_info;
+
+ reg_info = tinfo->reg_info;
+ if (reg_info->get_func_proto)
+ fn = reg_info->get_func_proto(func_id, prog);
+ break;
+ }
+ }
+ mutex_unlock(&targets_mutex);
+
+ return fn;
+}
+
static void bpf_iter_link_release(struct bpf_link *link)
{
struct bpf_iter_link *iter_link =
const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i];
if (ctx_arg_info->offset == off) {
+ if (!ctx_arg_info->btf_id) {
+ bpf_log(log,"invalid btf_id for context argument offset %u\n", off);
+ return false;
+ }
+
info->reg_type = ctx_arg_info->reg_type;
info->btf = btf_vmlinux;
info->btf_id = ctx_arg_info->btf_id;
#include <linux/perf_event.h>
#include <linux/extable.h>
#include <linux/log2.h>
+
+#include <asm/barrier.h>
#include <asm/unaligned.h>
/* Registers */
/* Non-UAPI available opcodes. */
[BPF_JMP | BPF_CALL_ARGS] = &&JMP_CALL_ARGS,
[BPF_JMP | BPF_TAIL_CALL] = &&JMP_TAIL_CALL,
+ [BPF_ST | BPF_NOSPEC] = &&ST_NOSPEC,
[BPF_LDX | BPF_PROBE_MEM | BPF_B] = &&LDX_PROBE_MEM_B,
[BPF_LDX | BPF_PROBE_MEM | BPF_H] = &&LDX_PROBE_MEM_H,
[BPF_LDX | BPF_PROBE_MEM | BPF_W] = &&LDX_PROBE_MEM_W,
if (unlikely(index >= array->map.max_entries))
goto out;
- if (unlikely(tail_call_cnt > MAX_TAIL_CALL_CNT))
+ if (unlikely(tail_call_cnt >= MAX_TAIL_CALL_CNT))
goto out;
tail_call_cnt++;
COND_JMP(s, JSGE, >=)
COND_JMP(s, JSLE, <=)
#undef COND_JMP
- /* STX and ST and LDX*/
+ /* ST, STX and LDX*/
+ ST_NOSPEC:
+ /* Speculation barrier for mitigating Speculative Store Bypass.
+ * In case of arm64, we rely on the firmware mitigation as
+ * controlled via the ssbd kernel parameter. Whenever the
+ * mitigation is enabled, it works for all of the kernel code
+ * with no need to provide any additional instructions here.
+ * In case of x86, we use 'lfence' insn for mitigation. We
+ * reuse preexisting logic from Spectre v1 mitigation that
+ * happens to produce the required code on x86 for v4 as well.
+ */
+#ifdef CONFIG_X86
+ barrier_nospec();
+#endif
+ CONT;
#define LDST(SIZEOP, SIZE) \
STX_MEM_##SIZEOP: \
*(SIZE *)(unsigned long) (DST + insn->off) = SRC; \
return __xdp_enqueue(dev, xdp, dev_rx, dst->xdp_prog);
}
-static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_buff *xdp,
- int exclude_ifindex)
+static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_buff *xdp)
{
- if (!obj || obj->dev->ifindex == exclude_ifindex ||
+ if (!obj ||
!obj->dev->netdev_ops->ndo_xdp_xmit)
return false;
return 0;
}
+static inline bool is_ifindex_excluded(int *excluded, int num_excluded, int ifindex)
+{
+ while (num_excluded--) {
+ if (ifindex == excluded[num_excluded])
+ return true;
+ }
+ return false;
+}
+
+/* Get ifindex of each upper device. 'indexes' must be able to hold at
+ * least MAX_NEST_DEV elements.
+ * Returns the number of ifindexes added.
+ */
+static int get_upper_ifindexes(struct net_device *dev, int *indexes)
+{
+ struct net_device *upper;
+ struct list_head *iter;
+ int n = 0;
+
+ netdev_for_each_upper_dev_rcu(dev, upper, iter) {
+ indexes[n++] = upper->ifindex;
+ }
+ return n;
+}
+
int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
struct bpf_map *map, bool exclude_ingress)
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
- int exclude_ifindex = exclude_ingress ? dev_rx->ifindex : 0;
struct bpf_dtab_netdev *dst, *last_dst = NULL;
+ int excluded_devices[1+MAX_NEST_DEV];
struct hlist_head *head;
struct xdp_frame *xdpf;
+ int num_excluded = 0;
unsigned int i;
int err;
+ if (exclude_ingress) {
+ num_excluded = get_upper_ifindexes(dev_rx, excluded_devices);
+ excluded_devices[num_excluded++] = dev_rx->ifindex;
+ }
+
xdpf = xdp_convert_buff_to_frame(xdp);
if (unlikely(!xdpf))
return -EOVERFLOW;
for (i = 0; i < map->max_entries; i++) {
dst = rcu_dereference_check(dtab->netdev_map[i],
rcu_read_lock_bh_held());
- if (!is_valid_dst(dst, xdp, exclude_ifindex))
+ if (!is_valid_dst(dst, xdp))
+ continue;
+
+ if (is_ifindex_excluded(excluded_devices, num_excluded, dst->dev->ifindex))
continue;
/* we only need n-1 clones; last_dst enqueued below */
head = dev_map_index_hash(dtab, i);
hlist_for_each_entry_rcu(dst, head, index_hlist,
lockdep_is_held(&dtab->index_lock)) {
- if (!is_valid_dst(dst, xdp, exclude_ifindex))
+ if (!is_valid_dst(dst, xdp))
+ continue;
+
+ if (is_ifindex_excluded(excluded_devices, num_excluded,
+ dst->dev->ifindex))
continue;
/* we only need n-1 clones; last_dst enqueued below */
bool exclude_ingress)
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
- int exclude_ifindex = exclude_ingress ? dev->ifindex : 0;
struct bpf_dtab_netdev *dst, *last_dst = NULL;
+ int excluded_devices[1+MAX_NEST_DEV];
struct hlist_head *head;
struct hlist_node *next;
+ int num_excluded = 0;
unsigned int i;
int err;
+ if (exclude_ingress) {
+ num_excluded = get_upper_ifindexes(dev, excluded_devices);
+ excluded_devices[num_excluded++] = dev->ifindex;
+ }
+
if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
for (i = 0; i < map->max_entries; i++) {
dst = rcu_dereference_check(dtab->netdev_map[i],
rcu_read_lock_bh_held());
- if (!dst || dst->dev->ifindex == exclude_ifindex)
+ if (!dst)
+ continue;
+
+ if (is_ifindex_excluded(excluded_devices, num_excluded, dst->dev->ifindex))
continue;
/* we only need n-1 clones; last_dst enqueued below */
return err;
last_dst = dst;
+
}
} else { /* BPF_MAP_TYPE_DEVMAP_HASH */
for (i = 0; i < dtab->n_buckets; i++) {
head = dev_map_index_hash(dtab, i);
hlist_for_each_entry_safe(dst, next, head, index_hlist) {
- if (!dst || dst->dev->ifindex == exclude_ifindex)
+ if (!dst)
+ continue;
+
+ if (is_ifindex_excluded(excluded_devices, num_excluded,
+ dst->dev->ifindex))
continue;
/* we only need n-1 clones; last_dst enqueued below */
verbose(cbs->private_data, "BUG_%02x\n", insn->code);
}
} else if (class == BPF_ST) {
- if (BPF_MODE(insn->code) != BPF_MEM) {
+ if (BPF_MODE(insn->code) == BPF_MEM) {
+ verbose(cbs->private_data, "(%02x) *(%s *)(r%d %+d) = %d\n",
+ insn->code,
+ bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
+ insn->dst_reg,
+ insn->off, insn->imm);
+ } else if (BPF_MODE(insn->code) == 0xc0 /* BPF_NOSPEC, no UAPI */) {
+ verbose(cbs->private_data, "(%02x) nospec\n", insn->code);
+ } else {
verbose(cbs->private_data, "BUG_st_%02x\n", insn->code);
- return;
}
- verbose(cbs->private_data, "(%02x) *(%s *)(r%d %+d) = %d\n",
- insn->code,
- bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
- insn->dst_reg,
- insn->off, insn->imm);
} else if (class == BPF_LDX) {
if (BPF_MODE(insn->code) != BPF_MEM) {
verbose(cbs->private_data, "BUG_ldx_%02x\n", insn->code);
};
#ifdef CONFIG_CGROUP_BPF
-DECLARE_PER_CPU(struct bpf_cgroup_storage_info,
- bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
{
* verifier checks that its value is correct.
*/
enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
- struct bpf_cgroup_storage *storage = NULL;
+ struct bpf_cgroup_storage *storage;
+ struct bpf_cg_run_ctx *ctx;
void *ptr;
- int i;
- for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
- if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current))
- continue;
-
- storage = this_cpu_read(bpf_cgroup_storage_info[i].storage[stype]);
- break;
- }
+ /* get current cgroup storage from BPF run context */
+ ctx = container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);
+ storage = ctx->prog_item->cgroup_storage[stype];
if (stype == BPF_CGROUP_STORAGE_SHARED)
ptr = &READ_ONCE(storage->buf)->data[0];
//SPDX-License-Identifier: GPL-2.0
#include <linux/bpf-cgroup.h>
#include <linux/bpf.h>
+#include <linux/bpf_local_storage.h>
#include <linux/btf.h>
#include <linux/bug.h>
#include <linux/filter.h>
#ifdef CONFIG_CGROUP_BPF
-DEFINE_PER_CPU(struct bpf_cgroup_storage_info,
- bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
-
#include "../cgroup/cgroup-internal.h"
#define LOCAL_STORAGE_CREATE_FLAG_MASK \
static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
{
+ __u32 max_value_size = BPF_LOCAL_STORAGE_MAX_VALUE_SIZE;
int numa_node = bpf_map_attr_numa_node(attr);
struct bpf_cgroup_storage_map *map;
+ /* percpu is bound by PCPU_MIN_UNIT_SIZE, non-percu
+ * is the same as other local storages.
+ */
+ if (attr->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
+ max_value_size = min_t(__u32, max_value_size,
+ PCPU_MIN_UNIT_SIZE);
+
if (attr->key_size != sizeof(struct bpf_cgroup_storage_key) &&
attr->key_size != sizeof(__u64))
return ERR_PTR(-EINVAL);
if (attr->value_size == 0)
return ERR_PTR(-EINVAL);
- if (attr->value_size > PAGE_SIZE)
+ if (attr->value_size > max_value_size)
return ERR_PTR(-E2BIG);
if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK ||
static void cgroup_storage_seq_show_elem(struct bpf_map *map, void *key,
struct seq_file *m)
{
- enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
+ enum bpf_cgroup_storage_type stype;
struct bpf_cgroup_storage *storage;
int cpu;
cur = env->cur_state->frame[env->cur_state->curframe];
if (value_regno >= 0)
reg = &cur->regs[value_regno];
+ if (!env->bypass_spec_v4) {
+ bool sanitize = reg && is_spillable_regtype(reg->type);
+
+ for (i = 0; i < size; i++) {
+ if (state->stack[spi].slot_type[i] == STACK_INVALID) {
+ sanitize = true;
+ break;
+ }
+ }
+
+ if (sanitize)
+ env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
+ }
if (reg && size == BPF_REG_SIZE && register_is_bounded(reg) &&
!register_is_null(reg) && env->bpf_capable) {
verbose(env, "invalid size of register spill\n");
return -EACCES;
}
-
if (state != cur && reg->type == PTR_TO_STACK) {
verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
return -EINVAL;
}
-
- if (!env->bypass_spec_v4) {
- bool sanitize = false;
-
- if (state->stack[spi].slot_type[0] == STACK_SPILL &&
- register_is_const(&state->stack[spi].spilled_ptr))
- sanitize = true;
- for (i = 0; i < BPF_REG_SIZE; i++)
- if (state->stack[spi].slot_type[i] == STACK_MISC) {
- sanitize = true;
- break;
- }
- if (sanitize) {
- int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off;
- int soff = (-spi - 1) * BPF_REG_SIZE;
-
- /* detected reuse of integer stack slot with a pointer
- * which means either llvm is reusing stack slot or
- * an attacker is trying to exploit CVE-2018-3639
- * (speculative store bypass)
- * Have to sanitize that slot with preemptive
- * store of zero.
- */
- if (*poff && *poff != soff) {
- /* disallow programs where single insn stores
- * into two different stack slots, since verifier
- * cannot sanitize them
- */
- verbose(env,
- "insn %d cannot access two stack slots fp%d and fp%d",
- insn_idx, *poff, soff);
- return -EINVAL;
- }
- *poff = soff;
- }
- }
save_register_state(state, spi, reg);
} else {
u8 type = STACK_MISC;
alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
alu_state |= ptr_is_dst_reg ?
BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
+
+ /* Limit pruning on unknown scalars to enable deep search for
+ * potential masking differences from other program paths.
+ */
+ if (!off_is_imm)
+ env->explore_alu_limits = true;
}
err = update_alu_sanitation_state(aux, alu_state, alu_limit);
}
/* Returns true if (rold safe implies rcur safe) */
-static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
- struct bpf_id_pair *idmap)
+static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
+ struct bpf_reg_state *rcur, struct bpf_id_pair *idmap)
{
bool equal;
return false;
switch (rold->type) {
case SCALAR_VALUE:
+ if (env->explore_alu_limits)
+ return false;
if (rcur->type == SCALAR_VALUE) {
if (!rold->precise && !rcur->precise)
return true;
return false;
}
-static bool stacksafe(struct bpf_func_state *old,
- struct bpf_func_state *cur,
- struct bpf_id_pair *idmap)
+static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
+ struct bpf_func_state *cur, struct bpf_id_pair *idmap)
{
int i, spi;
continue;
if (old->stack[spi].slot_type[0] != STACK_SPILL)
continue;
- if (!regsafe(&old->stack[spi].spilled_ptr,
- &cur->stack[spi].spilled_ptr,
- idmap))
+ if (!regsafe(env, &old->stack[spi].spilled_ptr,
+ &cur->stack[spi].spilled_ptr, idmap))
/* when explored and current stack slot are both storing
* spilled registers, check that stored pointers types
* are the same as well.
memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
for (i = 0; i < MAX_BPF_REG; i++)
- if (!regsafe(&old->regs[i], &cur->regs[i], env->idmap_scratch))
+ if (!regsafe(env, &old->regs[i], &cur->regs[i],
+ env->idmap_scratch))
return false;
- if (!stacksafe(old, cur, env->idmap_scratch))
+ if (!stacksafe(env, old, cur, env->idmap_scratch))
return false;
if (!refsafe(old, cur))
for (i = 0; i < insn_cnt; i++, insn++) {
bpf_convert_ctx_access_t convert_ctx_access;
+ bool ctx_access;
if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
- insn->code == (BPF_LDX | BPF_MEM | BPF_DW))
+ insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) {
type = BPF_READ;
- else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
- insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
- insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
- insn->code == (BPF_STX | BPF_MEM | BPF_DW))
+ ctx_access = true;
+ } else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
+ insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
+ insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
+ insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
+ insn->code == (BPF_ST | BPF_MEM | BPF_B) ||
+ insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
+ insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
+ insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
type = BPF_WRITE;
- else
+ ctx_access = BPF_CLASS(insn->code) == BPF_STX;
+ } else {
continue;
+ }
if (type == BPF_WRITE &&
- env->insn_aux_data[i + delta].sanitize_stack_off) {
+ env->insn_aux_data[i + delta].sanitize_stack_spill) {
struct bpf_insn patch[] = {
- /* Sanitize suspicious stack slot with zero.
- * There are no memory dependencies for this store,
- * since it's only using frame pointer and immediate
- * constant of zero
- */
- BPF_ST_MEM(BPF_DW, BPF_REG_FP,
- env->insn_aux_data[i + delta].sanitize_stack_off,
- 0),
- /* the original STX instruction will immediately
- * overwrite the same stack slot with appropriate value
- */
*insn,
+ BPF_ST_NOSPEC(),
};
cnt = ARRAY_SIZE(patch);
continue;
}
+ if (!ctx_access)
+ continue;
+
switch (env->insn_aux_data[i + delta].ptr_type) {
case PTR_TO_CTX:
if (!ops->convert_ctx_access)
}
}
-/* The verifier is using insn_aux_data[] to store temporary data during
- * verification and to store information for passes that run after the
- * verification like dead code sanitization. do_check_common() for subprogram N
- * may analyze many other subprograms. sanitize_insn_aux_data() clears all
- * temporary data after do_check_common() finds that subprogram N cannot be
- * verified independently. pass_cnt counts the number of times
- * do_check_common() was run and insn->aux->seen tells the pass number
- * insn_aux_data was touched. These variables are compared to clear temporary
- * data from failed pass. For testing and experiments do_check_common() can be
- * run multiple times even when prior attempt to verify is unsuccessful.
- *
- * Note that special handling is needed on !env->bypass_spec_v1 if this is
- * ever called outside of error path with subsequent program rejection.
- */
-static void sanitize_insn_aux_data(struct bpf_verifier_env *env)
-{
- struct bpf_insn *insn = env->prog->insnsi;
- struct bpf_insn_aux_data *aux;
- int i, class;
-
- for (i = 0; i < env->prog->len; i++) {
- class = BPF_CLASS(insn[i].code);
- if (class != BPF_LDX && class != BPF_STX)
- continue;
- aux = &env->insn_aux_data[i];
- if (aux->seen != env->pass_cnt)
- continue;
- memset(aux, 0, offsetof(typeof(*aux), orig_idx));
- }
-}
-
static int do_check_common(struct bpf_verifier_env *env, int subprog)
{
bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
if (!ret && pop_log)
bpf_vlog_reset(&env->log, 0);
free_states(env);
- if (ret)
- /* clean aux data in case subprog was rejected */
- sanitize_insn_aux_data(env);
return ret;
}
ret = cgroup_do_get_tree(fc);
if (!ret && percpu_ref_is_dying(&ctx->root->cgrp.self.refcnt)) {
- struct super_block *sb = fc->root->d_sb;
- dput(fc->root);
- deactivate_locked_super(sb);
+ fc_drop_locked(fc);
ret = 1;
}
*/
#include <linux/dma-map-ops.h>
+static struct page *dma_common_vaddr_to_page(void *cpu_addr)
+{
+ if (is_vmalloc_addr(cpu_addr))
+ return vmalloc_to_page(cpu_addr);
+ return virt_to_page(cpu_addr);
+}
+
/*
* Create scatter-list for the already allocated DMA buffer.
*/
void *cpu_addr, dma_addr_t dma_addr, size_t size,
unsigned long attrs)
{
- struct page *page = virt_to_page(cpu_addr);
+ struct page *page = dma_common_vaddr_to_page(cpu_addr);
int ret;
ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
unsigned long user_count = vma_pages(vma);
unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
unsigned long off = vma->vm_pgoff;
+ struct page *page = dma_common_vaddr_to_page(cpu_addr);
int ret = -ENXIO;
vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs);
return -ENXIO;
return remap_pfn_range(vma, vma->vm_start,
- page_to_pfn(virt_to_page(cpu_addr)) + vma->vm_pgoff,
+ page_to_pfn(page) + vma->vm_pgoff,
user_count << PAGE_SHIFT, vma->vm_page_prot);
#else
return -ENXIO;
#endif
#ifdef CONFIG_BPF_SYSCALL
RCU_INIT_POINTER(p->bpf_storage, NULL);
+ p->bpf_ctx = NULL;
#endif
/* Perform scheduler related setup. Assign this task to a CPU. */
*
* Creates the thread if it does not exist.
*/
-static inline void idle_init(unsigned int cpu)
+static __always_inline void idle_init(unsigned int cpu)
{
struct task_struct *tsk = per_cpu(idle_threads, cpu);
if (!p)
goto out;
+ /* Protect timer list r/w in arm_timer() */
+ sighand = lock_task_sighand(p, &flags);
+ if (unlikely(sighand == NULL))
+ goto out;
+
/*
* Fetch the current sample and update the timer's expiry time.
*/
bump_cpu_timer(timer, now);
- /* Protect timer list r/w in arm_timer() */
- sighand = lock_task_sighand(p, &flags);
- if (unlikely(sighand == NULL))
- goto out;
-
/*
* Now re-arm for the new expiry time.
*/
unsigned int cpu;
bool next_expiry_recalc;
bool is_idle;
+ bool timers_pending;
DECLARE_BITMAP(pending_map, WHEEL_SIZE);
struct hlist_head vectors[WHEEL_SIZE];
} ____cacheline_aligned;
* can reevaluate the wheel:
*/
base->next_expiry = bucket_expiry;
+ base->timers_pending = true;
base->next_expiry_recalc = false;
trigger_dyntick_cpu(base, timer);
}
}
base->next_expiry_recalc = false;
+ base->timers_pending = !(next == base->clk + NEXT_TIMER_MAX_DELTA);
return next;
}
struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
u64 expires = KTIME_MAX;
unsigned long nextevt;
- bool is_max_delta;
/*
* Pretend that there is no timer pending if the cpu is offline.
if (base->next_expiry_recalc)
base->next_expiry = __next_timer_interrupt(base);
nextevt = base->next_expiry;
- is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA);
/*
* We have a fresh next event. Check whether we can forward the
expires = basem;
base->is_idle = false;
} else {
- if (!is_max_delta)
+ if (base->timers_pending)
expires = basem + (u64)(nextevt - basej) * TICK_NSEC;
/*
* If we expect to sleep more than a tick, mark the base idle.
base = per_cpu_ptr(&timer_bases[b], cpu);
base->clk = jiffies;
base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA;
+ base->timers_pending = false;
base->is_idle = false;
}
return 0;
{
struct kprobe *kp = kprobe_running();
- return kp ? (u64) kp->addr : 0;
+ return kp ? (uintptr_t)kp->addr : 0;
}
static const struct bpf_func_proto bpf_get_func_ip_proto_kprobe = {
const struct bpf_func_proto *
tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
+ const struct bpf_func_proto *fn;
+
switch (func_id) {
#ifdef CONFIG_NET
case BPF_FUNC_skb_output:
case BPF_FUNC_d_path:
return &bpf_d_path_proto;
default:
- return raw_tp_prog_func_proto(func_id, prog);
+ fn = raw_tp_prog_func_proto(func_id, prog);
+ if (!fn && prog->expected_attach_type == BPF_TRACE_ITER)
+ fn = bpf_iter_get_func_proto(func_id, prog);
+ return fn;
}
}
* infrastructure to do the synchronization, thus we must do it
* ourselves.
*/
- synchronize_rcu_tasks_rude();
+ if (old_hash != EMPTY_HASH)
+ synchronize_rcu_tasks_rude();
free_ftrace_hash(old_hash);
}
*/
int register_ftrace_function(struct ftrace_ops *ops)
{
- int ret = -1;
+ int ret;
ftrace_ops_init(ops);
if (unlikely(!head))
return true;
- return reader->read == rb_page_commit(reader) &&
- (commit == reader ||
- (commit == head &&
- head->read == rb_page_commit(commit)));
+ /* Reader should exhaust content in reader page */
+ if (reader->read != rb_page_commit(reader))
+ return false;
+
+ /*
+ * If writers are committing on the reader page, knowing all
+ * committed content has been read, the ring buffer is empty.
+ */
+ if (commit == reader)
+ return true;
+
+ /*
+ * If writers are committing on a page other than reader page
+ * and head page, there should always be content to read.
+ */
+ if (commit != head)
+ return false;
+
+ /*
+ * Writers are committing on the head page, we just need
+ * to care about there're committed data, and the reader will
+ * swap reader page with head page when it is to read data.
+ */
+ return rb_page_commit(commit) == 0;
}
/**
"\t [:name=histname1]\n"
"\t [:<handler>.<action>]\n"
"\t [if <filter>]\n\n"
+ "\t Note, special fields can be used as well:\n"
+ "\t common_timestamp - to record current timestamp\n"
+ "\t common_cpu - to record the CPU the event happened on\n"
+ "\n"
"\t When a matching event is hit, an entry is added to a hash\n"
"\t table using the key(s) and value(s) named, and the value of a\n"
"\t sum called 'hitcount' is incremented. Keys and values\n"
return -EINVAL;
ret = event_trace_add_tracer(tr->dir, tr);
- if (ret)
+ if (ret) {
tracefs_remove(tr->dir);
+ return ret;
+ }
init_tracer_tracefs(tr, tr->dir);
__update_tracer_options(tr);
C(INVALID_SORT_MODIFIER,"Invalid sort modifier"), \
C(EMPTY_SORT_FIELD, "Empty sort field"), \
C(TOO_MANY_SORT_FIELDS, "Too many sort fields (Max = 2)"), \
- C(INVALID_SORT_FIELD, "Sort field must be a key or a val"),
+ C(INVALID_SORT_FIELD, "Sort field must be a key or a val"), \
+ C(INVALID_STR_OPERAND, "String type can not be an operand in expression"),
#undef C
#define C(a, b) HIST_ERR_##a
field->flags & HIST_FIELD_FL_ALIAS)
field_name = hist_field_name(field->operands[0], ++level);
else if (field->flags & HIST_FIELD_FL_CPU)
- field_name = "cpu";
+ field_name = "common_cpu";
else if (field->flags & HIST_FIELD_FL_EXPR ||
field->flags & HIST_FIELD_FL_VAR_REF) {
if (field->system) {
hist_data->enable_timestamps = true;
if (*flags & HIST_FIELD_FL_TIMESTAMP_USECS)
hist_data->attrs->ts_in_usecs = true;
- } else if (strcmp(field_name, "cpu") == 0)
+ } else if (strcmp(field_name, "common_cpu") == 0)
*flags |= HIST_FIELD_FL_CPU;
else {
field = trace_find_event_field(file->event_call, field_name);
if (!field || !field->size) {
- hist_err(tr, HIST_ERR_FIELD_NOT_FOUND, errpos(field_name));
- field = ERR_PTR(-EINVAL);
- goto out;
+ /*
+ * For backward compatibility, if field_name
+ * was "cpu", then we treat this the same as
+ * common_cpu.
+ */
+ if (strcmp(field_name, "cpu") == 0) {
+ *flags |= HIST_FIELD_FL_CPU;
+ } else {
+ hist_err(tr, HIST_ERR_FIELD_NOT_FOUND,
+ errpos(field_name));
+ field = ERR_PTR(-EINVAL);
+ goto out;
+ }
}
}
out:
ret = PTR_ERR(operand1);
goto free;
}
+ if (operand1->flags & HIST_FIELD_FL_STRING) {
+ /* String type can not be the operand of unary operator. */
+ hist_err(file->tr, HIST_ERR_INVALID_STR_OPERAND, errpos(str));
+ destroy_hist_field(operand1, 0);
+ ret = -EINVAL;
+ goto free;
+ }
expr->flags |= operand1->flags &
(HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS);
operand1 = NULL;
goto free;
}
+ if (operand1->flags & HIST_FIELD_FL_STRING) {
+ hist_err(file->tr, HIST_ERR_INVALID_STR_OPERAND, errpos(operand1_str));
+ ret = -EINVAL;
+ goto free;
+ }
/* rest of string could be another expression e.g. b+c in a+b+c */
operand_flags = 0;
operand2 = NULL;
goto free;
}
+ if (operand2->flags & HIST_FIELD_FL_STRING) {
+ hist_err(file->tr, HIST_ERR_INVALID_STR_OPERAND, errpos(str));
+ ret = -EINVAL;
+ goto free;
+ }
ret = check_expr_operands(file->tr, operand1, operand2);
if (ret)
expr->operands[0] = operand1;
expr->operands[1] = operand2;
+
+ /* The operand sizes should be the same, so just pick one */
+ expr->size = operand1->size;
+
expr->operator = field_op;
expr->name = expr_str(expr, 0);
expr->type = kstrdup(operand1->type, GFP_KERNEL);
seq_printf(m, "%s=", hist_field->var.name);
if (hist_field->flags & HIST_FIELD_FL_CPU)
- seq_puts(m, "cpu");
+ seq_puts(m, "common_cpu");
else if (field_name) {
if (hist_field->flags & HIST_FIELD_FL_VAR_REF ||
hist_field->flags & HIST_FIELD_FL_ALIAS)
dyn_event_init(&event->devent, &synth_event_ops);
for (i = 0, j = 0; i < n_fields; i++) {
+ fields[i]->field_pos = i;
event->fields[i] = fields[i];
- if (fields[i]->is_dynamic) {
- event->dynamic_fields[j] = fields[i];
- event->dynamic_fields[j]->field_pos = i;
+ if (fields[i]->is_dynamic)
event->dynamic_fields[j++] = fields[i];
- event->n_dynamic_fields++;
- }
}
+ event->n_dynamic_fields = j;
event->n_fields = n_fields;
out:
return event;
get_online_cpus();
cpumask_and(current_mask, cpu_online_mask, tr->tracing_cpumask);
- next_cpu = cpumask_next(smp_processor_id(), current_mask);
+ next_cpu = cpumask_next(raw_smp_processor_id(), current_mask);
put_online_cpus();
if (next_cpu >= nr_cpu_ids)
char *name;
size_t size;
unsigned int offset;
+ unsigned int field_pos;
bool is_signed;
bool is_string;
bool is_dynamic;
- bool field_pos;
};
struct synth_event {
* a pointer to it. This array is referenced by __DO_TRACE from
* include/linux/tracepoint.h using rcu_dereference_sched().
*/
- rcu_assign_pointer(tp->funcs, tp_funcs);
tracepoint_update_call(tp, tp_funcs, false);
+ rcu_assign_pointer(tp->funcs, tp_funcs);
static_key_enable(&tp->key);
release_probes(old);
{
struct hlist_head *hashent = ucounts_hashentry(ns, uid);
struct ucounts *ucounts, *new;
+ long overflow;
spin_lock_irq(&ucounts_lock);
ucounts = find_ucounts(ns, uid, hashent);
return new;
}
}
+ overflow = atomic_add_negative(1, &ucounts->count);
spin_unlock_irq(&ucounts_lock);
- ucounts = get_ucounts(ucounts);
+ if (overflow) {
+ put_ucounts(ucounts);
+ return NULL;
+ }
return ucounts;
}
{
unsigned long flags;
- if (atomic_dec_and_test(&ucounts->count)) {
- spin_lock_irqsave(&ucounts_lock, flags);
+ if (atomic_dec_and_lock_irqsave(&ucounts->count, &ucounts_lock, flags)) {
hlist_del_init(&ucounts->node);
spin_unlock_irqrestore(&ucounts_lock, flags);
kfree(ucounts);
unbound_release_work);
struct workqueue_struct *wq = pwq->wq;
struct worker_pool *pool = pwq->pool;
- bool is_last;
+ bool is_last = false;
- if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)))
- return;
+ /*
+ * when @pwq is not linked, it doesn't hold any reference to the
+ * @wq, and @wq is invalid to access.
+ */
+ if (!list_empty(&pwq->pwqs_node)) {
+ if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)))
+ return;
- mutex_lock(&wq->mutex);
- list_del_rcu(&pwq->pwqs_node);
- is_last = list_empty(&wq->pwqs);
- mutex_unlock(&wq->mutex);
+ mutex_lock(&wq->mutex);
+ list_del_rcu(&pwq->pwqs_node);
+ is_last = list_empty(&wq->pwqs);
+ mutex_unlock(&wq->mutex);
+ }
mutex_lock(&wq_pool_mutex);
put_unbound_pool(pool);
config OBJAGG
tristate "objagg" if COMPILE_TEST
-config STRING_SELFTEST
- tristate "Test string functions"
-
endmenu
config GENERIC_IOREMAP
config TEST_HEXDUMP
tristate "Test functions located in the hexdump module at runtime"
+config STRING_SELFTEST
+ tristate "Test string functions at runtime"
+
config TEST_STRING_HELPERS
tristate "Test functions located in the string_helpers module at runtime"
return __bpf_fill_stxdw(self, BPF_DW);
}
+static int bpf_fill_long_jmp(struct bpf_test *self)
+{
+ unsigned int len = BPF_MAXINSNS;
+ struct bpf_insn *insn;
+ int i;
+
+ insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL);
+ if (!insn)
+ return -ENOMEM;
+
+ insn[0] = BPF_ALU64_IMM(BPF_MOV, R0, 1);
+ insn[1] = BPF_JMP_IMM(BPF_JEQ, R0, 1, len - 2 - 1);
+
+ /*
+ * Fill with a complex 64-bit operation that expands to a lot of
+ * instructions on 32-bit JITs. The large jump offset can then
+ * overflow the conditional branch field size, triggering a branch
+ * conversion mechanism in some JITs.
+ *
+ * Note: BPF_MAXINSNS of ALU64 MUL is enough to trigger such branch
+ * conversion on the 32-bit MIPS JIT. For other JITs, the instruction
+ * count and/or operation may need to be modified to trigger the
+ * branch conversion.
+ */
+ for (i = 2; i < len - 1; i++)
+ insn[i] = BPF_ALU64_IMM(BPF_MUL, R0, (i << 16) + i);
+
+ insn[len - 1] = BPF_EXIT_INSN();
+
+ self->u.ptr.insns = insn;
+ self->u.ptr.len = len;
+
+ return 0;
+}
+
static struct bpf_test tests[] = {
{
"TAX",
{ },
{ { 0, -1 } }
},
+ {
+ /*
+ * Register (non-)clobbering test, in the case where a 32-bit
+ * JIT implements complex ALU64 operations via function calls.
+ * If so, the function call must be invisible in the eBPF
+ * registers. The JIT must then save and restore relevant
+ * registers during the call. The following tests check that
+ * the eBPF registers retain their values after such a call.
+ */
+ "INT: Register clobbering, R1 updated",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_ALU32_IMM(BPF_MOV, R1, 123456789),
+ BPF_ALU32_IMM(BPF_MOV, R2, 2),
+ BPF_ALU32_IMM(BPF_MOV, R3, 3),
+ BPF_ALU32_IMM(BPF_MOV, R4, 4),
+ BPF_ALU32_IMM(BPF_MOV, R5, 5),
+ BPF_ALU32_IMM(BPF_MOV, R6, 6),
+ BPF_ALU32_IMM(BPF_MOV, R7, 7),
+ BPF_ALU32_IMM(BPF_MOV, R8, 8),
+ BPF_ALU32_IMM(BPF_MOV, R9, 9),
+ BPF_ALU64_IMM(BPF_DIV, R1, 123456789),
+ BPF_JMP_IMM(BPF_JNE, R0, 0, 10),
+ BPF_JMP_IMM(BPF_JNE, R1, 1, 9),
+ BPF_JMP_IMM(BPF_JNE, R2, 2, 8),
+ BPF_JMP_IMM(BPF_JNE, R3, 3, 7),
+ BPF_JMP_IMM(BPF_JNE, R4, 4, 6),
+ BPF_JMP_IMM(BPF_JNE, R5, 5, 5),
+ BPF_JMP_IMM(BPF_JNE, R6, 6, 4),
+ BPF_JMP_IMM(BPF_JNE, R7, 7, 3),
+ BPF_JMP_IMM(BPF_JNE, R8, 8, 2),
+ BPF_JMP_IMM(BPF_JNE, R9, 9, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } }
+ },
+ {
+ "INT: Register clobbering, R2 updated",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_ALU32_IMM(BPF_MOV, R1, 1),
+ BPF_ALU32_IMM(BPF_MOV, R2, 2 * 123456789),
+ BPF_ALU32_IMM(BPF_MOV, R3, 3),
+ BPF_ALU32_IMM(BPF_MOV, R4, 4),
+ BPF_ALU32_IMM(BPF_MOV, R5, 5),
+ BPF_ALU32_IMM(BPF_MOV, R6, 6),
+ BPF_ALU32_IMM(BPF_MOV, R7, 7),
+ BPF_ALU32_IMM(BPF_MOV, R8, 8),
+ BPF_ALU32_IMM(BPF_MOV, R9, 9),
+ BPF_ALU64_IMM(BPF_DIV, R2, 123456789),
+ BPF_JMP_IMM(BPF_JNE, R0, 0, 10),
+ BPF_JMP_IMM(BPF_JNE, R1, 1, 9),
+ BPF_JMP_IMM(BPF_JNE, R2, 2, 8),
+ BPF_JMP_IMM(BPF_JNE, R3, 3, 7),
+ BPF_JMP_IMM(BPF_JNE, R4, 4, 6),
+ BPF_JMP_IMM(BPF_JNE, R5, 5, 5),
+ BPF_JMP_IMM(BPF_JNE, R6, 6, 4),
+ BPF_JMP_IMM(BPF_JNE, R7, 7, 3),
+ BPF_JMP_IMM(BPF_JNE, R8, 8, 2),
+ BPF_JMP_IMM(BPF_JNE, R9, 9, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } }
+ },
+ {
+ /*
+ * Test 32-bit JITs that implement complex ALU64 operations as
+ * function calls R0 = f(R1, R2), and must re-arrange operands.
+ */
+#define NUMER 0xfedcba9876543210ULL
+#define DENOM 0x0123456789abcdefULL
+ "ALU64_DIV X: Operand register permutations",
+ .u.insns_int = {
+ /* R0 / R2 */
+ BPF_LD_IMM64(R0, NUMER),
+ BPF_LD_IMM64(R2, DENOM),
+ BPF_ALU64_REG(BPF_DIV, R0, R2),
+ BPF_JMP_IMM(BPF_JEQ, R0, NUMER / DENOM, 1),
+ BPF_EXIT_INSN(),
+ /* R1 / R0 */
+ BPF_LD_IMM64(R1, NUMER),
+ BPF_LD_IMM64(R0, DENOM),
+ BPF_ALU64_REG(BPF_DIV, R1, R0),
+ BPF_JMP_IMM(BPF_JEQ, R1, NUMER / DENOM, 1),
+ BPF_EXIT_INSN(),
+ /* R0 / R1 */
+ BPF_LD_IMM64(R0, NUMER),
+ BPF_LD_IMM64(R1, DENOM),
+ BPF_ALU64_REG(BPF_DIV, R0, R1),
+ BPF_JMP_IMM(BPF_JEQ, R0, NUMER / DENOM, 1),
+ BPF_EXIT_INSN(),
+ /* R2 / R0 */
+ BPF_LD_IMM64(R2, NUMER),
+ BPF_LD_IMM64(R0, DENOM),
+ BPF_ALU64_REG(BPF_DIV, R2, R0),
+ BPF_JMP_IMM(BPF_JEQ, R2, NUMER / DENOM, 1),
+ BPF_EXIT_INSN(),
+ /* R2 / R1 */
+ BPF_LD_IMM64(R2, NUMER),
+ BPF_LD_IMM64(R1, DENOM),
+ BPF_ALU64_REG(BPF_DIV, R2, R1),
+ BPF_JMP_IMM(BPF_JEQ, R2, NUMER / DENOM, 1),
+ BPF_EXIT_INSN(),
+ /* R1 / R2 */
+ BPF_LD_IMM64(R1, NUMER),
+ BPF_LD_IMM64(R2, DENOM),
+ BPF_ALU64_REG(BPF_DIV, R1, R2),
+ BPF_JMP_IMM(BPF_JEQ, R1, NUMER / DENOM, 1),
+ BPF_EXIT_INSN(),
+ /* R1 / R1 */
+ BPF_LD_IMM64(R1, NUMER),
+ BPF_ALU64_REG(BPF_DIV, R1, R1),
+ BPF_JMP_IMM(BPF_JEQ, R1, 1, 1),
+ BPF_EXIT_INSN(),
+ /* R2 / R2 */
+ BPF_LD_IMM64(R2, DENOM),
+ BPF_ALU64_REG(BPF_DIV, R2, R2),
+ BPF_JMP_IMM(BPF_JEQ, R2, 1, 1),
+ BPF_EXIT_INSN(),
+ /* R3 / R4 */
+ BPF_LD_IMM64(R3, NUMER),
+ BPF_LD_IMM64(R4, DENOM),
+ BPF_ALU64_REG(BPF_DIV, R3, R4),
+ BPF_JMP_IMM(BPF_JEQ, R3, NUMER / DENOM, 1),
+ BPF_EXIT_INSN(),
+ /* Successful return */
+ BPF_LD_IMM64(R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } },
+#undef NUMER
+#undef DENOM
+ },
+#ifdef CONFIG_32BIT
+ {
+ "INT: 32-bit context pointer word order and zero-extension",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_JMP32_IMM(BPF_JEQ, R1, 0, 3),
+ BPF_ALU64_IMM(BPF_RSH, R1, 32),
+ BPF_JMP32_IMM(BPF_JNE, R1, 0, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } }
+ },
+#endif
{
"check: missing ret",
.u.insns = {
{ },
{ { 0, 0x1 } },
},
+ {
+ "ALU_MOV_K: small negative",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, -123),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -123 } }
+ },
+ {
+ "ALU_MOV_K: small negative zero extension",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, -123),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0 } }
+ },
+ {
+ "ALU_MOV_K: large negative",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, -123456789),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -123456789 } }
+ },
+ {
+ "ALU_MOV_K: large negative zero extension",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, -123456789),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0 } }
+ },
{
"ALU64_MOV_K: dst = 2",
.u.insns_int = {
{ },
{ { 0, 0x1 } },
},
+ {
+ "ALU64_MOV_K: small negative",
+ .u.insns_int = {
+ BPF_ALU64_IMM(BPF_MOV, R0, -123),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -123 } }
+ },
+ {
+ "ALU64_MOV_K: small negative sign extension",
+ .u.insns_int = {
+ BPF_ALU64_IMM(BPF_MOV, R0, -123),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xffffffff } }
+ },
+ {
+ "ALU64_MOV_K: large negative",
+ .u.insns_int = {
+ BPF_ALU64_IMM(BPF_MOV, R0, -123456789),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -123456789 } }
+ },
+ {
+ "ALU64_MOV_K: large negative sign extension",
+ .u.insns_int = {
+ BPF_ALU64_IMM(BPF_MOV, R0, -123456789),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xffffffff } }
+ },
/* BPF_ALU | BPF_ADD | BPF_X */
{
"ALU_ADD_X: 1 + 2 = 3",
{ },
{ { 0, 2147483647 } },
},
+ {
+ "ALU64_MUL_X: 64x64 multiply, low word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0fedcba987654321LL),
+ BPF_LD_IMM64(R1, 0x123456789abcdef0LL),
+ BPF_ALU64_REG(BPF_MUL, R0, R1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xe5618cf0 } }
+ },
+ {
+ "ALU64_MUL_X: 64x64 multiply, high word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0fedcba987654321LL),
+ BPF_LD_IMM64(R1, 0x123456789abcdef0LL),
+ BPF_ALU64_REG(BPF_MUL, R0, R1),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x2236d88f } }
+ },
/* BPF_ALU | BPF_MUL | BPF_K */
{
"ALU_MUL_K: 2 * 3 = 6",
{ },
{ { 0, 0x1 } },
},
+ {
+ "ALU64_MUL_K: 64x32 multiply, low word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_MUL, R0, 0x12345678),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xe242d208 } }
+ },
+ {
+ "ALU64_MUL_K: 64x32 multiply, high word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_MUL, R0, 0x12345678),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xc28f5c28 } }
+ },
/* BPF_ALU | BPF_DIV | BPF_X */
{
"ALU_DIV_X: 6 / 2 = 3",
{ },
{ { 0, 0xffffffff } },
},
+ {
+ "ALU_AND_K: Small immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0x01020304),
+ BPF_ALU32_IMM(BPF_AND, R0, 15),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 4 } }
+ },
+ {
+ "ALU_AND_K: Large immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0xf1f2f3f4),
+ BPF_ALU32_IMM(BPF_AND, R0, 0xafbfcfdf),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xa1b2c3d4 } }
+ },
+ {
+ "ALU_AND_K: Zero extension",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_LD_IMM64(R1, 0x0000000080a0c0e0LL),
+ BPF_ALU32_IMM(BPF_AND, R0, 0xf0f0f0f0),
+ BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+ BPF_MOV32_IMM(R0, 2),
+ BPF_EXIT_INSN(),
+ BPF_MOV32_IMM(R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } }
+ },
{
"ALU64_AND_K: 3 & 2 = 2",
.u.insns_int = {
{ { 0, 0xffffffff } },
},
{
- "ALU64_AND_K: 0x0000ffffffff0000 & 0x0 = 0x0000ffff00000000",
+ "ALU64_AND_K: 0x0000ffffffff0000 & 0x0 = 0x0000000000000000",
.u.insns_int = {
BPF_LD_IMM64(R2, 0x0000ffffffff0000LL),
BPF_LD_IMM64(R3, 0x0000000000000000LL),
{ { 0, 0x1 } },
},
{
- "ALU64_AND_K: 0x0000ffffffff0000 & -1 = 0x0000ffffffffffff",
+ "ALU64_AND_K: 0x0000ffffffff0000 & -1 = 0x0000ffffffff0000",
.u.insns_int = {
BPF_LD_IMM64(R2, 0x0000ffffffff0000LL),
BPF_LD_IMM64(R3, 0x0000ffffffff0000LL),
{ },
{ { 0, 0x1 } },
},
+ {
+ "ALU64_AND_K: Sign extension 1",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_LD_IMM64(R1, 0x00000000090b0d0fLL),
+ BPF_ALU64_IMM(BPF_AND, R0, 0x0f0f0f0f),
+ BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+ BPF_MOV32_IMM(R0, 2),
+ BPF_EXIT_INSN(),
+ BPF_MOV32_IMM(R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } }
+ },
+ {
+ "ALU64_AND_K: Sign extension 2",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_LD_IMM64(R1, 0x0123456780a0c0e0LL),
+ BPF_ALU64_IMM(BPF_AND, R0, 0xf0f0f0f0),
+ BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+ BPF_MOV32_IMM(R0, 2),
+ BPF_EXIT_INSN(),
+ BPF_MOV32_IMM(R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } }
+ },
/* BPF_ALU | BPF_OR | BPF_X */
{
"ALU_OR_X: 1 | 2 = 3",
{ },
{ { 0, 0xffffffff } },
},
+ {
+ "ALU_OR_K: Small immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0x01020304),
+ BPF_ALU32_IMM(BPF_OR, R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x01020305 } }
+ },
+ {
+ "ALU_OR_K: Large immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0x01020304),
+ BPF_ALU32_IMM(BPF_OR, R0, 0xa0b0c0d0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xa1b2c3d4 } }
+ },
+ {
+ "ALU_OR_K: Zero extension",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_LD_IMM64(R1, 0x00000000f9fbfdffLL),
+ BPF_ALU32_IMM(BPF_OR, R0, 0xf0f0f0f0),
+ BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+ BPF_MOV32_IMM(R0, 2),
+ BPF_EXIT_INSN(),
+ BPF_MOV32_IMM(R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } }
+ },
{
"ALU64_OR_K: 1 | 2 = 3",
.u.insns_int = {
{ { 0, 0xffffffff } },
},
{
- "ALU64_OR_K: 0x0000ffffffff0000 | 0x0 = 0x0000ffff00000000",
+ "ALU64_OR_K: 0x0000ffffffff0000 | 0x0 = 0x0000ffffffff0000",
.u.insns_int = {
BPF_LD_IMM64(R2, 0x0000ffffffff0000LL),
BPF_LD_IMM64(R3, 0x0000ffffffff0000LL),
{ },
{ { 0, 0x1 } },
},
- /* BPF_ALU | BPF_XOR | BPF_X */
{
- "ALU_XOR_X: 5 ^ 6 = 3",
+ "ALU64_OR_K: Sign extension 1",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_LD_IMM64(R1, 0x012345678fafcfefLL),
+ BPF_ALU64_IMM(BPF_OR, R0, 0x0f0f0f0f),
+ BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+ BPF_MOV32_IMM(R0, 2),
+ BPF_EXIT_INSN(),
+ BPF_MOV32_IMM(R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } }
+ },
+ {
+ "ALU64_OR_K: Sign extension 2",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_LD_IMM64(R1, 0xfffffffff9fbfdffLL),
+ BPF_ALU64_IMM(BPF_OR, R0, 0xf0f0f0f0),
+ BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+ BPF_MOV32_IMM(R0, 2),
+ BPF_EXIT_INSN(),
+ BPF_MOV32_IMM(R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } }
+ },
+ /* BPF_ALU | BPF_XOR | BPF_X */
+ {
+ "ALU_XOR_X: 5 ^ 6 = 3",
.u.insns_int = {
BPF_LD_IMM64(R0, 5),
BPF_ALU32_IMM(BPF_MOV, R1, 6),
{ },
{ { 0, 0xfffffffe } },
},
+ {
+ "ALU_XOR_K: Small immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0x01020304),
+ BPF_ALU32_IMM(BPF_XOR, R0, 15),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x0102030b } }
+ },
+ {
+ "ALU_XOR_K: Large immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0xf1f2f3f4),
+ BPF_ALU32_IMM(BPF_XOR, R0, 0xafbfcfdf),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x5e4d3c2b } }
+ },
+ {
+ "ALU_XOR_K: Zero extension",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_LD_IMM64(R1, 0x00000000795b3d1fLL),
+ BPF_ALU32_IMM(BPF_XOR, R0, 0xf0f0f0f0),
+ BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+ BPF_MOV32_IMM(R0, 2),
+ BPF_EXIT_INSN(),
+ BPF_MOV32_IMM(R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } }
+ },
{
"ALU64_XOR_K: 5 ^ 6 = 3",
.u.insns_int = {
{ { 0, 3 } },
},
{
- "ALU64_XOR_K: 1 & 0xffffffff = 0xfffffffe",
+ "ALU64_XOR_K: 1 ^ 0xffffffff = 0xfffffffe",
.u.insns_int = {
BPF_LD_IMM64(R0, 1),
BPF_ALU64_IMM(BPF_XOR, R0, 0xffffffff),
{ },
{ { 0, 0x1 } },
},
+ {
+ "ALU64_XOR_K: Sign extension 1",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_LD_IMM64(R1, 0x0123456786a4c2e0LL),
+ BPF_ALU64_IMM(BPF_XOR, R0, 0x0f0f0f0f),
+ BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+ BPF_MOV32_IMM(R0, 2),
+ BPF_EXIT_INSN(),
+ BPF_MOV32_IMM(R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } }
+ },
+ {
+ "ALU64_XOR_K: Sign extension 2",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_LD_IMM64(R1, 0xfedcba98795b3d1fLL),
+ BPF_ALU64_IMM(BPF_XOR, R0, 0xf0f0f0f0),
+ BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+ BPF_MOV32_IMM(R0, 2),
+ BPF_EXIT_INSN(),
+ BPF_MOV32_IMM(R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } }
+ },
/* BPF_ALU | BPF_LSH | BPF_X */
{
"ALU_LSH_X: 1 << 1 = 2",
{ },
{ { 0, 0x80000000 } },
},
+ {
+ "ALU_LSH_X: 0x12345678 << 12 = 0x45678000",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678),
+ BPF_ALU32_IMM(BPF_MOV, R1, 12),
+ BPF_ALU32_REG(BPF_LSH, R0, R1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x45678000 } }
+ },
{
"ALU64_LSH_X: 1 << 1 = 2",
.u.insns_int = {
{ },
{ { 0, 0x80000000 } },
},
- /* BPF_ALU | BPF_LSH | BPF_K */
{
- "ALU_LSH_K: 1 << 1 = 2",
+ "ALU64_LSH_X: Shift < 32, low word",
.u.insns_int = {
- BPF_LD_IMM64(R0, 1),
- BPF_ALU32_IMM(BPF_LSH, R0, 1),
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 12),
+ BPF_ALU64_REG(BPF_LSH, R0, R1),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, 2 } },
+ { { 0, 0xbcdef000 } }
},
{
- "ALU_LSH_K: 1 << 31 = 0x80000000",
+ "ALU64_LSH_X: Shift < 32, high word",
.u.insns_int = {
- BPF_LD_IMM64(R0, 1),
- BPF_ALU32_IMM(BPF_LSH, R0, 31),
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 12),
+ BPF_ALU64_REG(BPF_LSH, R0, R1),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, 0x80000000 } },
+ { { 0, 0x3456789a } }
},
{
- "ALU64_LSH_K: 1 << 1 = 2",
+ "ALU64_LSH_X: Shift > 32, low word",
.u.insns_int = {
- BPF_LD_IMM64(R0, 1),
- BPF_ALU64_IMM(BPF_LSH, R0, 1),
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 36),
+ BPF_ALU64_REG(BPF_LSH, R0, R1),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, 2 } },
+ { { 0, 0 } }
},
{
- "ALU64_LSH_K: 1 << 31 = 0x80000000",
+ "ALU64_LSH_X: Shift > 32, high word",
.u.insns_int = {
- BPF_LD_IMM64(R0, 1),
- BPF_ALU64_IMM(BPF_LSH, R0, 31),
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 36),
+ BPF_ALU64_REG(BPF_LSH, R0, R1),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, 0x80000000 } },
+ { { 0, 0x9abcdef0 } }
},
- /* BPF_ALU | BPF_RSH | BPF_X */
{
- "ALU_RSH_X: 2 >> 1 = 1",
+ "ALU64_LSH_X: Shift == 32, low word",
.u.insns_int = {
- BPF_LD_IMM64(R0, 2),
- BPF_ALU32_IMM(BPF_MOV, R1, 1),
- BPF_ALU32_REG(BPF_RSH, R0, R1),
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 32),
+ BPF_ALU64_REG(BPF_LSH, R0, R1),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, 1 } },
+ { { 0, 0 } }
},
{
- "ALU_RSH_X: 0x80000000 >> 31 = 1",
+ "ALU64_LSH_X: Shift == 32, high word",
.u.insns_int = {
- BPF_LD_IMM64(R0, 0x80000000),
- BPF_ALU32_IMM(BPF_MOV, R1, 31),
- BPF_ALU32_REG(BPF_RSH, R0, R1),
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 32),
+ BPF_ALU64_REG(BPF_LSH, R0, R1),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, 1 } },
+ { { 0, 0x89abcdef } }
},
{
- "ALU64_RSH_X: 2 >> 1 = 1",
+ "ALU64_LSH_X: Zero shift, low word",
.u.insns_int = {
- BPF_LD_IMM64(R0, 2),
- BPF_ALU32_IMM(BPF_MOV, R1, 1),
- BPF_ALU64_REG(BPF_RSH, R0, R1),
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 0),
+ BPF_ALU64_REG(BPF_LSH, R0, R1),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, 1 } },
+ { { 0, 0x89abcdef } }
},
{
- "ALU64_RSH_X: 0x80000000 >> 31 = 1",
+ "ALU64_LSH_X: Zero shift, high word",
.u.insns_int = {
- BPF_LD_IMM64(R0, 0x80000000),
- BPF_ALU32_IMM(BPF_MOV, R1, 31),
- BPF_ALU64_REG(BPF_RSH, R0, R1),
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 0),
+ BPF_ALU64_REG(BPF_LSH, R0, R1),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, 1 } },
+ { { 0, 0x01234567 } }
},
- /* BPF_ALU | BPF_RSH | BPF_K */
+ /* BPF_ALU | BPF_LSH | BPF_K */
{
- "ALU_RSH_K: 2 >> 1 = 1",
+ "ALU_LSH_K: 1 << 1 = 2",
.u.insns_int = {
- BPF_LD_IMM64(R0, 2),
- BPF_ALU32_IMM(BPF_RSH, R0, 1),
+ BPF_LD_IMM64(R0, 1),
+ BPF_ALU32_IMM(BPF_LSH, R0, 1),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, 1 } },
+ { { 0, 2 } },
},
{
- "ALU_RSH_K: 0x80000000 >> 31 = 1",
+ "ALU_LSH_K: 1 << 31 = 0x80000000",
.u.insns_int = {
- BPF_LD_IMM64(R0, 0x80000000),
- BPF_ALU32_IMM(BPF_RSH, R0, 31),
+ BPF_LD_IMM64(R0, 1),
+ BPF_ALU32_IMM(BPF_LSH, R0, 31),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, 1 } },
+ { { 0, 0x80000000 } },
},
{
- "ALU64_RSH_K: 2 >> 1 = 1",
+ "ALU_LSH_K: 0x12345678 << 12 = 0x45678000",
.u.insns_int = {
- BPF_LD_IMM64(R0, 2),
- BPF_ALU64_IMM(BPF_RSH, R0, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678),
+ BPF_ALU32_IMM(BPF_LSH, R0, 12),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, 1 } },
+ { { 0, 0x45678000 } }
},
{
- "ALU64_RSH_K: 0x80000000 >> 31 = 1",
+ "ALU_LSH_K: 0x12345678 << 0 = 0x12345678",
.u.insns_int = {
- BPF_LD_IMM64(R0, 0x80000000),
- BPF_ALU64_IMM(BPF_RSH, R0, 31),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678),
+ BPF_ALU32_IMM(BPF_LSH, R0, 0),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, 1 } },
+ { { 0, 0x12345678 } }
},
- /* BPF_ALU | BPF_ARSH | BPF_X */
{
- "ALU_ARSH_X: 0xff00ff0000000000 >> 40 = 0xffffffffffff00ff",
+ "ALU64_LSH_K: 1 << 1 = 2",
.u.insns_int = {
- BPF_LD_IMM64(R0, 0xff00ff0000000000LL),
- BPF_ALU32_IMM(BPF_MOV, R1, 40),
- BPF_ALU64_REG(BPF_ARSH, R0, R1),
+ BPF_LD_IMM64(R0, 1),
+ BPF_ALU64_IMM(BPF_LSH, R0, 1),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, 0xffff00ff } },
+ { { 0, 2 } },
},
- /* BPF_ALU | BPF_ARSH | BPF_K */
{
- "ALU_ARSH_K: 0xff00ff0000000000 >> 40 = 0xffffffffffff00ff",
+ "ALU64_LSH_K: 1 << 31 = 0x80000000",
.u.insns_int = {
- BPF_LD_IMM64(R0, 0xff00ff0000000000LL),
- BPF_ALU64_IMM(BPF_ARSH, R0, 40),
+ BPF_LD_IMM64(R0, 1),
+ BPF_ALU64_IMM(BPF_LSH, R0, 31),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, 0xffff00ff } },
+ { { 0, 0x80000000 } },
},
- /* BPF_ALU | BPF_NEG */
{
- "ALU_NEG: -(3) = -3",
+ "ALU64_LSH_K: Shift < 32, low word",
.u.insns_int = {
- BPF_ALU32_IMM(BPF_MOV, R0, 3),
- BPF_ALU32_IMM(BPF_NEG, R0, 0),
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_LSH, R0, 12),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, -3 } },
+ { { 0, 0xbcdef000 } }
},
{
- "ALU_NEG: -(-3) = 3",
+ "ALU64_LSH_K: Shift < 32, high word",
.u.insns_int = {
- BPF_ALU32_IMM(BPF_MOV, R0, -3),
- BPF_ALU32_IMM(BPF_NEG, R0, 0),
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_LSH, R0, 12),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, 3 } },
+ { { 0, 0x3456789a } }
},
{
- "ALU64_NEG: -(3) = -3",
+ "ALU64_LSH_K: Shift > 32, low word",
.u.insns_int = {
- BPF_LD_IMM64(R0, 3),
- BPF_ALU64_IMM(BPF_NEG, R0, 0),
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_LSH, R0, 36),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, -3 } },
+ { { 0, 0 } }
},
{
- "ALU64_NEG: -(-3) = 3",
+ "ALU64_LSH_K: Shift > 32, high word",
.u.insns_int = {
- BPF_LD_IMM64(R0, -3),
- BPF_ALU64_IMM(BPF_NEG, R0, 0),
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_LSH, R0, 36),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, 3 } },
+ { { 0, 0x9abcdef0 } }
},
- /* BPF_ALU | BPF_END | BPF_FROM_BE */
{
- "ALU_END_FROM_BE 16: 0x0123456789abcdef -> 0xcdef",
+ "ALU64_LSH_K: Shift == 32, low word",
.u.insns_int = {
BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
- BPF_ENDIAN(BPF_FROM_BE, R0, 16),
+ BPF_ALU64_IMM(BPF_LSH, R0, 32),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, cpu_to_be16(0xcdef) } },
+ { { 0, 0 } }
},
{
- "ALU_END_FROM_BE 32: 0x0123456789abcdef -> 0x89abcdef",
+ "ALU64_LSH_K: Shift == 32, high word",
.u.insns_int = {
BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
- BPF_ENDIAN(BPF_FROM_BE, R0, 32),
- BPF_ALU64_REG(BPF_MOV, R1, R0),
- BPF_ALU64_IMM(BPF_RSH, R1, 32),
- BPF_ALU32_REG(BPF_ADD, R0, R1), /* R1 = 0 */
+ BPF_ALU64_IMM(BPF_LSH, R0, 32),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, cpu_to_be32(0x89abcdef) } },
+ { { 0, 0x89abcdef } }
},
{
- "ALU_END_FROM_BE 64: 0x0123456789abcdef -> 0x89abcdef",
+ "ALU64_LSH_K: Zero shift",
.u.insns_int = {
BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
- BPF_ENDIAN(BPF_FROM_BE, R0, 64),
+ BPF_ALU64_IMM(BPF_LSH, R0, 0),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, (u32) cpu_to_be64(0x0123456789abcdefLL) } },
+ { { 0, 0x89abcdef } }
},
- /* BPF_ALU | BPF_END | BPF_FROM_LE */
+ /* BPF_ALU | BPF_RSH | BPF_X */
{
- "ALU_END_FROM_LE 16: 0x0123456789abcdef -> 0xefcd",
+ "ALU_RSH_X: 2 >> 1 = 1",
.u.insns_int = {
- BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
- BPF_ENDIAN(BPF_FROM_LE, R0, 16),
+ BPF_LD_IMM64(R0, 2),
+ BPF_ALU32_IMM(BPF_MOV, R1, 1),
+ BPF_ALU32_REG(BPF_RSH, R0, R1),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, cpu_to_le16(0xcdef) } },
+ { { 0, 1 } },
},
{
- "ALU_END_FROM_LE 32: 0x0123456789abcdef -> 0xefcdab89",
+ "ALU_RSH_X: 0x80000000 >> 31 = 1",
.u.insns_int = {
- BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
- BPF_ENDIAN(BPF_FROM_LE, R0, 32),
- BPF_ALU64_REG(BPF_MOV, R1, R0),
- BPF_ALU64_IMM(BPF_RSH, R1, 32),
- BPF_ALU32_REG(BPF_ADD, R0, R1), /* R1 = 0 */
+ BPF_LD_IMM64(R0, 0x80000000),
+ BPF_ALU32_IMM(BPF_MOV, R1, 31),
+ BPF_ALU32_REG(BPF_RSH, R0, R1),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, cpu_to_le32(0x89abcdef) } },
+ { { 0, 1 } },
},
{
- "ALU_END_FROM_LE 64: 0x0123456789abcdef -> 0x67452301",
+ "ALU_RSH_X: 0x12345678 >> 20 = 0x123",
.u.insns_int = {
- BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
- BPF_ENDIAN(BPF_FROM_LE, R0, 64),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678),
+ BPF_ALU32_IMM(BPF_MOV, R1, 20),
+ BPF_ALU32_REG(BPF_RSH, R0, R1),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, (u32) cpu_to_le64(0x0123456789abcdefLL) } },
+ { { 0, 0x123 } }
},
- /* BPF_ST(X) | BPF_MEM | BPF_B/H/W/DW */
{
- "ST_MEM_B: Store/Load byte: max negative",
+ "ALU64_RSH_X: 2 >> 1 = 1",
.u.insns_int = {
- BPF_ALU32_IMM(BPF_MOV, R0, 1),
- BPF_ST_MEM(BPF_B, R10, -40, 0xff),
- BPF_LDX_MEM(BPF_B, R0, R10, -40),
+ BPF_LD_IMM64(R0, 2),
+ BPF_ALU32_IMM(BPF_MOV, R1, 1),
+ BPF_ALU64_REG(BPF_RSH, R0, R1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } },
+ },
+ {
+ "ALU64_RSH_X: 0x80000000 >> 31 = 1",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x80000000),
+ BPF_ALU32_IMM(BPF_MOV, R1, 31),
+ BPF_ALU64_REG(BPF_RSH, R0, R1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } },
+ },
+ {
+ "ALU64_RSH_X: Shift < 32, low word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 12),
+ BPF_ALU64_REG(BPF_RSH, R0, R1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x56789abc } }
+ },
+ {
+ "ALU64_RSH_X: Shift < 32, high word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 12),
+ BPF_ALU64_REG(BPF_RSH, R0, R1),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x00081234 } }
+ },
+ {
+ "ALU64_RSH_X: Shift > 32, low word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 36),
+ BPF_ALU64_REG(BPF_RSH, R0, R1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x08123456 } }
+ },
+ {
+ "ALU64_RSH_X: Shift > 32, high word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 36),
+ BPF_ALU64_REG(BPF_RSH, R0, R1),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0 } }
+ },
+ {
+ "ALU64_RSH_X: Shift == 32, low word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 32),
+ BPF_ALU64_REG(BPF_RSH, R0, R1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x81234567 } }
+ },
+ {
+ "ALU64_RSH_X: Shift == 32, high word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 32),
+ BPF_ALU64_REG(BPF_RSH, R0, R1),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0 } }
+ },
+ {
+ "ALU64_RSH_X: Zero shift, low word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 0),
+ BPF_ALU64_REG(BPF_RSH, R0, R1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x89abcdef } }
+ },
+ {
+ "ALU64_RSH_X: Zero shift, high word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 0),
+ BPF_ALU64_REG(BPF_RSH, R0, R1),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x81234567 } }
+ },
+ /* BPF_ALU | BPF_RSH | BPF_K */
+ {
+ "ALU_RSH_K: 2 >> 1 = 1",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 2),
+ BPF_ALU32_IMM(BPF_RSH, R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } },
+ },
+ {
+ "ALU_RSH_K: 0x80000000 >> 31 = 1",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x80000000),
+ BPF_ALU32_IMM(BPF_RSH, R0, 31),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } },
+ },
+ {
+ "ALU_RSH_K: 0x12345678 >> 20 = 0x123",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678),
+ BPF_ALU32_IMM(BPF_RSH, R0, 20),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x123 } }
+ },
+ {
+ "ALU_RSH_K: 0x12345678 >> 0 = 0x12345678",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678),
+ BPF_ALU32_IMM(BPF_RSH, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x12345678 } }
+ },
+ {
+ "ALU64_RSH_K: 2 >> 1 = 1",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 2),
+ BPF_ALU64_IMM(BPF_RSH, R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } },
+ },
+ {
+ "ALU64_RSH_K: 0x80000000 >> 31 = 1",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x80000000),
+ BPF_ALU64_IMM(BPF_RSH, R0, 31),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } },
+ },
+ {
+ "ALU64_RSH_K: Shift < 32, low word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_RSH, R0, 12),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x56789abc } }
+ },
+ {
+ "ALU64_RSH_K: Shift < 32, high word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_RSH, R0, 12),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x00081234 } }
+ },
+ {
+ "ALU64_RSH_K: Shift > 32, low word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_RSH, R0, 36),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x08123456 } }
+ },
+ {
+ "ALU64_RSH_K: Shift > 32, high word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_RSH, R0, 36),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0 } }
+ },
+ {
+ "ALU64_RSH_K: Shift == 32, low word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x81234567 } }
+ },
+ {
+ "ALU64_RSH_K: Shift == 32, high word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0 } }
+ },
+ {
+ "ALU64_RSH_K: Zero shift",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_RSH, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x89abcdef } }
+ },
+ /* BPF_ALU | BPF_ARSH | BPF_X */
+ {
+ "ALU32_ARSH_X: -1234 >> 7 = -10",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, -1234),
+ BPF_ALU32_IMM(BPF_MOV, R1, 7),
+ BPF_ALU32_REG(BPF_ARSH, R0, R1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -10 } }
+ },
+ {
+ "ALU64_ARSH_X: 0xff00ff0000000000 >> 40 = 0xffffffffffff00ff",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0xff00ff0000000000LL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 40),
+ BPF_ALU64_REG(BPF_ARSH, R0, R1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xffff00ff } },
+ },
+ {
+ "ALU64_ARSH_X: Shift < 32, low word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 12),
+ BPF_ALU64_REG(BPF_ARSH, R0, R1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x56789abc } }
+ },
+ {
+ "ALU64_ARSH_X: Shift < 32, high word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 12),
+ BPF_ALU64_REG(BPF_ARSH, R0, R1),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xfff81234 } }
+ },
+ {
+ "ALU64_ARSH_X: Shift > 32, low word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 36),
+ BPF_ALU64_REG(BPF_ARSH, R0, R1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xf8123456 } }
+ },
+ {
+ "ALU64_ARSH_X: Shift > 32, high word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 36),
+ BPF_ALU64_REG(BPF_ARSH, R0, R1),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -1 } }
+ },
+ {
+ "ALU64_ARSH_X: Shift == 32, low word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 32),
+ BPF_ALU64_REG(BPF_ARSH, R0, R1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x81234567 } }
+ },
+ {
+ "ALU64_ARSH_X: Shift == 32, high word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 32),
+ BPF_ALU64_REG(BPF_ARSH, R0, R1),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -1 } }
+ },
+ {
+ "ALU64_ARSH_X: Zero shift, low word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 0),
+ BPF_ALU64_REG(BPF_ARSH, R0, R1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x89abcdef } }
+ },
+ {
+ "ALU64_ARSH_X: Zero shift, high word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU32_IMM(BPF_MOV, R1, 0),
+ BPF_ALU64_REG(BPF_ARSH, R0, R1),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x81234567 } }
+ },
+ /* BPF_ALU | BPF_ARSH | BPF_K */
+ {
+ "ALU32_ARSH_K: -1234 >> 7 = -10",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, -1234),
+ BPF_ALU32_IMM(BPF_ARSH, R0, 7),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -10 } }
+ },
+ {
+ "ALU32_ARSH_K: -1234 >> 0 = -1234",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, -1234),
+ BPF_ALU32_IMM(BPF_ARSH, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -1234 } }
+ },
+ {
+ "ALU64_ARSH_K: 0xff00ff0000000000 >> 40 = 0xffffffffffff00ff",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0xff00ff0000000000LL),
+ BPF_ALU64_IMM(BPF_ARSH, R0, 40),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xffff00ff } },
+ },
+ {
+ "ALU64_ARSH_K: Shift < 32, low word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_RSH, R0, 12),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x56789abc } }
+ },
+ {
+ "ALU64_ARSH_K: Shift < 32, high word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_ARSH, R0, 12),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xfff81234 } }
+ },
+ {
+ "ALU64_ARSH_K: Shift > 32, low word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_ARSH, R0, 36),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xf8123456 } }
+ },
+ {
+ "ALU64_ARSH_K: Shift > 32, high word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0xf123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_ARSH, R0, 36),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -1 } }
+ },
+ {
+ "ALU64_ARSH_K: Shift == 32, low word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_ARSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x81234567 } }
+ },
+ {
+ "ALU64_ARSH_K: Shift == 32, high word",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_ARSH, R0, 32),
+ BPF_ALU64_IMM(BPF_RSH, R0, 32),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -1 } }
+ },
+ {
+ "ALU64_ARSH_K: Zero shoft",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+ BPF_ALU64_IMM(BPF_ARSH, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x89abcdef } }
+ },
+ /* BPF_ALU | BPF_NEG */
+ {
+ "ALU_NEG: -(3) = -3",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 3),
+ BPF_ALU32_IMM(BPF_NEG, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -3 } },
+ },
+ {
+ "ALU_NEG: -(-3) = 3",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, -3),
+ BPF_ALU32_IMM(BPF_NEG, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 3 } },
+ },
+ {
+ "ALU64_NEG: -(3) = -3",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 3),
+ BPF_ALU64_IMM(BPF_NEG, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -3 } },
+ },
+ {
+ "ALU64_NEG: -(-3) = 3",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, -3),
+ BPF_ALU64_IMM(BPF_NEG, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 3 } },
+ },
+ /* BPF_ALU | BPF_END | BPF_FROM_BE */
+ {
+ "ALU_END_FROM_BE 16: 0x0123456789abcdef -> 0xcdef",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_ENDIAN(BPF_FROM_BE, R0, 16),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, cpu_to_be16(0xcdef) } },
+ },
+ {
+ "ALU_END_FROM_BE 32: 0x0123456789abcdef -> 0x89abcdef",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_ENDIAN(BPF_FROM_BE, R0, 32),
+ BPF_ALU64_REG(BPF_MOV, R1, R0),
+ BPF_ALU64_IMM(BPF_RSH, R1, 32),
+ BPF_ALU32_REG(BPF_ADD, R0, R1), /* R1 = 0 */
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, cpu_to_be32(0x89abcdef) } },
+ },
+ {
+ "ALU_END_FROM_BE 64: 0x0123456789abcdef -> 0x89abcdef",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_ENDIAN(BPF_FROM_BE, R0, 64),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, (u32) cpu_to_be64(0x0123456789abcdefLL) } },
+ },
+ /* BPF_ALU | BPF_END | BPF_FROM_LE */
+ {
+ "ALU_END_FROM_LE 16: 0x0123456789abcdef -> 0xefcd",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_ENDIAN(BPF_FROM_LE, R0, 16),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, cpu_to_le16(0xcdef) } },
+ },
+ {
+ "ALU_END_FROM_LE 32: 0x0123456789abcdef -> 0xefcdab89",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_ENDIAN(BPF_FROM_LE, R0, 32),
+ BPF_ALU64_REG(BPF_MOV, R1, R0),
+ BPF_ALU64_IMM(BPF_RSH, R1, 32),
+ BPF_ALU32_REG(BPF_ADD, R0, R1), /* R1 = 0 */
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, cpu_to_le32(0x89abcdef) } },
+ },
+ {
+ "ALU_END_FROM_LE 64: 0x0123456789abcdef -> 0x67452301",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+ BPF_ENDIAN(BPF_FROM_LE, R0, 64),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, (u32) cpu_to_le64(0x0123456789abcdefLL) } },
+ },
+ /* BPF_ST(X) | BPF_MEM | BPF_B/H/W/DW */
+ {
+ "ST_MEM_B: Store/Load byte: max negative",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 1),
+ BPF_ST_MEM(BPF_B, R10, -40, 0xff),
+ BPF_LDX_MEM(BPF_B, R0, R10, -40),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xff } },
+ .stack_depth = 40,
+ },
+ {
+ "ST_MEM_B: Store/Load byte: max positive",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 1),
+ BPF_ST_MEM(BPF_H, R10, -40, 0x7f),
+ BPF_LDX_MEM(BPF_H, R0, R10, -40),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x7f } },
+ .stack_depth = 40,
+ },
+ {
+ "STX_MEM_B: Store/Load byte: max negative",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0),
+ BPF_LD_IMM64(R1, 0xffLL),
+ BPF_STX_MEM(BPF_B, R10, R1, -40),
+ BPF_LDX_MEM(BPF_B, R0, R10, -40),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xff } },
+ .stack_depth = 40,
+ },
+ {
+ "ST_MEM_H: Store/Load half word: max negative",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 1),
+ BPF_ST_MEM(BPF_H, R10, -40, 0xffff),
+ BPF_LDX_MEM(BPF_H, R0, R10, -40),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xffff } },
+ .stack_depth = 40,
+ },
+ {
+ "ST_MEM_H: Store/Load half word: max positive",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 1),
+ BPF_ST_MEM(BPF_H, R10, -40, 0x7fff),
+ BPF_LDX_MEM(BPF_H, R0, R10, -40),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x7fff } },
+ .stack_depth = 40,
+ },
+ {
+ "STX_MEM_H: Store/Load half word: max negative",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0),
+ BPF_LD_IMM64(R1, 0xffffLL),
+ BPF_STX_MEM(BPF_H, R10, R1, -40),
+ BPF_LDX_MEM(BPF_H, R0, R10, -40),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xffff } },
+ .stack_depth = 40,
+ },
+ {
+ "ST_MEM_W: Store/Load word: max negative",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 1),
+ BPF_ST_MEM(BPF_W, R10, -40, 0xffffffff),
+ BPF_LDX_MEM(BPF_W, R0, R10, -40),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xffffffff } },
+ .stack_depth = 40,
+ },
+ {
+ "ST_MEM_W: Store/Load word: max positive",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 1),
+ BPF_ST_MEM(BPF_W, R10, -40, 0x7fffffff),
+ BPF_LDX_MEM(BPF_W, R0, R10, -40),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x7fffffff } },
+ .stack_depth = 40,
+ },
+ {
+ "STX_MEM_W: Store/Load word: max negative",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0),
+ BPF_LD_IMM64(R1, 0xffffffffLL),
+ BPF_STX_MEM(BPF_W, R10, R1, -40),
+ BPF_LDX_MEM(BPF_W, R0, R10, -40),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xffffffff } },
+ .stack_depth = 40,
+ },
+ {
+ "ST_MEM_DW: Store/Load double word: max negative",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 1),
+ BPF_ST_MEM(BPF_DW, R10, -40, 0xffffffff),
+ BPF_LDX_MEM(BPF_DW, R0, R10, -40),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xffffffff } },
+ .stack_depth = 40,
+ },
+ {
+ "ST_MEM_DW: Store/Load double word: max negative 2",
+ .u.insns_int = {
+ BPF_LD_IMM64(R2, 0xffff00000000ffffLL),
+ BPF_LD_IMM64(R3, 0xffffffffffffffffLL),
+ BPF_ST_MEM(BPF_DW, R10, -40, 0xffffffff),
+ BPF_LDX_MEM(BPF_DW, R2, R10, -40),
+ BPF_JMP_REG(BPF_JEQ, R2, R3, 2),
+ BPF_MOV32_IMM(R0, 2),
+ BPF_EXIT_INSN(),
+ BPF_MOV32_IMM(R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x1 } },
+ .stack_depth = 40,
+ },
+ {
+ "ST_MEM_DW: Store/Load double word: max positive",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 1),
+ BPF_ST_MEM(BPF_DW, R10, -40, 0x7fffffff),
+ BPF_LDX_MEM(BPF_DW, R0, R10, -40),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x7fffffff } },
+ .stack_depth = 40,
+ },
+ {
+ "STX_MEM_DW: Store/Load double word: max negative",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0),
+ BPF_LD_IMM64(R1, 0xffffffffffffffffLL),
+ BPF_STX_MEM(BPF_DW, R10, R1, -40),
+ BPF_LDX_MEM(BPF_DW, R0, R10, -40),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xffffffff } },
+ .stack_depth = 40,
+ },
+ {
+ "STX_MEM_DW: Store double word: first word in memory",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0),
+ BPF_LD_IMM64(R1, 0x0123456789abcdefLL),
+ BPF_STX_MEM(BPF_DW, R10, R1, -40),
+ BPF_LDX_MEM(BPF_W, R0, R10, -40),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+#ifdef __BIG_ENDIAN
+ { { 0, 0x01234567 } },
+#else
+ { { 0, 0x89abcdef } },
+#endif
+ .stack_depth = 40,
+ },
+ {
+ "STX_MEM_DW: Store double word: second word in memory",
+ .u.insns_int = {
+ BPF_LD_IMM64(R0, 0),
+ BPF_LD_IMM64(R1, 0x0123456789abcdefLL),
+ BPF_STX_MEM(BPF_DW, R10, R1, -40),
+ BPF_LDX_MEM(BPF_W, R0, R10, -36),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+#ifdef __BIG_ENDIAN
+ { { 0, 0x89abcdef } },
+#else
+ { { 0, 0x01234567 } },
+#endif
+ .stack_depth = 40,
+ },
+ /* BPF_STX | BPF_ATOMIC | BPF_W/DW */
+ {
+ "STX_XADD_W: X + 1 + 1 + 1 + ...",
+ { },
+ INTERNAL,
+ { },
+ { { 0, 4134 } },
+ .fill_helper = bpf_fill_stxw,
+ },
+ {
+ "STX_XADD_DW: X + 1 + 1 + 1 + ...",
+ { },
+ INTERNAL,
+ { },
+ { { 0, 4134 } },
+ .fill_helper = bpf_fill_stxdw,
+ },
+ /*
+ * Exhaustive tests of atomic operation variants.
+ * Individual tests are expanded from template macros for all
+ * combinations of ALU operation, word size and fetching.
+ */
+#define BPF_ATOMIC_OP_TEST1(width, op, logic, old, update, result) \
+{ \
+ "BPF_ATOMIC | " #width ", " #op ": Test: " \
+ #old " " #logic " " #update " = " #result, \
+ .u.insns_int = { \
+ BPF_ALU32_IMM(BPF_MOV, R5, update), \
+ BPF_ST_MEM(width, R10, -40, old), \
+ BPF_ATOMIC_OP(width, op, R10, R5, -40), \
+ BPF_LDX_MEM(width, R0, R10, -40), \
+ BPF_EXIT_INSN(), \
+ }, \
+ INTERNAL, \
+ { }, \
+ { { 0, result } }, \
+ .stack_depth = 40, \
+}
+#define BPF_ATOMIC_OP_TEST2(width, op, logic, old, update, result) \
+{ \
+ "BPF_ATOMIC | " #width ", " #op ": Test side effects, r10: " \
+ #old " " #logic " " #update " = " #result, \
+ .u.insns_int = { \
+ BPF_ALU64_REG(BPF_MOV, R1, R10), \
+ BPF_ALU32_IMM(BPF_MOV, R0, update), \
+ BPF_ST_MEM(BPF_W, R10, -40, old), \
+ BPF_ATOMIC_OP(width, op, R10, R0, -40), \
+ BPF_ALU64_REG(BPF_MOV, R0, R10), \
+ BPF_ALU64_REG(BPF_SUB, R0, R1), \
+ BPF_EXIT_INSN(), \
+ }, \
+ INTERNAL, \
+ { }, \
+ { { 0, 0 } }, \
+ .stack_depth = 40, \
+}
+#define BPF_ATOMIC_OP_TEST3(width, op, logic, old, update, result) \
+{ \
+ "BPF_ATOMIC | " #width ", " #op ": Test side effects, r0: " \
+ #old " " #logic " " #update " = " #result, \
+ .u.insns_int = { \
+ BPF_ALU64_REG(BPF_MOV, R0, R10), \
+ BPF_ALU32_IMM(BPF_MOV, R1, update), \
+ BPF_ST_MEM(width, R10, -40, old), \
+ BPF_ATOMIC_OP(width, op, R10, R1, -40), \
+ BPF_ALU64_REG(BPF_SUB, R0, R10), \
+ BPF_EXIT_INSN(), \
+ }, \
+ INTERNAL, \
+ { }, \
+ { { 0, 0 } }, \
+ .stack_depth = 40, \
+}
+#define BPF_ATOMIC_OP_TEST4(width, op, logic, old, update, result) \
+{ \
+ "BPF_ATOMIC | " #width ", " #op ": Test fetch: " \
+ #old " " #logic " " #update " = " #result, \
+ .u.insns_int = { \
+ BPF_ALU32_IMM(BPF_MOV, R3, update), \
+ BPF_ST_MEM(width, R10, -40, old), \
+ BPF_ATOMIC_OP(width, op, R10, R3, -40), \
+ BPF_ALU64_REG(BPF_MOV, R0, R3), \
+ BPF_EXIT_INSN(), \
+ }, \
+ INTERNAL, \
+ { }, \
+ { { 0, (op) & BPF_FETCH ? old : update } }, \
+ .stack_depth = 40, \
+}
+ /* BPF_ATOMIC | BPF_W: BPF_ADD */
+ BPF_ATOMIC_OP_TEST1(BPF_W, BPF_ADD, +, 0x12, 0xab, 0xbd),
+ BPF_ATOMIC_OP_TEST2(BPF_W, BPF_ADD, +, 0x12, 0xab, 0xbd),
+ BPF_ATOMIC_OP_TEST3(BPF_W, BPF_ADD, +, 0x12, 0xab, 0xbd),
+ BPF_ATOMIC_OP_TEST4(BPF_W, BPF_ADD, +, 0x12, 0xab, 0xbd),
+ /* BPF_ATOMIC | BPF_W: BPF_ADD | BPF_FETCH */
+ BPF_ATOMIC_OP_TEST1(BPF_W, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd),
+ BPF_ATOMIC_OP_TEST2(BPF_W, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd),
+ BPF_ATOMIC_OP_TEST3(BPF_W, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd),
+ BPF_ATOMIC_OP_TEST4(BPF_W, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd),
+ /* BPF_ATOMIC | BPF_DW: BPF_ADD */
+ BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_ADD, +, 0x12, 0xab, 0xbd),
+ BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_ADD, +, 0x12, 0xab, 0xbd),
+ BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_ADD, +, 0x12, 0xab, 0xbd),
+ BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_ADD, +, 0x12, 0xab, 0xbd),
+ /* BPF_ATOMIC | BPF_DW: BPF_ADD | BPF_FETCH */
+ BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd),
+ BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd),
+ BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd),
+ BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd),
+ /* BPF_ATOMIC | BPF_W: BPF_AND */
+ BPF_ATOMIC_OP_TEST1(BPF_W, BPF_AND, &, 0x12, 0xab, 0x02),
+ BPF_ATOMIC_OP_TEST2(BPF_W, BPF_AND, &, 0x12, 0xab, 0x02),
+ BPF_ATOMIC_OP_TEST3(BPF_W, BPF_AND, &, 0x12, 0xab, 0x02),
+ BPF_ATOMIC_OP_TEST4(BPF_W, BPF_AND, &, 0x12, 0xab, 0x02),
+ /* BPF_ATOMIC | BPF_W: BPF_AND | BPF_FETCH */
+ BPF_ATOMIC_OP_TEST1(BPF_W, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02),
+ BPF_ATOMIC_OP_TEST2(BPF_W, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02),
+ BPF_ATOMIC_OP_TEST3(BPF_W, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02),
+ BPF_ATOMIC_OP_TEST4(BPF_W, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02),
+ /* BPF_ATOMIC | BPF_DW: BPF_AND */
+ BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_AND, &, 0x12, 0xab, 0x02),
+ BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_AND, &, 0x12, 0xab, 0x02),
+ BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_AND, &, 0x12, 0xab, 0x02),
+ BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_AND, &, 0x12, 0xab, 0x02),
+ /* BPF_ATOMIC | BPF_DW: BPF_AND | BPF_FETCH */
+ BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02),
+ BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02),
+ BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02),
+ BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02),
+ /* BPF_ATOMIC | BPF_W: BPF_OR */
+ BPF_ATOMIC_OP_TEST1(BPF_W, BPF_OR, |, 0x12, 0xab, 0xbb),
+ BPF_ATOMIC_OP_TEST2(BPF_W, BPF_OR, |, 0x12, 0xab, 0xbb),
+ BPF_ATOMIC_OP_TEST3(BPF_W, BPF_OR, |, 0x12, 0xab, 0xbb),
+ BPF_ATOMIC_OP_TEST4(BPF_W, BPF_OR, |, 0x12, 0xab, 0xbb),
+ /* BPF_ATOMIC | BPF_W: BPF_OR | BPF_FETCH */
+ BPF_ATOMIC_OP_TEST1(BPF_W, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb),
+ BPF_ATOMIC_OP_TEST2(BPF_W, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb),
+ BPF_ATOMIC_OP_TEST3(BPF_W, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb),
+ BPF_ATOMIC_OP_TEST4(BPF_W, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb),
+ /* BPF_ATOMIC | BPF_DW: BPF_OR */
+ BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_OR, |, 0x12, 0xab, 0xbb),
+ BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_OR, |, 0x12, 0xab, 0xbb),
+ BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_OR, |, 0x12, 0xab, 0xbb),
+ BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_OR, |, 0x12, 0xab, 0xbb),
+ /* BPF_ATOMIC | BPF_DW: BPF_OR | BPF_FETCH */
+ BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb),
+ BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb),
+ BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb),
+ BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb),
+ /* BPF_ATOMIC | BPF_W: BPF_XOR */
+ BPF_ATOMIC_OP_TEST1(BPF_W, BPF_XOR, ^, 0x12, 0xab, 0xb9),
+ BPF_ATOMIC_OP_TEST2(BPF_W, BPF_XOR, ^, 0x12, 0xab, 0xb9),
+ BPF_ATOMIC_OP_TEST3(BPF_W, BPF_XOR, ^, 0x12, 0xab, 0xb9),
+ BPF_ATOMIC_OP_TEST4(BPF_W, BPF_XOR, ^, 0x12, 0xab, 0xb9),
+ /* BPF_ATOMIC | BPF_W: BPF_XOR | BPF_FETCH */
+ BPF_ATOMIC_OP_TEST1(BPF_W, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9),
+ BPF_ATOMIC_OP_TEST2(BPF_W, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9),
+ BPF_ATOMIC_OP_TEST3(BPF_W, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9),
+ BPF_ATOMIC_OP_TEST4(BPF_W, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9),
+ /* BPF_ATOMIC | BPF_DW: BPF_XOR */
+ BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_XOR, ^, 0x12, 0xab, 0xb9),
+ BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_XOR, ^, 0x12, 0xab, 0xb9),
+ BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_XOR, ^, 0x12, 0xab, 0xb9),
+ BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_XOR, ^, 0x12, 0xab, 0xb9),
+ /* BPF_ATOMIC | BPF_DW: BPF_XOR | BPF_FETCH */
+ BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9),
+ BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9),
+ BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9),
+ BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9),
+ /* BPF_ATOMIC | BPF_W: BPF_XCHG */
+ BPF_ATOMIC_OP_TEST1(BPF_W, BPF_XCHG, xchg, 0x12, 0xab, 0xab),
+ BPF_ATOMIC_OP_TEST2(BPF_W, BPF_XCHG, xchg, 0x12, 0xab, 0xab),
+ BPF_ATOMIC_OP_TEST3(BPF_W, BPF_XCHG, xchg, 0x12, 0xab, 0xab),
+ BPF_ATOMIC_OP_TEST4(BPF_W, BPF_XCHG, xchg, 0x12, 0xab, 0xab),
+ /* BPF_ATOMIC | BPF_DW: BPF_XCHG */
+ BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_XCHG, xchg, 0x12, 0xab, 0xab),
+ BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_XCHG, xchg, 0x12, 0xab, 0xab),
+ BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_XCHG, xchg, 0x12, 0xab, 0xab),
+ BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_XCHG, xchg, 0x12, 0xab, 0xab),
+#undef BPF_ATOMIC_OP_TEST1
+#undef BPF_ATOMIC_OP_TEST2
+#undef BPF_ATOMIC_OP_TEST3
+#undef BPF_ATOMIC_OP_TEST4
+ /* BPF_ATOMIC | BPF_W, BPF_CMPXCHG */
+ {
+ "BPF_ATOMIC | BPF_W, BPF_CMPXCHG: Test successful return",
+ .u.insns_int = {
+ BPF_ST_MEM(BPF_W, R10, -40, 0x01234567),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0x01234567),
+ BPF_ALU32_IMM(BPF_MOV, R3, 0x89abcdef),
+ BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x01234567 } },
+ .stack_depth = 40,
+ },
+ {
+ "BPF_ATOMIC | BPF_W, BPF_CMPXCHG: Test successful store",
+ .u.insns_int = {
+ BPF_ST_MEM(BPF_W, R10, -40, 0x01234567),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0x01234567),
+ BPF_ALU32_IMM(BPF_MOV, R3, 0x89abcdef),
+ BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40),
+ BPF_LDX_MEM(BPF_W, R0, R10, -40),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x89abcdef } },
+ .stack_depth = 40,
+ },
+ {
+ "BPF_ATOMIC | BPF_W, BPF_CMPXCHG: Test failure return",
+ .u.insns_int = {
+ BPF_ST_MEM(BPF_W, R10, -40, 0x01234567),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0x76543210),
+ BPF_ALU32_IMM(BPF_MOV, R3, 0x89abcdef),
+ BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x01234567 } },
+ .stack_depth = 40,
+ },
+ {
+ "BPF_ATOMIC | BPF_W, BPF_CMPXCHG: Test failure store",
+ .u.insns_int = {
+ BPF_ST_MEM(BPF_W, R10, -40, 0x01234567),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0x76543210),
+ BPF_ALU32_IMM(BPF_MOV, R3, 0x89abcdef),
+ BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40),
+ BPF_LDX_MEM(BPF_W, R0, R10, -40),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x01234567 } },
+ .stack_depth = 40,
+ },
+ {
+ "BPF_ATOMIC | BPF_W, BPF_CMPXCHG: Test side effects",
+ .u.insns_int = {
+ BPF_ST_MEM(BPF_W, R10, -40, 0x01234567),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0x01234567),
+ BPF_ALU32_IMM(BPF_MOV, R3, 0x89abcdef),
+ BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40),
+ BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40),
+ BPF_ALU32_REG(BPF_MOV, R0, R3),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x89abcdef } },
+ .stack_depth = 40,
+ },
+ /* BPF_ATOMIC | BPF_DW, BPF_CMPXCHG */
+ {
+ "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test successful return",
+ .u.insns_int = {
+ BPF_LD_IMM64(R1, 0x0123456789abcdefULL),
+ BPF_LD_IMM64(R2, 0xfecdba9876543210ULL),
+ BPF_ALU64_REG(BPF_MOV, R0, R1),
+ BPF_STX_MEM(BPF_DW, R10, R1, -40),
+ BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -40),
+ BPF_JMP_REG(BPF_JNE, R0, R1, 1),
+ BPF_ALU64_REG(BPF_SUB, R0, R1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0 } },
+ .stack_depth = 40,
+ },
+ {
+ "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test successful store",
+ .u.insns_int = {
+ BPF_LD_IMM64(R1, 0x0123456789abcdefULL),
+ BPF_LD_IMM64(R2, 0xfecdba9876543210ULL),
+ BPF_ALU64_REG(BPF_MOV, R0, R1),
+ BPF_STX_MEM(BPF_DW, R10, R0, -40),
+ BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -40),
+ BPF_LDX_MEM(BPF_DW, R0, R10, -40),
+ BPF_JMP_REG(BPF_JNE, R0, R2, 1),
+ BPF_ALU64_REG(BPF_SUB, R0, R2),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0 } },
+ .stack_depth = 40,
+ },
+ {
+ "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test failure return",
+ .u.insns_int = {
+ BPF_LD_IMM64(R1, 0x0123456789abcdefULL),
+ BPF_LD_IMM64(R2, 0xfecdba9876543210ULL),
+ BPF_ALU64_REG(BPF_MOV, R0, R1),
+ BPF_ALU64_IMM(BPF_ADD, R0, 1),
+ BPF_STX_MEM(BPF_DW, R10, R1, -40),
+ BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -40),
+ BPF_JMP_REG(BPF_JNE, R0, R1, 1),
+ BPF_ALU64_REG(BPF_SUB, R0, R1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0 } },
+ .stack_depth = 40,
+ },
+ {
+ "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test failure store",
+ .u.insns_int = {
+ BPF_LD_IMM64(R1, 0x0123456789abcdefULL),
+ BPF_LD_IMM64(R2, 0xfecdba9876543210ULL),
+ BPF_ALU64_REG(BPF_MOV, R0, R1),
+ BPF_ALU64_IMM(BPF_ADD, R0, 1),
+ BPF_STX_MEM(BPF_DW, R10, R1, -40),
+ BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -40),
+ BPF_LDX_MEM(BPF_DW, R0, R10, -40),
+ BPF_JMP_REG(BPF_JNE, R0, R1, 1),
+ BPF_ALU64_REG(BPF_SUB, R0, R1),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0 } },
+ .stack_depth = 40,
+ },
+ {
+ "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test side effects",
+ .u.insns_int = {
+ BPF_LD_IMM64(R1, 0x0123456789abcdefULL),
+ BPF_LD_IMM64(R2, 0xfecdba9876543210ULL),
+ BPF_ALU64_REG(BPF_MOV, R0, R1),
+ BPF_STX_MEM(BPF_DW, R10, R1, -40),
+ BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -40),
+ BPF_LD_IMM64(R0, 0xfecdba9876543210ULL),
+ BPF_JMP_REG(BPF_JNE, R0, R2, 1),
+ BPF_ALU64_REG(BPF_SUB, R0, R2),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0 } },
+ .stack_depth = 40,
+ },
+ /* BPF_JMP32 | BPF_JEQ | BPF_K */
+ {
+ "JMP32_JEQ_K: Small immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 123),
+ BPF_JMP32_IMM(BPF_JEQ, R0, 321, 1),
+ BPF_JMP32_IMM(BPF_JEQ, R0, 123, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 123 } }
+ },
+ {
+ "JMP32_JEQ_K: Large immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 12345678),
+ BPF_JMP32_IMM(BPF_JEQ, R0, 12345678 & 0xffff, 1),
+ BPF_JMP32_IMM(BPF_JEQ, R0, 12345678, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 12345678 } }
+ },
+ {
+ "JMP32_JEQ_K: negative immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, -123),
+ BPF_JMP32_IMM(BPF_JEQ, R0, 123, 1),
+ BPF_JMP32_IMM(BPF_JEQ, R0, -123, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -123 } }
+ },
+ /* BPF_JMP32 | BPF_JEQ | BPF_X */
+ {
+ "JMP32_JEQ_X",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 1234),
+ BPF_ALU32_IMM(BPF_MOV, R1, 4321),
+ BPF_JMP32_REG(BPF_JEQ, R0, R1, 2),
+ BPF_ALU32_IMM(BPF_MOV, R1, 1234),
+ BPF_JMP32_REG(BPF_JEQ, R0, R1, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1234 } }
+ },
+ /* BPF_JMP32 | BPF_JNE | BPF_K */
+ {
+ "JMP32_JNE_K: Small immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 123),
+ BPF_JMP32_IMM(BPF_JNE, R0, 123, 1),
+ BPF_JMP32_IMM(BPF_JNE, R0, 321, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 123 } }
+ },
+ {
+ "JMP32_JNE_K: Large immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 12345678),
+ BPF_JMP32_IMM(BPF_JNE, R0, 12345678, 1),
+ BPF_JMP32_IMM(BPF_JNE, R0, 12345678 & 0xffff, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 12345678 } }
+ },
+ {
+ "JMP32_JNE_K: negative immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, -123),
+ BPF_JMP32_IMM(BPF_JNE, R0, -123, 1),
+ BPF_JMP32_IMM(BPF_JNE, R0, 123, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -123 } }
+ },
+ /* BPF_JMP32 | BPF_JNE | BPF_X */
+ {
+ "JMP32_JNE_X",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 1234),
+ BPF_ALU32_IMM(BPF_MOV, R1, 1234),
+ BPF_JMP32_REG(BPF_JNE, R0, R1, 2),
+ BPF_ALU32_IMM(BPF_MOV, R1, 4321),
+ BPF_JMP32_REG(BPF_JNE, R0, R1, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1234 } }
+ },
+ /* BPF_JMP32 | BPF_JSET | BPF_K */
+ {
+ "JMP32_JSET_K: Small immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 1),
+ BPF_JMP32_IMM(BPF_JSET, R0, 2, 1),
+ BPF_JMP32_IMM(BPF_JSET, R0, 3, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 1 } }
+ },
+ {
+ "JMP32_JSET_K: Large immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0x40000000),
+ BPF_JMP32_IMM(BPF_JSET, R0, 0x3fffffff, 1),
+ BPF_JMP32_IMM(BPF_JSET, R0, 0x60000000, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0x40000000 } }
+ },
+ {
+ "JMP32_JSET_K: negative immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, -123),
+ BPF_JMP32_IMM(BPF_JSET, R0, -1, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, -123 } }
+ },
+ /* BPF_JMP32 | BPF_JSET | BPF_X */
+ {
+ "JMP32_JSET_X",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 8),
+ BPF_ALU32_IMM(BPF_MOV, R1, 7),
+ BPF_JMP32_REG(BPF_JSET, R0, R1, 2),
+ BPF_ALU32_IMM(BPF_MOV, R1, 8 | 2),
+ BPF_JMP32_REG(BPF_JNE, R0, R1, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 8 } }
+ },
+ /* BPF_JMP32 | BPF_JGT | BPF_K */
+ {
+ "JMP32_JGT_K: Small immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 123),
+ BPF_JMP32_IMM(BPF_JGT, R0, 123, 1),
+ BPF_JMP32_IMM(BPF_JGT, R0, 122, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 123 } }
+ },
+ {
+ "JMP32_JGT_K: Large immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe),
+ BPF_JMP32_IMM(BPF_JGT, R0, 0xffffffff, 1),
+ BPF_JMP32_IMM(BPF_JGT, R0, 0xfffffffd, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xfffffffe } }
+ },
+ /* BPF_JMP32 | BPF_JGT | BPF_X */
+ {
+ "JMP32_JGT_X",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe),
+ BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff),
+ BPF_JMP32_REG(BPF_JGT, R0, R1, 2),
+ BPF_ALU32_IMM(BPF_MOV, R1, 0xfffffffd),
+ BPF_JMP32_REG(BPF_JGT, R0, R1, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
+ INTERNAL,
+ { },
+ { { 0, 0xfffffffe } }
+ },
+ /* BPF_JMP32 | BPF_JGE | BPF_K */
+ {
+ "JMP32_JGE_K: Small immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, 123),
+ BPF_JMP32_IMM(BPF_JGE, R0, 124, 1),
+ BPF_JMP32_IMM(BPF_JGE, R0, 123, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, 0xff } },
- .stack_depth = 40,
+ { { 0, 123 } }
},
{
- "ST_MEM_B: Store/Load byte: max positive",
+ "JMP32_JGE_K: Large immediate",
.u.insns_int = {
- BPF_ALU32_IMM(BPF_MOV, R0, 1),
- BPF_ST_MEM(BPF_H, R10, -40, 0x7f),
- BPF_LDX_MEM(BPF_H, R0, R10, -40),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe),
+ BPF_JMP32_IMM(BPF_JGE, R0, 0xffffffff, 1),
+ BPF_JMP32_IMM(BPF_JGE, R0, 0xfffffffe, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, 0x7f } },
- .stack_depth = 40,
+ { { 0, 0xfffffffe } }
},
+ /* BPF_JMP32 | BPF_JGE | BPF_X */
{
- "STX_MEM_B: Store/Load byte: max negative",
+ "JMP32_JGE_X",
.u.insns_int = {
- BPF_LD_IMM64(R0, 0),
- BPF_LD_IMM64(R1, 0xffLL),
- BPF_STX_MEM(BPF_B, R10, R1, -40),
- BPF_LDX_MEM(BPF_B, R0, R10, -40),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe),
+ BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff),
+ BPF_JMP32_REG(BPF_JGE, R0, R1, 2),
+ BPF_ALU32_IMM(BPF_MOV, R1, 0xfffffffe),
+ BPF_JMP32_REG(BPF_JGE, R0, R1, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, 0xff } },
- .stack_depth = 40,
+ { { 0, 0xfffffffe } }
},
+ /* BPF_JMP32 | BPF_JLT | BPF_K */
{
- "ST_MEM_H: Store/Load half word: max negative",
+ "JMP32_JLT_K: Small immediate",
.u.insns_int = {
- BPF_ALU32_IMM(BPF_MOV, R0, 1),
- BPF_ST_MEM(BPF_H, R10, -40, 0xffff),
- BPF_LDX_MEM(BPF_H, R0, R10, -40),
+ BPF_ALU32_IMM(BPF_MOV, R0, 123),
+ BPF_JMP32_IMM(BPF_JLT, R0, 123, 1),
+ BPF_JMP32_IMM(BPF_JLT, R0, 124, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, 0xffff } },
- .stack_depth = 40,
+ { { 0, 123 } }
},
{
- "ST_MEM_H: Store/Load half word: max positive",
+ "JMP32_JLT_K: Large immediate",
.u.insns_int = {
- BPF_ALU32_IMM(BPF_MOV, R0, 1),
- BPF_ST_MEM(BPF_H, R10, -40, 0x7fff),
- BPF_LDX_MEM(BPF_H, R0, R10, -40),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe),
+ BPF_JMP32_IMM(BPF_JLT, R0, 0xfffffffd, 1),
+ BPF_JMP32_IMM(BPF_JLT, R0, 0xffffffff, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, 0x7fff } },
- .stack_depth = 40,
+ { { 0, 0xfffffffe } }
},
+ /* BPF_JMP32 | BPF_JLT | BPF_X */
{
- "STX_MEM_H: Store/Load half word: max negative",
+ "JMP32_JLT_X",
.u.insns_int = {
- BPF_LD_IMM64(R0, 0),
- BPF_LD_IMM64(R1, 0xffffLL),
- BPF_STX_MEM(BPF_H, R10, R1, -40),
- BPF_LDX_MEM(BPF_H, R0, R10, -40),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe),
+ BPF_ALU32_IMM(BPF_MOV, R1, 0xfffffffd),
+ BPF_JMP32_REG(BPF_JLT, R0, R1, 2),
+ BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff),
+ BPF_JMP32_REG(BPF_JLT, R0, R1, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, 0xffff } },
- .stack_depth = 40,
+ { { 0, 0xfffffffe } }
},
+ /* BPF_JMP32 | BPF_JLE | BPF_K */
{
- "ST_MEM_W: Store/Load word: max negative",
+ "JMP32_JLE_K: Small immediate",
.u.insns_int = {
- BPF_ALU32_IMM(BPF_MOV, R0, 1),
- BPF_ST_MEM(BPF_W, R10, -40, 0xffffffff),
- BPF_LDX_MEM(BPF_W, R0, R10, -40),
+ BPF_ALU32_IMM(BPF_MOV, R0, 123),
+ BPF_JMP32_IMM(BPF_JLE, R0, 122, 1),
+ BPF_JMP32_IMM(BPF_JLE, R0, 123, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, 0xffffffff } },
- .stack_depth = 40,
+ { { 0, 123 } }
},
{
- "ST_MEM_W: Store/Load word: max positive",
+ "JMP32_JLE_K: Large immediate",
.u.insns_int = {
- BPF_ALU32_IMM(BPF_MOV, R0, 1),
- BPF_ST_MEM(BPF_W, R10, -40, 0x7fffffff),
- BPF_LDX_MEM(BPF_W, R0, R10, -40),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe),
+ BPF_JMP32_IMM(BPF_JLE, R0, 0xfffffffd, 1),
+ BPF_JMP32_IMM(BPF_JLE, R0, 0xfffffffe, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, 0x7fffffff } },
- .stack_depth = 40,
+ { { 0, 0xfffffffe } }
},
+ /* BPF_JMP32 | BPF_JLE | BPF_X */
{
- "STX_MEM_W: Store/Load word: max negative",
+ "JMP32_JLE_X",
.u.insns_int = {
- BPF_LD_IMM64(R0, 0),
- BPF_LD_IMM64(R1, 0xffffffffLL),
- BPF_STX_MEM(BPF_W, R10, R1, -40),
- BPF_LDX_MEM(BPF_W, R0, R10, -40),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe),
+ BPF_ALU32_IMM(BPF_MOV, R1, 0xfffffffd),
+ BPF_JMP32_REG(BPF_JLE, R0, R1, 2),
+ BPF_ALU32_IMM(BPF_MOV, R1, 0xfffffffe),
+ BPF_JMP32_REG(BPF_JLE, R0, R1, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, 0xffffffff } },
- .stack_depth = 40,
+ { { 0, 0xfffffffe } }
},
+ /* BPF_JMP32 | BPF_JSGT | BPF_K */
{
- "ST_MEM_DW: Store/Load double word: max negative",
+ "JMP32_JSGT_K: Small immediate",
.u.insns_int = {
- BPF_ALU32_IMM(BPF_MOV, R0, 1),
- BPF_ST_MEM(BPF_DW, R10, -40, 0xffffffff),
- BPF_LDX_MEM(BPF_DW, R0, R10, -40),
+ BPF_ALU32_IMM(BPF_MOV, R0, -123),
+ BPF_JMP32_IMM(BPF_JSGT, R0, -123, 1),
+ BPF_JMP32_IMM(BPF_JSGT, R0, -124, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, 0xffffffff } },
- .stack_depth = 40,
+ { { 0, -123 } }
},
{
- "ST_MEM_DW: Store/Load double word: max negative 2",
+ "JMP32_JSGT_K: Large immediate",
.u.insns_int = {
- BPF_LD_IMM64(R2, 0xffff00000000ffffLL),
- BPF_LD_IMM64(R3, 0xffffffffffffffffLL),
- BPF_ST_MEM(BPF_DW, R10, -40, 0xffffffff),
- BPF_LDX_MEM(BPF_DW, R2, R10, -40),
- BPF_JMP_REG(BPF_JEQ, R2, R3, 2),
- BPF_MOV32_IMM(R0, 2),
- BPF_EXIT_INSN(),
- BPF_MOV32_IMM(R0, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, -12345678),
+ BPF_JMP32_IMM(BPF_JSGT, R0, -12345678, 1),
+ BPF_JMP32_IMM(BPF_JSGT, R0, -12345679, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, 0x1 } },
- .stack_depth = 40,
+ { { 0, -12345678 } }
},
+ /* BPF_JMP32 | BPF_JSGT | BPF_X */
{
- "ST_MEM_DW: Store/Load double word: max positive",
+ "JMP32_JSGT_X",
.u.insns_int = {
- BPF_ALU32_IMM(BPF_MOV, R0, 1),
- BPF_ST_MEM(BPF_DW, R10, -40, 0x7fffffff),
- BPF_LDX_MEM(BPF_DW, R0, R10, -40),
+ BPF_ALU32_IMM(BPF_MOV, R0, -12345678),
+ BPF_ALU32_IMM(BPF_MOV, R1, -12345678),
+ BPF_JMP32_REG(BPF_JSGT, R0, R1, 2),
+ BPF_ALU32_IMM(BPF_MOV, R1, -12345679),
+ BPF_JMP32_REG(BPF_JSGT, R0, R1, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, 0x7fffffff } },
- .stack_depth = 40,
+ { { 0, -12345678 } }
},
+ /* BPF_JMP32 | BPF_JSGE | BPF_K */
{
- "STX_MEM_DW: Store/Load double word: max negative",
+ "JMP32_JSGE_K: Small immediate",
.u.insns_int = {
- BPF_LD_IMM64(R0, 0),
- BPF_LD_IMM64(R1, 0xffffffffffffffffLL),
- BPF_STX_MEM(BPF_W, R10, R1, -40),
- BPF_LDX_MEM(BPF_W, R0, R10, -40),
+ BPF_ALU32_IMM(BPF_MOV, R0, -123),
+ BPF_JMP32_IMM(BPF_JSGE, R0, -122, 1),
+ BPF_JMP32_IMM(BPF_JSGE, R0, -123, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, 0xffffffff } },
- .stack_depth = 40,
+ { { 0, -123 } }
},
- /* BPF_STX | BPF_ATOMIC | BPF_W/DW */
{
- "STX_XADD_W: Test: 0x12 + 0x10 = 0x22",
+ "JMP32_JSGE_K: Large immediate",
.u.insns_int = {
- BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
- BPF_ST_MEM(BPF_W, R10, -40, 0x10),
- BPF_ATOMIC_OP(BPF_W, BPF_ADD, R10, R0, -40),
- BPF_LDX_MEM(BPF_W, R0, R10, -40),
+ BPF_ALU32_IMM(BPF_MOV, R0, -12345678),
+ BPF_JMP32_IMM(BPF_JSGE, R0, -12345677, 1),
+ BPF_JMP32_IMM(BPF_JSGE, R0, -12345678, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, 0x22 } },
- .stack_depth = 40,
+ { { 0, -12345678 } }
},
+ /* BPF_JMP32 | BPF_JSGE | BPF_X */
{
- "STX_XADD_W: Test side-effects, r10: 0x12 + 0x10 = 0x22",
+ "JMP32_JSGE_X",
.u.insns_int = {
- BPF_ALU64_REG(BPF_MOV, R1, R10),
- BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
- BPF_ST_MEM(BPF_W, R10, -40, 0x10),
- BPF_ATOMIC_OP(BPF_W, BPF_ADD, R10, R0, -40),
- BPF_ALU64_REG(BPF_MOV, R0, R10),
- BPF_ALU64_REG(BPF_SUB, R0, R1),
+ BPF_ALU32_IMM(BPF_MOV, R0, -12345678),
+ BPF_ALU32_IMM(BPF_MOV, R1, -12345677),
+ BPF_JMP32_REG(BPF_JSGE, R0, R1, 2),
+ BPF_ALU32_IMM(BPF_MOV, R1, -12345678),
+ BPF_JMP32_REG(BPF_JSGE, R0, R1, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, 0 } },
- .stack_depth = 40,
+ { { 0, -12345678 } }
},
+ /* BPF_JMP32 | BPF_JSLT | BPF_K */
{
- "STX_XADD_W: Test side-effects, r0: 0x12 + 0x10 = 0x22",
+ "JMP32_JSLT_K: Small immediate",
.u.insns_int = {
- BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
- BPF_ST_MEM(BPF_W, R10, -40, 0x10),
- BPF_ATOMIC_OP(BPF_W, BPF_ADD, R10, R0, -40),
+ BPF_ALU32_IMM(BPF_MOV, R0, -123),
+ BPF_JMP32_IMM(BPF_JSLT, R0, -123, 1),
+ BPF_JMP32_IMM(BPF_JSLT, R0, -122, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, 0x12 } },
- .stack_depth = 40,
+ { { 0, -123 } }
},
{
- "STX_XADD_W: X + 1 + 1 + 1 + ...",
- { },
+ "JMP32_JSLT_K: Large immediate",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, -12345678),
+ BPF_JMP32_IMM(BPF_JSLT, R0, -12345678, 1),
+ BPF_JMP32_IMM(BPF_JSLT, R0, -12345677, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
INTERNAL,
{ },
- { { 0, 4134 } },
- .fill_helper = bpf_fill_stxw,
+ { { 0, -12345678 } }
},
+ /* BPF_JMP32 | BPF_JSLT | BPF_X */
{
- "STX_XADD_DW: Test: 0x12 + 0x10 = 0x22",
+ "JMP32_JSLT_X",
.u.insns_int = {
- BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
- BPF_ST_MEM(BPF_DW, R10, -40, 0x10),
- BPF_ATOMIC_OP(BPF_DW, BPF_ADD, R10, R0, -40),
- BPF_LDX_MEM(BPF_DW, R0, R10, -40),
+ BPF_ALU32_IMM(BPF_MOV, R0, -12345678),
+ BPF_ALU32_IMM(BPF_MOV, R1, -12345678),
+ BPF_JMP32_REG(BPF_JSLT, R0, R1, 2),
+ BPF_ALU32_IMM(BPF_MOV, R1, -12345677),
+ BPF_JMP32_REG(BPF_JSLT, R0, R1, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, 0x22 } },
- .stack_depth = 40,
+ { { 0, -12345678 } }
},
+ /* BPF_JMP32 | BPF_JSLE | BPF_K */
{
- "STX_XADD_DW: Test side-effects, r10: 0x12 + 0x10 = 0x22",
+ "JMP32_JSLE_K: Small immediate",
.u.insns_int = {
- BPF_ALU64_REG(BPF_MOV, R1, R10),
- BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
- BPF_ST_MEM(BPF_DW, R10, -40, 0x10),
- BPF_ATOMIC_OP(BPF_DW, BPF_ADD, R10, R0, -40),
- BPF_ALU64_REG(BPF_MOV, R0, R10),
- BPF_ALU64_REG(BPF_SUB, R0, R1),
+ BPF_ALU32_IMM(BPF_MOV, R0, -123),
+ BPF_JMP32_IMM(BPF_JSLE, R0, -124, 1),
+ BPF_JMP32_IMM(BPF_JSLE, R0, -123, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, 0 } },
- .stack_depth = 40,
+ { { 0, -123 } }
},
{
- "STX_XADD_DW: Test side-effects, r0: 0x12 + 0x10 = 0x22",
+ "JMP32_JSLE_K: Large immediate",
.u.insns_int = {
- BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
- BPF_ST_MEM(BPF_DW, R10, -40, 0x10),
- BPF_ATOMIC_OP(BPF_DW, BPF_ADD, R10, R0, -40),
+ BPF_ALU32_IMM(BPF_MOV, R0, -12345678),
+ BPF_JMP32_IMM(BPF_JSLE, R0, -12345679, 1),
+ BPF_JMP32_IMM(BPF_JSLE, R0, -12345678, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
BPF_EXIT_INSN(),
},
INTERNAL,
{ },
- { { 0, 0x12 } },
- .stack_depth = 40,
+ { { 0, -12345678 } }
},
+ /* BPF_JMP32 | BPF_JSLE | BPF_K */
{
- "STX_XADD_DW: X + 1 + 1 + 1 + ...",
- { },
+ "JMP32_JSLE_X",
+ .u.insns_int = {
+ BPF_ALU32_IMM(BPF_MOV, R0, -12345678),
+ BPF_ALU32_IMM(BPF_MOV, R1, -12345679),
+ BPF_JMP32_REG(BPF_JSLE, R0, R1, 2),
+ BPF_ALU32_IMM(BPF_MOV, R1, -12345678),
+ BPF_JMP32_REG(BPF_JSLE, R0, R1, 1),
+ BPF_ALU32_IMM(BPF_MOV, R0, 0),
+ BPF_EXIT_INSN(),
+ },
INTERNAL,
{ },
- { { 0, 4134 } },
- .fill_helper = bpf_fill_stxdw,
+ { { 0, -12345678 } }
},
/* BPF_JMP | BPF_EXIT */
{
{ },
{ { 0, 1 } },
},
+ { /* Mainly checking JIT here. */
+ "BPF_MAXINSNS: Very long conditional jump",
+ { },
+ INTERNAL | FLAG_NO_DATA,
+ { },
+ { { 0, 1 } },
+ .fill_helper = bpf_fill_long_jmp,
+ },
{
"JMP_JA: Jump, gap, jump, ...",
{ },
u64 duration;
u32 ret;
- if (test->test[i].data_size == 0 &&
+ /*
+ * NOTE: Several sub-tests may be present, in which case
+ * a zero {data_size, result} tuple indicates the end of
+ * the sub-test array. The first test is always run,
+ * even if both data_size and result happen to be zero.
+ */
+ if (i > 0 &&
+ test->test[i].data_size == 0 &&
test->test[i].result == 0)
break;
return err_cnt ? -EINVAL : 0;
}
+struct tail_call_test {
+ const char *descr;
+ struct bpf_insn insns[MAX_INSNS];
+ int result;
+ int stack_depth;
+};
+
+/*
+ * Magic marker used in test snippets for tail calls below.
+ * BPF_LD/MOV to R2 and R2 with this immediate value is replaced
+ * with the proper values by the test runner.
+ */
+#define TAIL_CALL_MARKER 0x7a11ca11
+
+/* Special offset to indicate a NULL call target */
+#define TAIL_CALL_NULL 0x7fff
+
+/* Special offset to indicate an out-of-range index */
+#define TAIL_CALL_INVALID 0x7ffe
+
+#define TAIL_CALL(offset) \
+ BPF_LD_IMM64(R2, TAIL_CALL_MARKER), \
+ BPF_RAW_INSN(BPF_ALU | BPF_MOV | BPF_K, R3, 0, \
+ offset, TAIL_CALL_MARKER), \
+ BPF_JMP_IMM(BPF_TAIL_CALL, 0, 0, 0)
+
+/*
+ * Tail call tests. Each test case may call any other test in the table,
+ * including itself, specified as a relative index offset from the calling
+ * test. The index TAIL_CALL_NULL can be used to specify a NULL target
+ * function to test the JIT error path. Similarly, the index TAIL_CALL_INVALID
+ * results in a target index that is out of range.
+ */
+static struct tail_call_test tail_call_tests[] = {
+ {
+ "Tail call leaf",
+ .insns = {
+ BPF_ALU64_REG(BPF_MOV, R0, R1),
+ BPF_ALU64_IMM(BPF_ADD, R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = 1,
+ },
+ {
+ "Tail call 2",
+ .insns = {
+ BPF_ALU64_IMM(BPF_ADD, R1, 2),
+ TAIL_CALL(-1),
+ BPF_ALU64_IMM(BPF_MOV, R0, -1),
+ BPF_EXIT_INSN(),
+ },
+ .result = 3,
+ },
+ {
+ "Tail call 3",
+ .insns = {
+ BPF_ALU64_IMM(BPF_ADD, R1, 3),
+ TAIL_CALL(-1),
+ BPF_ALU64_IMM(BPF_MOV, R0, -1),
+ BPF_EXIT_INSN(),
+ },
+ .result = 6,
+ },
+ {
+ "Tail call 4",
+ .insns = {
+ BPF_ALU64_IMM(BPF_ADD, R1, 4),
+ TAIL_CALL(-1),
+ BPF_ALU64_IMM(BPF_MOV, R0, -1),
+ BPF_EXIT_INSN(),
+ },
+ .result = 10,
+ },
+ {
+ "Tail call error path, max count reached",
+ .insns = {
+ BPF_ALU64_IMM(BPF_ADD, R1, 1),
+ BPF_ALU64_REG(BPF_MOV, R0, R1),
+ TAIL_CALL(0),
+ BPF_EXIT_INSN(),
+ },
+ .result = MAX_TAIL_CALL_CNT + 1,
+ },
+ {
+ "Tail call error path, NULL target",
+ .insns = {
+ BPF_ALU64_IMM(BPF_MOV, R0, -1),
+ TAIL_CALL(TAIL_CALL_NULL),
+ BPF_ALU64_IMM(BPF_MOV, R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = 1,
+ },
+ {
+ "Tail call error path, index out of range",
+ .insns = {
+ BPF_ALU64_IMM(BPF_MOV, R0, -1),
+ TAIL_CALL(TAIL_CALL_INVALID),
+ BPF_ALU64_IMM(BPF_MOV, R0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = 1,
+ },
+};
+
+static void __init destroy_tail_call_tests(struct bpf_array *progs)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(tail_call_tests); i++)
+ if (progs->ptrs[i])
+ bpf_prog_free(progs->ptrs[i]);
+ kfree(progs);
+}
+
+static __init int prepare_tail_call_tests(struct bpf_array **pprogs)
+{
+ int ntests = ARRAY_SIZE(tail_call_tests);
+ struct bpf_array *progs;
+ int which, err;
+
+ /* Allocate the table of programs to be used for tall calls */
+ progs = kzalloc(sizeof(*progs) + (ntests + 1) * sizeof(progs->ptrs[0]),
+ GFP_KERNEL);
+ if (!progs)
+ goto out_nomem;
+
+ /* Create all eBPF programs and populate the table */
+ for (which = 0; which < ntests; which++) {
+ struct tail_call_test *test = &tail_call_tests[which];
+ struct bpf_prog *fp;
+ int len, i;
+
+ /* Compute the number of program instructions */
+ for (len = 0; len < MAX_INSNS; len++) {
+ struct bpf_insn *insn = &test->insns[len];
+
+ if (len < MAX_INSNS - 1 &&
+ insn->code == (BPF_LD | BPF_DW | BPF_IMM))
+ len++;
+ if (insn->code == 0)
+ break;
+ }
+
+ /* Allocate and initialize the program */
+ fp = bpf_prog_alloc(bpf_prog_size(len), 0);
+ if (!fp)
+ goto out_nomem;
+
+ fp->len = len;
+ fp->type = BPF_PROG_TYPE_SOCKET_FILTER;
+ fp->aux->stack_depth = test->stack_depth;
+ memcpy(fp->insnsi, test->insns, len * sizeof(struct bpf_insn));
+
+ /* Relocate runtime tail call offsets and addresses */
+ for (i = 0; i < len; i++) {
+ struct bpf_insn *insn = &fp->insnsi[i];
+
+ if (insn->imm != TAIL_CALL_MARKER)
+ continue;
+
+ switch (insn->code) {
+ case BPF_LD | BPF_DW | BPF_IMM:
+ insn[0].imm = (u32)(long)progs;
+ insn[1].imm = ((u64)(long)progs) >> 32;
+ break;
+
+ case BPF_ALU | BPF_MOV | BPF_K:
+ if (insn->off == TAIL_CALL_NULL)
+ insn->imm = ntests;
+ else if (insn->off == TAIL_CALL_INVALID)
+ insn->imm = ntests + 1;
+ else
+ insn->imm = which + insn->off;
+ insn->off = 0;
+ }
+ }
+
+ fp = bpf_prog_select_runtime(fp, &err);
+ if (err)
+ goto out_err;
+
+ progs->ptrs[which] = fp;
+ }
+
+ /* The last entry contains a NULL program pointer */
+ progs->map.max_entries = ntests + 1;
+ *pprogs = progs;
+ return 0;
+
+out_nomem:
+ err = -ENOMEM;
+
+out_err:
+ if (progs)
+ destroy_tail_call_tests(progs);
+ return err;
+}
+
+static __init int test_tail_calls(struct bpf_array *progs)
+{
+ int i, err_cnt = 0, pass_cnt = 0;
+ int jit_cnt = 0, run_cnt = 0;
+
+ for (i = 0; i < ARRAY_SIZE(tail_call_tests); i++) {
+ struct tail_call_test *test = &tail_call_tests[i];
+ struct bpf_prog *fp = progs->ptrs[i];
+ u64 duration;
+ int ret;
+
+ cond_resched();
+
+ pr_info("#%d %s ", i, test->descr);
+ if (!fp) {
+ err_cnt++;
+ continue;
+ }
+ pr_cont("jited:%u ", fp->jited);
+
+ run_cnt++;
+ if (fp->jited)
+ jit_cnt++;
+
+ ret = __run_one(fp, NULL, MAX_TESTRUNS, &duration);
+ if (ret == test->result) {
+ pr_cont("%lld PASS", duration);
+ pass_cnt++;
+ } else {
+ pr_cont("ret %d != %d FAIL", ret, test->result);
+ err_cnt++;
+ }
+ }
+
+ pr_info("%s: Summary: %d PASSED, %d FAILED, [%d/%d JIT'ed]\n",
+ __func__, pass_cnt, err_cnt, jit_cnt, run_cnt);
+
+ return err_cnt ? -EINVAL : 0;
+}
+
static int __init test_bpf_init(void)
{
+ struct bpf_array *progs = NULL;
int ret;
ret = prepare_bpf_tests();
if (ret)
return ret;
+ ret = prepare_tail_call_tests(&progs);
+ if (ret)
+ return ret;
+ ret = test_tail_calls(progs);
+ destroy_tail_call_tests(progs);
+ if (ret)
+ return ret;
+
return test_skb_segment();
}
blkcg_unpin_online(blkcg);
fprop_local_destroy_percpu(&wb->memcg_completions);
- percpu_ref_exit(&wb->refcnt);
spin_lock_irq(&cgwb_lock);
list_del(&wb->offline_node);
spin_unlock_irq(&cgwb_lock);
+ percpu_ref_exit(&wb->refcnt);
wb_exit(wb);
WARN_ON_ONCE(!list_empty(&wb->b_attached));
kfree_rcu(wb, rcu);
void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags)
{
+ /*
+ * Perform size check before switching kfence_allocation_gate, so that
+ * we don't disable KFENCE without making an allocation.
+ */
+ if (size > PAGE_SIZE)
+ return NULL;
+
+ /*
+ * Skip allocations from non-default zones, including DMA. We cannot
+ * guarantee that pages in the KFENCE pool will have the requested
+ * properties (e.g. reside in DMAable memory).
+ */
+ if ((flags & GFP_ZONEMASK) ||
+ (s->flags & (SLAB_CACHE_DMA | SLAB_CACHE_DMA32)))
+ return NULL;
+
/*
* allocation_gate only needs to become non-zero, so it doesn't make
* sense to continue writing to it and pay the associated contention
if (!READ_ONCE(kfence_enabled))
return NULL;
- if (size > PAGE_SIZE)
- return NULL;
-
return kfence_guarded_alloc(s, size, flags);
}
tracepoint_synchronize_unregister();
}
-late_initcall(kfence_test_init);
+late_initcall_sync(kfence_test_init);
module_exit(kfence_test_exit);
MODULE_LICENSE("GPL v2");
return true;
/* skip hotpluggable memory regions if needed */
- if (movable_node_is_enabled() && memblock_is_hotpluggable(m))
+ if (movable_node_is_enabled() && memblock_is_hotpluggable(m) &&
+ !(flags & MEMBLOCK_HOTPLUG))
return true;
/* if we want mirror memory skip non-mirror memory regions */
unsigned long val;
if (mem_cgroup_is_root(memcg)) {
- cgroup_rstat_flush(memcg->css.cgroup);
+ /* mem_cgroup_threshold() calls here from irqsafe context */
+ cgroup_rstat_flush_irqsafe(memcg->css.cgroup);
val = memcg_page_state(memcg, NR_FILE_PAGES) +
memcg_page_state(memcg, NR_ANON_MAPPED);
if (swap)
return ret;
}
- if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd)))
+ if (vmf->prealloc_pte) {
+ vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
+ if (likely(pmd_none(*vmf->pmd))) {
+ mm_inc_nr_ptes(vma->vm_mm);
+ pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
+ vmf->prealloc_pte = NULL;
+ }
+ spin_unlock(vmf->ptl);
+ } else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) {
return VM_FAULT_OOM;
+ }
}
/* See comment in handle_pte_fault() */
LIST_HEAD(migratepages);
new_page_t *new;
bool compound;
- unsigned int nr_pages = thp_nr_pages(page);
+ int nr_pages = thp_nr_pages(page);
/*
* PTE mapped THP or HugeTLB page can't reach here so the page could
#define TRACE_MMAP_LOCK_EVENT(type, mm, ...) \
do { \
const char *memcg_path; \
- preempt_disable(); \
+ local_lock(&memcg_paths.lock); \
memcg_path = get_mm_memcg_path(mm); \
trace_mmap_lock_##type(mm, \
memcg_path != NULL ? memcg_path : "", \
##__VA_ARGS__); \
if (likely(memcg_path != NULL)) \
put_memcg_path_buf(); \
- preempt_enable(); \
+ local_unlock(&memcg_paths.lock); \
} while (0)
#else /* !CONFIG_MEMCG */
}
#endif
- if (_init_on_alloc_enabled_early) {
- if (page_poisoning_requested)
- pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, "
- "will take precedence over init_on_alloc\n");
- else
- static_branch_enable(&init_on_alloc);
- }
- if (_init_on_free_enabled_early) {
- if (page_poisoning_requested)
- pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, "
- "will take precedence over init_on_free\n");
- else
- static_branch_enable(&init_on_free);
+ if ((_init_on_alloc_enabled_early || _init_on_free_enabled_early) &&
+ page_poisoning_requested) {
+ pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, "
+ "will take precedence over init_on_alloc and init_on_free\n");
+ _init_on_alloc_enabled_early = false;
+ _init_on_free_enabled_early = false;
}
+ if (_init_on_alloc_enabled_early)
+ static_branch_enable(&init_on_alloc);
+ else
+ static_branch_disable(&init_on_alloc);
+
+ if (_init_on_free_enabled_early)
+ static_branch_enable(&init_on_free);
+ else
+ static_branch_disable(&init_on_free);
+
#ifdef CONFIG_DEBUG_PAGEALLOC
if (!debug_pagealloc_enabled())
return;
}
const struct address_space_operations secretmem_aops = {
+ .set_page_dirty = __set_page_dirty_no_writeback,
.freepage = secretmem_freepage,
.migratepage = secretmem_migratepage,
.isolate_page = secretmem_isolate_page,
continue;
page = virt_to_head_page(p[i]);
- objcgs = page_objcgs(page);
+ objcgs = page_objcgs_check(page);
if (!objcgs)
continue;
struct kmem_cache *s;
};
+static inline void free_nonslab_page(struct page *page)
+{
+ unsigned int order = compound_order(page);
+
+ VM_BUG_ON_PAGE(!PageCompound(page), page);
+ kfree_hook(page_address(page));
+ mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, -(PAGE_SIZE << order));
+ __free_pages(page, order);
+}
+
/*
* This function progressively scans the array with free objects (with
* a limited look ahead) and extract objects belonging to the same
if (!s) {
/* Handle kalloc'ed objects */
if (unlikely(!PageSlab(page))) {
- BUG_ON(!PageCompound(page));
- kfree_hook(object);
- __free_pages(page, compound_order(page));
+ free_nonslab_page(page);
p[size] = NULL; /* mark object processed */
return size;
}
page = virt_to_head_page(x);
if (unlikely(!PageSlab(page))) {
- unsigned int order = compound_order(page);
-
- BUG_ON(!PageCompound(page));
- kfree_hook(object);
- mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
- -(PAGE_SIZE << order));
- __free_pages(page, order);
+ free_nonslab_page(page);
return;
}
slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_);
skb_pull(skb, AX25_KISS_HEADER_LEN);
if (digipeat != NULL) {
- if ((ourskb = ax25_rt_build_path(skb, src, dst, route->digipeat)) == NULL) {
- kfree_skb(skb);
+ if ((ourskb = ax25_rt_build_path(skb, src, dst, route->digipeat)) == NULL)
goto put;
- }
skb = ourskb;
}
void ax25_transmit_buffer(ax25_cb *ax25, struct sk_buff *skb, int type)
{
- struct sk_buff *skbn;
unsigned char *ptr;
int headroom;
headroom = ax25_addr_size(ax25->digipeat);
- if (skb_headroom(skb) < headroom) {
- if ((skbn = skb_realloc_headroom(skb, headroom)) == NULL) {
+ if (unlikely(skb_headroom(skb) < headroom)) {
+ skb = skb_expand_head(skb, headroom);
+ if (!skb) {
printk(KERN_CRIT "AX.25: ax25_transmit_buffer - out of memory\n");
- kfree_skb(skb);
return;
}
-
- if (skb->sk != NULL)
- skb_set_owner_w(skbn, skb->sk);
-
- consume_skb(skb);
- skb = skbn;
}
ptr = skb_push(skb, headroom);
struct sk_buff *ax25_rt_build_path(struct sk_buff *skb, ax25_address *src,
ax25_address *dest, ax25_digi *digi)
{
- struct sk_buff *skbn;
unsigned char *bp;
int len;
len = digi->ndigi * AX25_ADDR_LEN;
- if (skb_headroom(skb) < len) {
- if ((skbn = skb_realloc_headroom(skb, len)) == NULL) {
+ if (unlikely(skb_headroom(skb) < len)) {
+ skb = skb_expand_head(skb, len);
+ if (!skb) {
printk(KERN_CRIT "AX.25: ax25_dg_build_path - out of memory\n");
return NULL;
}
-
- if (skb->sk != NULL)
- skb_set_owner_w(skbn, skb->sk);
-
- consume_skb(skb);
-
- skb = skbn;
}
bp = skb_push(skb, len);
if (primary_if)
batadv_hardif_put(primary_if);
- if (soft_iface)
- dev_put(soft_iface);
+ dev_put(soft_iface);
return ret;
}
if (primary_if)
batadv_hardif_put(primary_if);
- if (soft_iface)
- dev_put(soft_iface);
+ dev_put(soft_iface);
return ret;
}
if (primary_if)
batadv_hardif_put(primary_if);
- if (soft_iface)
- dev_put(soft_iface);
+ dev_put(soft_iface);
return ret;
}
out:
if (primary_if)
batadv_hardif_put(primary_if);
- if (soft_iface)
- dev_put(soft_iface);
+ dev_put(soft_iface);
return ret;
}
upper = netdev_master_upper_dev_get_rcu(upper);
} while (upper && !(upper->priv_flags & IFF_EBRIDGE));
- if (upper)
- dev_hold(upper);
+ dev_hold(upper);
rcu_read_unlock();
return upper;
}
out:
- if (bridge)
- dev_put(bridge);
+ dev_put(bridge);
return ret4 + ret6;
}
}
out:
- if (soft_iface)
- dev_put(soft_iface);
+ dev_put(soft_iface);
if (!ret && primary_if)
*primary_if = hard_iface;
out:
if (hardif)
batadv_hardif_put(hardif);
- if (hard_iface)
- dev_put(hard_iface);
+ dev_put(hard_iface);
if (primary_if)
batadv_hardif_put(primary_if);
- if (soft_iface)
- dev_put(soft_iface);
+ dev_put(soft_iface);
return ret;
}
out:
if (hardif)
batadv_hardif_put(hardif);
- if (hard_iface)
- dev_put(hard_iface);
+ dev_put(hard_iface);
if (primary_if)
batadv_hardif_put(primary_if);
- if (soft_iface)
- dev_put(soft_iface);
+ dev_put(soft_iface);
return ret;
}
out:
if (in_hardif)
batadv_hardif_put(in_hardif);
- if (in_dev)
- dev_put(in_dev);
+ dev_put(in_dev);
if (tt_local)
batadv_tt_local_entry_put(tt_local);
if (tt_global)
out:
if (primary_if)
batadv_hardif_put(primary_if);
- if (soft_iface)
- dev_put(soft_iface);
+ dev_put(soft_iface);
cb->args[0] = bucket;
cb->args[1] = idx;
out:
if (primary_if)
batadv_hardif_put(primary_if);
- if (soft_iface)
- dev_put(soft_iface);
+ dev_put(soft_iface);
cb->args[0] = bucket;
cb->args[1] = idx;
/* Unregister HCI device */
void hci_unregister_dev(struct hci_dev *hdev)
{
- int id;
-
BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus);
hci_dev_set_flag(hdev, HCI_UNREGISTER);
- id = hdev->id;
-
write_lock(&hci_dev_list_lock);
list_del(&hdev->list);
write_unlock(&hci_dev_list_lock);
}
device_del(&hdev->dev);
+ /* Actual cleanup is deferred until hci_cleanup_dev(). */
+ hci_dev_put(hdev);
+}
+EXPORT_SYMBOL(hci_unregister_dev);
+/* Cleanup HCI device */
+void hci_cleanup_dev(struct hci_dev *hdev)
+{
debugfs_remove_recursive(hdev->debugfs);
kfree_const(hdev->hw_info);
kfree_const(hdev->fw_info);
hci_blocked_keys_clear(hdev);
hci_dev_unlock(hdev);
- hci_dev_put(hdev);
-
- ida_simple_remove(&hci_index_ida, id);
+ ida_simple_remove(&hci_index_ida, hdev->id);
}
-EXPORT_SYMBOL(hci_unregister_dev);
/* Suspend HCI device */
int hci_suspend_dev(struct hci_dev *hdev)
char comm[TASK_COMM_LEN];
};
+static struct hci_dev *hci_hdev_from_sock(struct sock *sk)
+{
+ struct hci_dev *hdev = hci_pi(sk)->hdev;
+
+ if (!hdev)
+ return ERR_PTR(-EBADFD);
+ if (hci_dev_test_flag(hdev, HCI_UNREGISTER))
+ return ERR_PTR(-EPIPE);
+ return hdev;
+}
+
void hci_sock_set_flag(struct sock *sk, int nr)
{
set_bit(nr, &hci_pi(sk)->flags);
if (event == HCI_DEV_UNREG) {
struct sock *sk;
- /* Detach sockets from device */
+ /* Wake up sockets using this dead device */
read_lock(&hci_sk_list.lock);
sk_for_each(sk, &hci_sk_list.head) {
- lock_sock(sk);
if (hci_pi(sk)->hdev == hdev) {
- hci_pi(sk)->hdev = NULL;
sk->sk_err = EPIPE;
- sk->sk_state = BT_OPEN;
sk->sk_state_change(sk);
-
- hci_dev_put(hdev);
}
- release_sock(sk);
}
read_unlock(&hci_sk_list.lock);
}
static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd,
unsigned long arg)
{
- struct hci_dev *hdev = hci_pi(sk)->hdev;
+ struct hci_dev *hdev = hci_hdev_from_sock(sk);
- if (!hdev)
- return -EBADFD;
+ if (IS_ERR(hdev))
+ return PTR_ERR(hdev);
if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL))
return -EBUSY;
lock_sock(sk);
+ /* Allow detaching from dead device and attaching to alive device, if
+ * the caller wants to re-bind (instead of close) this socket in
+ * response to hci_sock_dev_event(HCI_DEV_UNREG) notification.
+ */
+ hdev = hci_pi(sk)->hdev;
+ if (hdev && hci_dev_test_flag(hdev, HCI_UNREGISTER)) {
+ hci_pi(sk)->hdev = NULL;
+ sk->sk_state = BT_OPEN;
+ hci_dev_put(hdev);
+ }
+ hdev = NULL;
+
if (sk->sk_state == BT_BOUND) {
err = -EALREADY;
goto done;
lock_sock(sk);
- hdev = hci_pi(sk)->hdev;
- if (!hdev) {
- err = -EBADFD;
+ hdev = hci_hdev_from_sock(sk);
+ if (IS_ERR(hdev)) {
+ err = PTR_ERR(hdev);
goto done;
}
goto done;
}
- hdev = hci_pi(sk)->hdev;
- if (!hdev) {
- err = -EBADFD;
+ hdev = hci_hdev_from_sock(sk);
+ if (IS_ERR(hdev)) {
+ err = PTR_ERR(hdev);
goto done;
}
static void bt_host_release(struct device *dev)
{
struct hci_dev *hdev = to_hci_dev(dev);
+
+ if (hci_dev_test_flag(hdev, HCI_UNREGISTER))
+ hci_cleanup_dev(hdev);
kfree(hdev);
module_put(THIS_MODULE);
}
static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
u32 *retval, u32 *time, bool xdp)
{
- struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { NULL };
+ struct bpf_prog_array_item item = {.prog = prog};
+ struct bpf_run_ctx *old_ctx;
+ struct bpf_cg_run_ctx run_ctx;
struct bpf_test_timer t = { NO_MIGRATE };
enum bpf_cgroup_storage_type stype;
int ret;
for_each_cgroup_storage_type(stype) {
- storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
- if (IS_ERR(storage[stype])) {
- storage[stype] = NULL;
+ item.cgroup_storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
+ if (IS_ERR(item.cgroup_storage[stype])) {
+ item.cgroup_storage[stype] = NULL;
for_each_cgroup_storage_type(stype)
- bpf_cgroup_storage_free(storage[stype]);
+ bpf_cgroup_storage_free(item.cgroup_storage[stype]);
return -ENOMEM;
}
}
repeat = 1;
bpf_test_timer_enter(&t);
+ old_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
do {
- ret = bpf_cgroup_storage_set(storage);
- if (ret)
- break;
-
+ run_ctx.prog_item = &item;
if (xdp)
*retval = bpf_prog_run_xdp(prog, ctx);
else
*retval = BPF_PROG_RUN(prog, ctx);
-
- bpf_cgroup_storage_unset();
} while (bpf_test_timer_continue(&t, repeat, &ret, time));
+ bpf_reset_run_ctx(old_ctx);
bpf_test_timer_leave(&t);
for_each_cgroup_storage_type(stype)
- bpf_cgroup_storage_free(storage[stype]);
+ bpf_cgroup_storage_free(item.cgroup_storage[stype]);
return ret;
}
if (prog->expected_attach_type == BPF_XDP_DEVMAP ||
prog->expected_attach_type == BPF_XDP_CPUMAP)
return -EINVAL;
- if (kattr->test.ctx_in || kattr->test.ctx_out)
- return -EINVAL;
+
ctx = bpf_ctx_init(kattr, sizeof(struct xdp_md));
if (IS_ERR(ctx))
return PTR_ERR(ctx);
case SWITCHDEV_FDB_ADD_TO_BRIDGE:
fdb_info = ptr;
err = br_fdb_external_learn_add(br, p, fdb_info->addr,
- fdb_info->vid, false);
+ fdb_info->vid,
+ fdb_info->is_local, false);
if (err) {
err = notifier_from_errno(err);
break;
.notifier_call = br_switchdev_event,
};
+/* called under rtnl_mutex */
+static int br_switchdev_blocking_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr);
+ struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+ struct switchdev_notifier_brport_info *brport_info;
+ const struct switchdev_brport *b;
+ struct net_bridge_port *p;
+ int err = NOTIFY_DONE;
+
+ p = br_port_get_rtnl(dev);
+ if (!p)
+ goto out;
+
+ switch (event) {
+ case SWITCHDEV_BRPORT_OFFLOADED:
+ brport_info = ptr;
+ b = &brport_info->brport;
+
+ err = br_switchdev_port_offload(p, b->dev, b->ctx,
+ b->atomic_nb, b->blocking_nb,
+ b->tx_fwd_offload, extack);
+ err = notifier_from_errno(err);
+ break;
+ case SWITCHDEV_BRPORT_UNOFFLOADED:
+ brport_info = ptr;
+ b = &brport_info->brport;
+
+ br_switchdev_port_unoffload(p, b->ctx, b->atomic_nb,
+ b->blocking_nb);
+ break;
+ }
+
+out:
+ return err;
+}
+
+static struct notifier_block br_switchdev_blocking_notifier = {
+ .notifier_call = br_switchdev_blocking_event,
+};
+
/* br_boolopt_toggle - change user-controlled boolean option
*
* @br: bridge device
if (err)
goto err_out4;
- err = br_netlink_init();
+ err = register_switchdev_blocking_notifier(&br_switchdev_blocking_notifier);
if (err)
goto err_out5;
+ err = br_netlink_init();
+ if (err)
+ goto err_out6;
+
brioctl_set(br_ioctl_stub);
#if IS_ENABLED(CONFIG_ATM_LANE)
return 0;
+err_out6:
+ unregister_switchdev_blocking_notifier(&br_switchdev_blocking_notifier);
err_out5:
unregister_switchdev_notifier(&br_switchdev_notifier);
err_out4:
{
stp_proto_unregister(&br_stp_proto);
br_netlink_fini();
+ unregister_switchdev_blocking_notifier(&br_switchdev_blocking_notifier);
unregister_switchdev_notifier(&br_switchdev_notifier);
unregister_netdevice_notifier(&br_device_notifier);
brioctl_set(NULL);
item.added_by_user = test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
item.offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags);
item.is_local = test_bit(BR_FDB_LOCAL, &fdb->flags);
- item.info.dev = item.is_local ? br->dev : p->dev;
+ item.info.dev = (!p || item.is_local) ? br->dev : p->dev;
item.info.ctx = ctx;
err = nb->notifier_call(nb, action, &item);
static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br,
struct net_bridge_port *p, const unsigned char *addr,
- u16 nlh_flags, u16 vid, struct nlattr *nfea_tb[])
+ u16 nlh_flags, u16 vid, struct nlattr *nfea_tb[],
+ struct netlink_ext_ack *extack)
{
int err = 0;
rcu_read_unlock();
local_bh_enable();
} else if (ndm->ndm_flags & NTF_EXT_LEARNED) {
- err = br_fdb_external_learn_add(br, p, addr, vid, true);
+ if (!p && !(ndm->ndm_state & NUD_PERMANENT)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "FDB entry towards bridge must be permanent");
+ return -EINVAL;
+ }
+
+ err = br_fdb_external_learn_add(br, p, addr, vid,
+ ndm->ndm_state & NUD_PERMANENT,
+ true);
} else {
spin_lock_bh(&br->hash_lock);
err = fdb_add_entry(br, p, addr, ndm, nlh_flags, vid, nfea_tb);
}
/* VID was specified, so use it. */
- err = __br_fdb_add(ndm, br, p, addr, nlh_flags, vid, nfea_tb);
+ err = __br_fdb_add(ndm, br, p, addr, nlh_flags, vid, nfea_tb,
+ extack);
} else {
- err = __br_fdb_add(ndm, br, p, addr, nlh_flags, 0, nfea_tb);
+ err = __br_fdb_add(ndm, br, p, addr, nlh_flags, 0, nfea_tb,
+ extack);
if (err || !vg || !vg->num_vlans)
goto out;
if (!br_vlan_should_use(v))
continue;
err = __br_fdb_add(ndm, br, p, addr, nlh_flags, v->vid,
- nfea_tb);
+ nfea_tb, extack);
if (err)
goto out;
}
}
int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
- const unsigned char *addr, u16 vid,
+ const unsigned char *addr, u16 vid, bool is_local,
bool swdev_notify)
{
struct net_bridge_fdb_entry *fdb;
if (swdev_notify)
flags |= BIT(BR_FDB_ADDED_BY_USER);
+
+ if (is_local)
+ flags |= BIT(BR_FDB_LOCAL);
+
fdb = fdb_create(br, p, addr, vid, flags);
if (!fdb) {
err = -ENOMEM;
if (swdev_notify)
set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
+ if (is_local)
+ set_bit(BR_FDB_LOCAL, &fdb->flags);
+
if (modified)
fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify);
}
dev_net_set(dev, net);
dev->rtnl_link_ops = &br_link_ops;
- res = register_netdev(dev);
+ res = register_netdevice(dev);
if (res)
free_netdev(dev);
return res;
struct net_device *dev;
int ret = 0;
- rtnl_lock();
dev = __dev_get_by_name(net, name);
if (dev == NULL)
ret = -ENXIO; /* Could not find device */
else
br_dev_delete(dev, NULL);
- rtnl_unlock();
return ret;
}
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
- if (copy_from_user(buf, uarg, IFNAMSIZ))
+ if (copy_from_user(buf, (void __user *)args[1], IFNAMSIZ))
return -EFAULT;
buf[IFNAMSIZ-1] = 0;
int br_ioctl_stub(struct net *net, struct net_bridge *br, unsigned int cmd,
struct ifreq *ifr, void __user *uarg)
{
+ int ret = -EOPNOTSUPP;
+
+ rtnl_lock();
+
switch (cmd) {
case SIOCGIFBR:
case SIOCSIFBR:
- return old_deviceless(net, uarg);
-
+ ret = old_deviceless(net, uarg);
+ break;
case SIOCBRADDBR:
case SIOCBRDELBR:
{
char buf[IFNAMSIZ];
- if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
- return -EPERM;
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) {
+ ret = -EPERM;
+ break;
+ }
- if (copy_from_user(buf, uarg, IFNAMSIZ))
- return -EFAULT;
+ if (copy_from_user(buf, uarg, IFNAMSIZ)) {
+ ret = -EFAULT;
+ break;
+ }
buf[IFNAMSIZ-1] = 0;
if (cmd == SIOCBRADDBR)
- return br_add_bridge(net, buf);
-
- return br_del_bridge(net, buf);
+ ret = br_add_bridge(net, buf);
+ else
+ ret = br_del_bridge(net, buf);
}
-
+ break;
case SIOCBRADDIF:
case SIOCBRDELIF:
- return add_del_if(br, ifr->ifr_ifindex, cmd == SIOCBRADDIF);
-
+ ret = add_del_if(br, ifr->ifr_ifindex, cmd == SIOCBRADDIF);
+ break;
}
- return -EOPNOTSUPP;
+
+ rtnl_unlock();
+
+ return ret;
}
int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p);
void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p);
int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
- const unsigned char *addr, u16 vid,
+ const unsigned char *addr, u16 vid, bool is_local,
bool swdev_notify);
int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
const unsigned char *addr, u16 vid,
/* br_switchdev.c */
#ifdef CONFIG_NET_SWITCHDEV
+int br_switchdev_port_offload(struct net_bridge_port *p,
+ struct net_device *dev, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ bool tx_fwd_offload,
+ struct netlink_ext_ack *extack);
+
+void br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb);
+
bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb);
void br_switchdev_frame_set_offload_fwd_mark(struct sk_buff *skb);
skb->offload_fwd_mark = 0;
}
#else
+static inline int
+br_switchdev_port_offload(struct net_bridge_port *p,
+ struct net_device *dev, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ bool tx_fwd_offload,
+ struct netlink_ext_ack *extack)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void
+br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb)
+{
+}
+
static inline bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb)
{
return false;
.is_local = test_bit(BR_FDB_LOCAL, &fdb->flags),
.offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags),
};
- struct net_device *dev = info.is_local ? br->dev : dst->dev;
+ struct net_device *dev = (!dst || info.is_local) ? br->dev : dst->dev;
switch (type) {
case RTM_DELNEIGH:
/* Let the bridge know that this port is offloaded, so that it can assign a
* switchdev hardware domain to it.
*/
-int switchdev_bridge_port_offload(struct net_device *brport_dev,
- struct net_device *dev, const void *ctx,
- struct notifier_block *atomic_nb,
- struct notifier_block *blocking_nb,
- bool tx_fwd_offload,
- struct netlink_ext_ack *extack)
+int br_switchdev_port_offload(struct net_bridge_port *p,
+ struct net_device *dev, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ bool tx_fwd_offload,
+ struct netlink_ext_ack *extack)
{
struct netdev_phys_item_id ppid;
- struct net_bridge_port *p;
int err;
- ASSERT_RTNL();
-
- p = br_port_get_rtnl(brport_dev);
- if (!p)
- return -ENODEV;
-
err = dev_get_port_parent_id(dev, &ppid, false);
if (err)
return err;
return err;
}
-EXPORT_SYMBOL_GPL(switchdev_bridge_port_offload);
-void switchdev_bridge_port_unoffload(struct net_device *brport_dev,
- const void *ctx,
- struct notifier_block *atomic_nb,
- struct notifier_block *blocking_nb)
+void br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb)
{
- struct net_bridge_port *p;
-
- ASSERT_RTNL();
-
- p = br_port_get_rtnl(brport_dev);
- if (!p)
- return;
-
nbp_switchdev_unsync_objs(p, ctx, atomic_nb, blocking_nb);
nbp_switchdev_del(p);
}
-EXPORT_SYMBOL_GPL(switchdev_bridge_port_unoffload);
struct j1939_session;
enum j1939_sk_errqueue_type {
- J1939_ERRQUEUE_ACK,
- J1939_ERRQUEUE_SCHED,
- J1939_ERRQUEUE_ABORT,
+ J1939_ERRQUEUE_TX_ACK,
+ J1939_ERRQUEUE_TX_SCHED,
+ J1939_ERRQUEUE_TX_ABORT,
+ J1939_ERRQUEUE_RX_RTS,
+ J1939_ERRQUEUE_RX_DPO,
+ J1939_ERRQUEUE_RX_ABORT,
};
/* j1939 devices */
struct list_head j1939_socks;
struct kref rx_kref;
+ u32 rx_tskey;
};
void j1939_ecu_put(struct j1939_ecu *ecu);
return NULL;
}
-static size_t j1939_sk_opt_stats_get_size(void)
+static size_t j1939_sk_opt_stats_get_size(enum j1939_sk_errqueue_type type)
{
- return
- nla_total_size(sizeof(u32)) + /* J1939_NLA_BYTES_ACKED */
- 0;
+ switch (type) {
+ case J1939_ERRQUEUE_RX_RTS:
+ return
+ nla_total_size(sizeof(u32)) + /* J1939_NLA_TOTAL_SIZE */
+ nla_total_size(sizeof(u32)) + /* J1939_NLA_PGN */
+ nla_total_size(sizeof(u64)) + /* J1939_NLA_SRC_NAME */
+ nla_total_size(sizeof(u64)) + /* J1939_NLA_DEST_NAME */
+ nla_total_size(sizeof(u8)) + /* J1939_NLA_SRC_ADDR */
+ nla_total_size(sizeof(u8)) + /* J1939_NLA_DEST_ADDR */
+ 0;
+ default:
+ return
+ nla_total_size(sizeof(u32)) + /* J1939_NLA_BYTES_ACKED */
+ 0;
+ }
}
static struct sk_buff *
-j1939_sk_get_timestamping_opt_stats(struct j1939_session *session)
+j1939_sk_get_timestamping_opt_stats(struct j1939_session *session,
+ enum j1939_sk_errqueue_type type)
{
struct sk_buff *stats;
u32 size;
- stats = alloc_skb(j1939_sk_opt_stats_get_size(), GFP_ATOMIC);
+ stats = alloc_skb(j1939_sk_opt_stats_get_size(type), GFP_ATOMIC);
if (!stats)
return NULL;
size = min(session->pkt.tx_acked * 7,
session->total_message_size);
- nla_put_u32(stats, J1939_NLA_BYTES_ACKED, size);
+ switch (type) {
+ case J1939_ERRQUEUE_RX_RTS:
+ nla_put_u32(stats, J1939_NLA_TOTAL_SIZE,
+ session->total_message_size);
+ nla_put_u32(stats, J1939_NLA_PGN,
+ session->skcb.addr.pgn);
+ nla_put_u64_64bit(stats, J1939_NLA_SRC_NAME,
+ session->skcb.addr.src_name, J1939_NLA_PAD);
+ nla_put_u64_64bit(stats, J1939_NLA_DEST_NAME,
+ session->skcb.addr.dst_name, J1939_NLA_PAD);
+ nla_put_u8(stats, J1939_NLA_SRC_ADDR,
+ session->skcb.addr.sa);
+ nla_put_u8(stats, J1939_NLA_DEST_ADDR,
+ session->skcb.addr.da);
+ break;
+ default:
+ nla_put_u32(stats, J1939_NLA_BYTES_ACKED, size);
+ }
return stats;
}
-void j1939_sk_errqueue(struct j1939_session *session,
- enum j1939_sk_errqueue_type type)
+static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk,
+ enum j1939_sk_errqueue_type type)
{
struct j1939_priv *priv = session->priv;
- struct sock *sk = session->sk;
struct j1939_sock *jsk;
struct sock_exterr_skb *serr;
struct sk_buff *skb;
char *state = "UNK";
int err;
- /* currently we have no sk for the RX session */
- if (!sk)
- return;
-
jsk = j1939_sk(sk);
if (!(jsk->state & J1939_SOCK_ERRQUEUE))
return;
- skb = j1939_sk_get_timestamping_opt_stats(session);
+ switch (type) {
+ case J1939_ERRQUEUE_TX_ACK:
+ if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK))
+ return;
+ break;
+ case J1939_ERRQUEUE_TX_SCHED:
+ if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED))
+ return;
+ break;
+ case J1939_ERRQUEUE_TX_ABORT:
+ break;
+ case J1939_ERRQUEUE_RX_RTS:
+ fallthrough;
+ case J1939_ERRQUEUE_RX_DPO:
+ fallthrough;
+ case J1939_ERRQUEUE_RX_ABORT:
+ if (!(sk->sk_tsflags & SOF_TIMESTAMPING_RX_SOFTWARE))
+ return;
+ break;
+ default:
+ netdev_err(priv->ndev, "Unknown errqueue type %i\n", type);
+ }
+
+ skb = j1939_sk_get_timestamping_opt_stats(session, type);
if (!skb)
return;
serr = SKB_EXT_ERR(skb);
memset(serr, 0, sizeof(*serr));
switch (type) {
- case J1939_ERRQUEUE_ACK:
- if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)) {
- kfree_skb(skb);
- return;
- }
-
+ case J1939_ERRQUEUE_TX_ACK:
serr->ee.ee_errno = ENOMSG;
serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
serr->ee.ee_info = SCM_TSTAMP_ACK;
- state = "ACK";
+ state = "TX ACK";
break;
- case J1939_ERRQUEUE_SCHED:
- if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED)) {
- kfree_skb(skb);
- return;
- }
-
+ case J1939_ERRQUEUE_TX_SCHED:
serr->ee.ee_errno = ENOMSG;
serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
serr->ee.ee_info = SCM_TSTAMP_SCHED;
- state = "SCH";
+ state = "TX SCH";
break;
- case J1939_ERRQUEUE_ABORT:
+ case J1939_ERRQUEUE_TX_ABORT:
serr->ee.ee_errno = session->err;
serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
serr->ee.ee_info = J1939_EE_INFO_TX_ABORT;
- state = "ABT";
+ state = "TX ABT";
+ break;
+ case J1939_ERRQUEUE_RX_RTS:
+ serr->ee.ee_errno = ENOMSG;
+ serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
+ serr->ee.ee_info = J1939_EE_INFO_RX_RTS;
+ state = "RX RTS";
+ break;
+ case J1939_ERRQUEUE_RX_DPO:
+ serr->ee.ee_errno = ENOMSG;
+ serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
+ serr->ee.ee_info = J1939_EE_INFO_RX_DPO;
+ state = "RX DPO";
+ break;
+ case J1939_ERRQUEUE_RX_ABORT:
+ serr->ee.ee_errno = session->err;
+ serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
+ serr->ee.ee_info = J1939_EE_INFO_RX_ABORT;
+ state = "RX ABT";
break;
- default:
- netdev_err(priv->ndev, "Unknown errqueue type %i\n", type);
}
serr->opt_stats = true;
kfree_skb(skb);
};
+void j1939_sk_errqueue(struct j1939_session *session,
+ enum j1939_sk_errqueue_type type)
+{
+ struct j1939_priv *priv = session->priv;
+ struct j1939_sock *jsk;
+
+ if (session->sk) {
+ /* send TX notifications to the socket of origin */
+ __j1939_sk_errqueue(session, session->sk, type);
+ return;
+ }
+
+ /* spread RX notifications to all sockets subscribed to this session */
+ spin_lock_bh(&priv->j1939_socks_lock);
+ list_for_each_entry(jsk, &priv->j1939_socks, list) {
+ if (j1939_sk_recv_match_one(jsk, &session->skcb))
+ __j1939_sk_errqueue(session, &jsk->sk, type);
+ }
+ spin_unlock_bh(&priv->j1939_socks_lock);
+};
+
void j1939_sk_send_loop_abort(struct sock *sk, int err)
{
sk->sk_err = err;
static void j1939_session_destroy(struct j1939_session *session)
{
- if (session->err)
- j1939_sk_errqueue(session, J1939_ERRQUEUE_ABORT);
- else
- j1939_sk_errqueue(session, J1939_ERRQUEUE_ACK);
+ if (session->transmission) {
+ if (session->err)
+ j1939_sk_errqueue(session, J1939_ERRQUEUE_TX_ABORT);
+ else
+ j1939_sk_errqueue(session, J1939_ERRQUEUE_TX_ACK);
+ } else if (session->err) {
+ j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT);
+ }
netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
memcpy(&dat[1], &tpdat[offset], len);
ret = j1939_tp_tx_dat(session, dat, len + 1);
if (ret < 0) {
- /* ENOBUS == CAN interface TX queue is full */
+ /* ENOBUFS == CAN interface TX queue is full */
if (ret != -ENOBUFS)
netdev_alert(priv->ndev,
"%s: 0x%p: queue data error: %i\n",
if (ret)
goto out_free;
- j1939_sk_errqueue(session, J1939_ERRQUEUE_SCHED);
+ j1939_sk_errqueue(session, J1939_ERRQUEUE_TX_SCHED);
j1939_sk_queue_activate_next(session);
out_free:
static bool j1939_session_deactivate(struct j1939_session *session)
{
+ struct j1939_priv *priv = session->priv;
bool active;
- j1939_session_list_lock(session->priv);
+ j1939_session_list_lock(priv);
+ /* This function should be called with a session ref-count of at
+ * least 2.
+ */
+ WARN_ON_ONCE(kref_read(&session->kref) < 2);
active = j1939_session_deactivate_locked(session);
- j1939_session_list_unlock(session->priv);
+ j1939_session_list_unlock(priv);
return active;
}
if (session->sk)
j1939_sk_send_loop_abort(session->sk, session->err);
+ else
+ j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT);
}
static void j1939_session_cancel(struct j1939_session *session,
session->err = j1939_xtp_abort_to_errno(priv, abort);
if (session->sk)
j1939_sk_send_loop_abort(session->sk, session->err);
+ else
+ j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT);
j1939_session_deactivate_activate_next(session);
abort_put:
if (session->transmission) {
if (session->pkt.tx_acked)
j1939_sk_errqueue(session,
- J1939_ERRQUEUE_SCHED);
+ J1939_ERRQUEUE_TX_SCHED);
j1939_session_txtimer_cancel(session);
j1939_tp_schedule_txtimer(session, 0);
}
session->pkt.rx = 0;
session->pkt.tx = 0;
+ session->tskey = priv->rx_tskey++;
+ j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_RTS);
+
WARN_ON_ONCE(j1939_session_activate(session));
return session;
session->pkt.dpo = j1939_etp_ctl_to_packet(skb->data);
session->last_cmd = dat[0];
j1939_tp_set_rxtimeout(session, 750);
+
+ if (!session->transmission)
+ j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_DPO);
}
static void j1939_xtp_rx_dpo(struct j1939_priv *priv, struct sk_buff *skb,
if (!session->transmission)
j1939_tp_schedule_txtimer(session, 0);
} else {
- j1939_tp_set_rxtimeout(session, 250);
+ j1939_tp_set_rxtimeout(session, 750);
}
session->last_cmd = 0xff;
consume_skb(se_skb);
return -EFAULT;
}
+ rtnl_lock();
lock_sock(sk);
- if (ro->bound && ro->ifindex)
+ if (ro->bound && ro->ifindex) {
dev = dev_get_by_index(sock_net(sk), ro->ifindex);
+ if (!dev) {
+ if (count > 1)
+ kfree(filter);
+ err = -ENODEV;
+ goto out_fil;
+ }
+ }
if (ro->bound) {
/* (try to) register the new filters */
ro->count = count;
out_fil:
- if (dev)
- dev_put(dev);
-
+ dev_put(dev);
release_sock(sk);
+ rtnl_unlock();
break;
err_mask &= CAN_ERR_MASK;
+ rtnl_lock();
lock_sock(sk);
- if (ro->bound && ro->ifindex)
+ if (ro->bound && ro->ifindex) {
dev = dev_get_by_index(sock_net(sk), ro->ifindex);
+ if (!dev) {
+ err = -ENODEV;
+ goto out_err;
+ }
+ }
/* remove current error mask */
if (ro->bound) {
ro->err_mask = err_mask;
out_err:
- if (dev)
- dev_put(dev);
-
+ dev_put(dev);
release_sock(sk);
+ rtnl_unlock();
break;
}
EXPORT_SYMBOL(dev_remove_offload);
-/******************************************************************************
- *
- * Device Boot-time Settings Routines
- *
- ******************************************************************************/
-
-/* Boot time configuration table */
-static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
-
-/**
- * netdev_boot_setup_add - add new setup entry
- * @name: name of the device
- * @map: configured settings for the device
- *
- * Adds new setup entry to the dev_boot_setup list. The function
- * returns 0 on error and 1 on success. This is a generic routine to
- * all netdevices.
- */
-static int netdev_boot_setup_add(char *name, struct ifmap *map)
-{
- struct netdev_boot_setup *s;
- int i;
-
- s = dev_boot_setup;
- for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
- if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
- memset(s[i].name, 0, sizeof(s[i].name));
- strlcpy(s[i].name, name, IFNAMSIZ);
- memcpy(&s[i].map, map, sizeof(s[i].map));
- break;
- }
- }
-
- return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
-}
-
-/**
- * netdev_boot_setup_check - check boot time settings
- * @dev: the netdevice
- *
- * Check boot time settings for the device.
- * The found settings are set for the device to be used
- * later in the device probing.
- * Returns 0 if no settings found, 1 if they are.
- */
-int netdev_boot_setup_check(struct net_device *dev)
-{
- struct netdev_boot_setup *s = dev_boot_setup;
- int i;
-
- for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
- if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
- !strcmp(dev->name, s[i].name)) {
- dev->irq = s[i].map.irq;
- dev->base_addr = s[i].map.base_addr;
- dev->mem_start = s[i].map.mem_start;
- dev->mem_end = s[i].map.mem_end;
- return 1;
- }
- }
- return 0;
-}
-EXPORT_SYMBOL(netdev_boot_setup_check);
-
-
-/**
- * netdev_boot_base - get address from boot time settings
- * @prefix: prefix for network device
- * @unit: id for network device
- *
- * Check boot time settings for the base address of device.
- * The found settings are set for the device to be used
- * later in the device probing.
- * Returns 0 if no settings found.
- */
-unsigned long netdev_boot_base(const char *prefix, int unit)
-{
- const struct netdev_boot_setup *s = dev_boot_setup;
- char name[IFNAMSIZ];
- int i;
-
- sprintf(name, "%s%d", prefix, unit);
-
- /*
- * If device already registered then return base of 1
- * to indicate not to probe for this interface
- */
- if (__dev_get_by_name(&init_net, name))
- return 1;
-
- for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
- if (!strcmp(name, s[i].name))
- return s[i].map.base_addr;
- return 0;
-}
-
-/*
- * Saves at boot time configured settings for any netdevice.
- */
-int __init netdev_boot_setup(char *str)
-{
- int ints[5];
- struct ifmap map;
-
- str = get_options(str, ARRAY_SIZE(ints), ints);
- if (!str || !*str)
- return 0;
-
- /* Save settings */
- memset(&map, 0, sizeof(map));
- if (ints[0] > 0)
- map.irq = ints[1];
- if (ints[0] > 1)
- map.base_addr = ints[2];
- if (ints[0] > 2)
- map.mem_start = ints[3];
- if (ints[0] > 3)
- map.mem_end = ints[4];
-
- /* Add new entry to the list */
- return netdev_boot_setup_add(str, &map);
-}
-
-__setup("netdev=", netdev_boot_setup);
-
/*******************************************************************************
*
* Device Interface Subroutines
rcu_read_lock();
dev = dev_get_by_name_rcu(net, name);
- if (dev)
- dev_hold(dev);
+ dev_hold(dev);
rcu_read_unlock();
return dev;
}
rcu_read_lock();
dev = dev_get_by_index_rcu(net, ifindex);
- if (dev)
- dev_hold(dev);
+ dev_hold(dev);
rcu_read_unlock();
return dev;
}
EXPORT_SYMBOL(netif_set_real_num_rx_queues);
#endif
+/**
+ * netif_set_real_num_queues - set actual number of RX and TX queues used
+ * @dev: Network device
+ * @txq: Actual number of TX queues
+ * @rxq: Actual number of RX queues
+ *
+ * Set the real number of both TX and RX queues.
+ * Does nothing if the number of queues is already correct.
+ */
+int netif_set_real_num_queues(struct net_device *dev,
+ unsigned int txq, unsigned int rxq)
+{
+ unsigned int old_rxq = dev->real_num_rx_queues;
+ int err;
+
+ if (txq < 1 || txq > dev->num_tx_queues ||
+ rxq < 1 || rxq > dev->num_rx_queues)
+ return -EINVAL;
+
+ /* Start from increases, so the error path only does decreases -
+ * decreases can't fail.
+ */
+ if (rxq > dev->real_num_rx_queues) {
+ err = netif_set_real_num_rx_queues(dev, rxq);
+ if (err)
+ return err;
+ }
+ if (txq > dev->real_num_tx_queues) {
+ err = netif_set_real_num_tx_queues(dev, txq);
+ if (err)
+ goto undo_rx;
+ }
+ if (rxq < dev->real_num_rx_queues)
+ WARN_ON(netif_set_real_num_rx_queues(dev, rxq));
+ if (txq < dev->real_num_tx_queues)
+ WARN_ON(netif_set_real_num_tx_queues(dev, txq));
+
+ return 0;
+undo_rx:
+ WARN_ON(netif_set_real_num_rx_queues(dev, old_rxq));
+ return err;
+}
+EXPORT_SYMBOL(netif_set_real_num_queues);
+
/**
* netif_get_num_default_rss_queues - default number of RSS queues
*
*/
ASSERT_RTNL();
- get_online_cpus();
+ cpus_read_lock();
cpumask_clear(&flush_cpus);
for_each_online_cpu(cpu) {
for_each_cpu(cpu, &flush_cpus)
flush_work(per_cpu_ptr(&flush_works, cpu));
- put_online_cpus();
+ cpus_read_unlock();
}
/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
{
struct netdev_adjacent *lower;
- WARN_ON_ONCE(!rcu_read_lock_held());
+ WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
return dev->xdp_state[mode].prog;
}
-static u8 dev_xdp_prog_count(struct net_device *dev)
+u8 dev_xdp_prog_count(struct net_device *dev)
{
u8 count = 0;
int i;
count++;
return count;
}
+EXPORT_SYMBOL_GPL(dev_xdp_prog_count);
u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode)
{
{
unsigned int num_modes = hweight32(flags & XDP_FLAGS_MODES);
struct bpf_prog *cur_prog;
+ struct net_device *upper;
+ struct list_head *iter;
enum bpf_xdp_mode mode;
bpf_op_t bpf_op;
int err;
return -EBUSY;
}
+ /* don't allow if an upper device already has a program */
+ netdev_for_each_upper_dev_rcu(dev, upper, iter) {
+ if (dev_xdp_prog_count(upper) > 0) {
+ NL_SET_ERR_MSG(extack, "Cannot attach when an upper device already has a program");
+ return -EEXIST;
+ }
+ }
+
cur_prog = dev_xdp_prog(dev, mode);
/* can't replace attached prog with link */
if (link && cur_prog) {
case SIOCBRDELIF:
if (!netif_device_present(dev))
return -ENODEV;
- return br_ioctl_call(net, netdev_priv(dev), cmd, ifr, NULL);
+ if (!netif_is_bridge_master(dev))
+ return -EOPNOTSUPP;
+ dev_hold(dev);
+ rtnl_unlock();
+ err = br_ioctl_call(net, netdev_priv(dev), cmd, ifr, NULL);
+ dev_put(dev);
+ rtnl_lock();
+ return err;
case SIOCSHWTSTAMP:
err = net_hwtstamp_validate(ifr);
return 0;
}
-static int
-devlink_port_fn_hw_addr_fill(struct devlink *devlink, const struct devlink_ops *ops,
- struct devlink_port *port, struct sk_buff *msg,
- struct netlink_ext_ack *extack, bool *msg_updated)
+static int devlink_port_fn_hw_addr_fill(const struct devlink_ops *ops,
+ struct devlink_port *port,
+ struct sk_buff *msg,
+ struct netlink_ext_ack *extack,
+ bool *msg_updated)
{
u8 hw_addr[MAX_ADDR_LEN];
int hw_addr_len;
if (!ops->port_function_hw_addr_get)
return 0;
- err = ops->port_function_hw_addr_get(devlink, port, hw_addr, &hw_addr_len, extack);
+ err = ops->port_function_hw_addr_get(port, hw_addr, &hw_addr_len,
+ extack);
if (err) {
if (err == -EOPNOTSUPP)
return 0;
opstate == DEVLINK_PORT_FN_OPSTATE_ATTACHED;
}
-static int
-devlink_port_fn_state_fill(struct devlink *devlink,
- const struct devlink_ops *ops,
- struct devlink_port *port, struct sk_buff *msg,
- struct netlink_ext_ack *extack,
- bool *msg_updated)
+static int devlink_port_fn_state_fill(const struct devlink_ops *ops,
+ struct devlink_port *port,
+ struct sk_buff *msg,
+ struct netlink_ext_ack *extack,
+ bool *msg_updated)
{
enum devlink_port_fn_opstate opstate;
enum devlink_port_fn_state state;
if (!ops->port_fn_state_get)
return 0;
- err = ops->port_fn_state_get(devlink, port, &state, &opstate, extack);
+ err = ops->port_fn_state_get(port, &state, &opstate, extack);
if (err) {
if (err == -EOPNOTSUPP)
return 0;
devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port,
struct netlink_ext_ack *extack)
{
- struct devlink *devlink = port->devlink;
const struct devlink_ops *ops;
struct nlattr *function_attr;
bool msg_updated = false;
if (!function_attr)
return -EMSGSIZE;
- ops = devlink->ops;
- err = devlink_port_fn_hw_addr_fill(devlink, ops, port, msg,
- extack, &msg_updated);
+ ops = port->devlink->ops;
+ err = devlink_port_fn_hw_addr_fill(ops, port, msg, extack,
+ &msg_updated);
if (err)
goto out;
- err = devlink_port_fn_state_fill(devlink, ops, port, msg, extack,
- &msg_updated);
+ err = devlink_port_fn_state_fill(ops, port, msg, extack, &msg_updated);
out:
if (err || !msg_updated)
nla_nest_cancel(msg, function_attr);
return msg->len;
}
-static int devlink_port_type_set(struct devlink *devlink,
- struct devlink_port *devlink_port,
+static int devlink_port_type_set(struct devlink_port *devlink_port,
enum devlink_port_type port_type)
{
int err;
- if (devlink->ops->port_type_set) {
- if (port_type == devlink_port->type)
- return 0;
- err = devlink->ops->port_type_set(devlink_port, port_type);
- if (err)
- return err;
- devlink_port->desired_type = port_type;
- devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
+ if (!devlink_port->devlink->ops->port_type_set)
+ return -EOPNOTSUPP;
+
+ if (port_type == devlink_port->type)
return 0;
- }
- return -EOPNOTSUPP;
+
+ err = devlink_port->devlink->ops->port_type_set(devlink_port,
+ port_type);
+ if (err)
+ return err;
+
+ devlink_port->desired_type = port_type;
+ devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
+ return 0;
}
-static int
-devlink_port_function_hw_addr_set(struct devlink *devlink, struct devlink_port *port,
- const struct nlattr *attr, struct netlink_ext_ack *extack)
+static int devlink_port_function_hw_addr_set(struct devlink_port *port,
+ const struct nlattr *attr,
+ struct netlink_ext_ack *extack)
{
- const struct devlink_ops *ops;
+ const struct devlink_ops *ops = port->devlink->ops;
const u8 *hw_addr;
int hw_addr_len;
}
}
- ops = devlink->ops;
if (!ops->port_function_hw_addr_set) {
NL_SET_ERR_MSG_MOD(extack, "Port doesn't support function attributes");
return -EOPNOTSUPP;
}
- return ops->port_function_hw_addr_set(devlink, port, hw_addr, hw_addr_len, extack);
+ return ops->port_function_hw_addr_set(port, hw_addr, hw_addr_len,
+ extack);
}
-static int devlink_port_fn_state_set(struct devlink *devlink,
- struct devlink_port *port,
+static int devlink_port_fn_state_set(struct devlink_port *port,
const struct nlattr *attr,
struct netlink_ext_ack *extack)
{
const struct devlink_ops *ops;
state = nla_get_u8(attr);
- ops = devlink->ops;
+ ops = port->devlink->ops;
if (!ops->port_fn_state_set) {
NL_SET_ERR_MSG_MOD(extack,
"Function does not support state setting");
return -EOPNOTSUPP;
}
- return ops->port_fn_state_set(devlink, port, state, extack);
+ return ops->port_fn_state_set(port, state, extack);
}
-static int
-devlink_port_function_set(struct devlink *devlink, struct devlink_port *port,
- const struct nlattr *attr, struct netlink_ext_ack *extack)
+static int devlink_port_function_set(struct devlink_port *port,
+ const struct nlattr *attr,
+ struct netlink_ext_ack *extack)
{
struct nlattr *tb[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1];
int err;
attr = tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR];
if (attr) {
- err = devlink_port_function_hw_addr_set(devlink, port, attr, extack);
+ err = devlink_port_function_hw_addr_set(port, attr, extack);
if (err)
return err;
}
*/
attr = tb[DEVLINK_PORT_FN_ATTR_STATE];
if (attr)
- err = devlink_port_fn_state_set(devlink, port, attr, extack);
+ err = devlink_port_fn_state_set(port, attr, extack);
if (!err)
devlink_port_notify(port, DEVLINK_CMD_PORT_NEW);
struct genl_info *info)
{
struct devlink_port *devlink_port = info->user_ptr[1];
- struct devlink *devlink = devlink_port->devlink;
int err;
if (info->attrs[DEVLINK_ATTR_PORT_TYPE]) {
enum devlink_port_type port_type;
port_type = nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_TYPE]);
- err = devlink_port_type_set(devlink, devlink_port, port_type);
+ err = devlink_port_type_set(devlink_port, port_type);
if (err)
return err;
}
struct nlattr *attr = info->attrs[DEVLINK_ATTR_PORT_FUNCTION];
struct netlink_ext_ack *extack = info->extack;
- err = devlink_port_function_set(devlink, devlink_port, attr, extack);
+ err = devlink_port_function_set(devlink_port, attr, extack);
if (err)
return err;
}
* @ops: ops
* @priv_size: size of user private data
* @net: net namespace
+ * @dev: parent device
*
* Allocate new devlink instance resources, including devlink index
* and name.
*/
struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
- size_t priv_size, struct net *net)
+ size_t priv_size, struct net *net,
+ struct device *dev)
{
struct devlink *devlink;
- if (WARN_ON(!ops))
- return NULL;
-
+ WARN_ON(!ops || !dev);
if (!devlink_reload_actions_valid(ops))
return NULL;
devlink = kzalloc(sizeof(*devlink) + priv_size, GFP_KERNEL);
if (!devlink)
return NULL;
+
+ devlink->dev = dev;
devlink->ops = ops;
xa_init_flags(&devlink->snapshot_ids, XA_FLAGS_ALLOC);
write_pnet(&devlink->_net, net);
* devlink_register - Register devlink instance
*
* @devlink: devlink
- * @dev: parent device
*/
-int devlink_register(struct devlink *devlink, struct device *dev)
+int devlink_register(struct devlink *devlink)
{
- WARN_ON(devlink->dev);
- devlink->dev = dev;
mutex_lock(&devlink_mutex);
list_add_tail(&devlink->list, &devlink_list);
devlink_notify(devlink, DEVLINK_CMD_NEW);
switch (attrs->flavour) {
case DEVLINK_PORT_FLAVOUR_PHYSICAL:
- case DEVLINK_PORT_FLAVOUR_VIRTUAL:
n = snprintf(name, len, "p%u", attrs->phys.port_number);
if (n < len && attrs->split)
n += snprintf(name + n, len - n, "s%u",
attrs->phys.split_subport_number);
- if (!attrs->split)
- n = snprintf(name, len, "p%u", attrs->phys.port_number);
- else
- n = snprintf(name, len, "p%us%u",
- attrs->phys.port_number,
- attrs->phys.split_subport_number);
-
break;
case DEVLINK_PORT_FLAVOUR_CPU:
case DEVLINK_PORT_FLAVOUR_DSA:
n = snprintf(name, len, "pf%usf%u", attrs->pci_sf.pf,
attrs->pci_sf.sf);
break;
+ case DEVLINK_PORT_FLAVOUR_VIRTUAL:
+ return -EOPNOTSUPP;
}
if (n >= len)
}
hw_metadata->input_dev = metadata->input_dev;
- if (hw_metadata->input_dev)
- dev_hold(hw_metadata->input_dev);
+ dev_hold(hw_metadata->input_dev);
return hw_metadata;
static void
net_dm_hw_metadata_free(const struct devlink_trap_metadata *hw_metadata)
{
- if (hw_metadata->input_dev)
- dev_put(hw_metadata->input_dev);
+ dev_put(hw_metadata->input_dev);
kfree(hw_metadata->fa_cookie);
kfree(hw_metadata->trap_name);
kfree(hw_metadata->trap_group_name);
unsigned short flags)
{
dst->dev = dev;
- if (dev)
- dev_hold(dev);
+ dev_hold(dev);
dst->ops = ops;
dst_init_metrics(dst, dst_default_metrics.metrics, true);
dst->expires = 0UL;
if (dst->ops->destroy)
dst->ops->destroy(dst);
- if (dst->dev)
- dev_put(dst->dev);
+ dev_put(dst->dev);
lwtstate_put(dst->lwtstate);
skb->tstamp = 0;
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
- struct sk_buff *skb2;
-
- skb2 = skb_realloc_headroom(skb, hh_len);
- if (unlikely(!skb2)) {
- kfree_skb(skb);
+ skb = skb_expand_head(skb, hh_len);
+ if (!skb)
return -ENOMEM;
- }
- if (skb->sk)
- skb_set_owner_w(skb2, skb->sk);
- consume_skb(skb);
- skb = skb2;
}
rcu_read_lock_bh();
}
rcu_read_unlock_bh();
if (dst)
- IP6_INC_STATS(dev_net(dst->dev),
- ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+ IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
out_drop:
kfree_skb(skb);
return -ENETDOWN;
skb->tstamp = 0;
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
- struct sk_buff *skb2;
-
- skb2 = skb_realloc_headroom(skb, hh_len);
- if (unlikely(!skb2)) {
- kfree_skb(skb);
+ skb = skb_expand_head(skb, hh_len);
+ if (!skb)
return -ENOMEM;
- }
- if (skb->sk)
- skb_set_owner_w(skb2, skb->sk);
- consume_skb(skb);
- skb = skb2;
}
rcu_read_lock_bh();
}
}
+DEFINE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key);
+EXPORT_SYMBOL_GPL(bpf_master_redirect_enabled_key);
+
+u32 xdp_master_redirect(struct xdp_buff *xdp)
+{
+ struct net_device *master, *slave;
+ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+
+ master = netdev_master_upper_dev_get_rcu(xdp->rxq->dev);
+ slave = master->netdev_ops->ndo_xdp_get_xmit_slave(master, xdp);
+ if (slave && slave != xdp->rxq->dev) {
+ /* The target device is different from the receiving device, so
+ * redirect it to the new device.
+ * Using XDP_REDIRECT gets the correct behaviour from XDP enabled
+ * drivers to unmap the packet from their rx ring.
+ */
+ ri->tgt_index = slave->ifindex;
+ ri->map_id = INT_MAX;
+ ri->map_type = BPF_MAP_TYPE_UNSPEC;
+ return XDP_REDIRECT;
+ }
+ return XDP_TX;
+}
+EXPORT_SYMBOL_GPL(xdp_master_redirect);
+
int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
struct bpf_prog *xdp_prog)
{
return -EINVAL;
}
+BPF_CALL_5(bpf_sk_setsockopt, struct sock *, sk, int, level,
+ int, optname, char *, optval, int, optlen)
+{
+ return _bpf_setsockopt(sk, level, optname, optval, optlen);
+}
+
+const struct bpf_func_proto bpf_sk_setsockopt_proto = {
+ .func = bpf_sk_setsockopt,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_PTR_TO_MEM,
+ .arg5_type = ARG_CONST_SIZE,
+};
+
+BPF_CALL_5(bpf_sk_getsockopt, struct sock *, sk, int, level,
+ int, optname, char *, optval, int, optlen)
+{
+ return _bpf_getsockopt(sk, level, optname, optval, optlen);
+}
+
+const struct bpf_func_proto bpf_sk_getsockopt_proto = {
+ .func = bpf_sk_getsockopt,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg5_type = ARG_CONST_SIZE,
+};
+
BPF_CALL_5(bpf_sock_addr_setsockopt, struct bpf_sock_addr_kern *, ctx,
int, level, int, optname, char *, optval, int, optlen)
{
}
EXPORT_SYMBOL(flow_get_u32_dst);
-/* Sort the source and destination IP (and the ports if the IP are the same),
+/* Sort the source and destination IP and the ports,
* to have consistent hash within the two directions
*/
static inline void __flow_hash_consistentify(struct flow_keys *keys)
case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
addr_diff = (__force u32)keys->addrs.v4addrs.dst -
(__force u32)keys->addrs.v4addrs.src;
- if ((addr_diff < 0) ||
- (addr_diff == 0 &&
- ((__force u16)keys->ports.dst <
- (__force u16)keys->ports.src))) {
+ if (addr_diff < 0)
swap(keys->addrs.v4addrs.src, keys->addrs.v4addrs.dst);
+
+ if ((__force u16)keys->ports.dst <
+ (__force u16)keys->ports.src) {
swap(keys->ports.src, keys->ports.dst);
}
break;
addr_diff = memcmp(&keys->addrs.v6addrs.dst,
&keys->addrs.v6addrs.src,
sizeof(keys->addrs.v6addrs.dst));
- if ((addr_diff < 0) ||
- (addr_diff == 0 &&
- ((__force u16)keys->ports.dst <
- (__force u16)keys->ports.src))) {
+ if (addr_diff < 0) {
for (i = 0; i < 4; i++)
swap(keys->addrs.v6addrs.src.s6_addr32[i],
keys->addrs.v6addrs.dst.s6_addr32[i]);
+ }
+ if ((__force u16)keys->ports.dst <
+ (__force u16)keys->ports.src) {
swap(keys->ports.src, keys->ports.dst);
}
break;
write_pnet(&n->net, net);
memcpy(n->key, pkey, key_len);
n->dev = dev;
- if (dev)
- dev_hold(dev);
+ dev_hold(dev);
if (tbl->pconstructor && tbl->pconstructor(n)) {
- if (dev)
- dev_put(dev);
+ dev_put(dev);
kfree(n);
n = NULL;
goto out;
write_unlock_bh(&tbl->lock);
if (tbl->pdestructor)
tbl->pdestructor(n);
- if (n->dev)
- dev_put(n->dev);
+ dev_put(n->dev);
kfree(n);
return 0;
}
n->next = NULL;
if (tbl->pdestructor)
tbl->pdestructor(n);
- if (n->dev)
- dev_put(n->dev);
+ dev_put(n->dev);
kfree(n);
}
return -ENOENT;
list_del(&parms->list);
parms->dead = 1;
write_unlock_bh(&tbl->lock);
- if (parms->dev)
- dev_put(parms->dev);
+ dev_put(parms->dev);
call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
}
EXPORT_SYMBOL(neigh_parms_release);
return false;
master = dev ? netdev_master_upper_dev_get(dev) : NULL;
+
+ /* 0 is already used to denote NDA_MASTER wasn't passed, therefore need another
+ * invalid value for ifindex to denote "no master".
+ */
+ if (master_idx == -1)
+ return !!master;
+
if (!master || master->ifindex != master_idx)
return true;
struct neigh_statistics *st = v;
if (v == SEQ_START_TOKEN) {
- seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
+ seq_puts(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
return 0;
}
- seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
- "%08lx %08lx %08lx %08lx %08lx %08lx\n",
+ seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
+ "%08lx %08lx %08lx "
+ "%08lx %08lx %08lx\n",
atomic_read(&tbl->entries),
st->allocs,
#define DEFER_TIME (msecs_to_jiffies(1000))
#define DEFER_WARN_INTERVAL (60 * HZ)
+#define BIAS_MAX LONG_MAX
+
static int page_pool_init(struct page_pool *pool,
const struct page_pool_params *params)
{
*/
}
+ if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT &&
+ pool->p.flags & PP_FLAG_PAGE_FRAG)
+ return -EINVAL;
+
if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0)
return -ENOMEM;
return true;
}
+static void page_pool_set_pp_info(struct page_pool *pool,
+ struct page *page)
+{
+ page->pp = pool;
+ page->pp_magic |= PP_SIGNATURE;
+}
+
+static void page_pool_clear_pp_info(struct page *page)
+{
+ page->pp_magic = 0;
+ page->pp = NULL;
+}
+
static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
gfp_t gfp)
{
return NULL;
}
- page->pp_magic |= PP_SIGNATURE;
+ page_pool_set_pp_info(pool, page);
/* Track how many pages are held 'in-flight' */
pool->pages_state_hold_cnt++;
put_page(page);
continue;
}
- page->pp_magic |= PP_SIGNATURE;
+
+ page_pool_set_pp_info(pool, page);
pool->alloc.cache[pool->alloc.count++] = page;
/* Track how many pages are held 'in-flight' */
pool->pages_state_hold_cnt++;
DMA_ATTR_SKIP_CPU_SYNC);
page_pool_set_dma_addr(page, 0);
skip_dma_unmap:
- page->pp_magic = 0;
+ page_pool_clear_pp_info(page);
/* This may be the last page returned, releasing the pool, so
* it is not safe to reference pool afterwards.
__page_pool_put_page(struct page_pool *pool, struct page *page,
unsigned int dma_sync_size, bool allow_direct)
{
+ /* It is not the last user for the page frag case */
+ if (pool->p.flags & PP_FLAG_PAGE_FRAG &&
+ page_pool_atomic_sub_frag_count_return(page, 1))
+ return NULL;
+
/* This allocator is optimized for the XDP mode that uses
* one-frame-per-page, but have fallbacks that act like the
* regular page allocator APIs.
}
EXPORT_SYMBOL(page_pool_put_page_bulk);
+static struct page *page_pool_drain_frag(struct page_pool *pool,
+ struct page *page)
+{
+ long drain_count = BIAS_MAX - pool->frag_users;
+
+ /* Some user is still using the page frag */
+ if (likely(page_pool_atomic_sub_frag_count_return(page,
+ drain_count)))
+ return NULL;
+
+ if (page_ref_count(page) == 1 && !page_is_pfmemalloc(page)) {
+ if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
+ page_pool_dma_sync_for_device(pool, page, -1);
+
+ return page;
+ }
+
+ page_pool_return_page(pool, page);
+ return NULL;
+}
+
+static void page_pool_free_frag(struct page_pool *pool)
+{
+ long drain_count = BIAS_MAX - pool->frag_users;
+ struct page *page = pool->frag_page;
+
+ pool->frag_page = NULL;
+
+ if (!page ||
+ page_pool_atomic_sub_frag_count_return(page, drain_count))
+ return;
+
+ page_pool_return_page(pool, page);
+}
+
+struct page *page_pool_alloc_frag(struct page_pool *pool,
+ unsigned int *offset,
+ unsigned int size, gfp_t gfp)
+{
+ unsigned int max_size = PAGE_SIZE << pool->p.order;
+ struct page *page = pool->frag_page;
+
+ if (WARN_ON(!(pool->p.flags & PP_FLAG_PAGE_FRAG) ||
+ size > max_size))
+ return NULL;
+
+ size = ALIGN(size, dma_get_cache_alignment());
+ *offset = pool->frag_offset;
+
+ if (page && *offset + size > max_size) {
+ page = page_pool_drain_frag(pool, page);
+ if (page)
+ goto frag_reset;
+ }
+
+ if (!page) {
+ page = page_pool_alloc_pages(pool, gfp);
+ if (unlikely(!page)) {
+ pool->frag_page = NULL;
+ return NULL;
+ }
+
+ pool->frag_page = page;
+
+frag_reset:
+ pool->frag_users = 1;
+ *offset = 0;
+ pool->frag_offset = size;
+ page_pool_set_frag_count(page, BIAS_MAX);
+ return page;
+ }
+
+ pool->frag_users++;
+ pool->frag_offset = *offset + size;
+ return page;
+}
+EXPORT_SYMBOL(page_pool_alloc_frag);
+
static void page_pool_empty_ring(struct page_pool *pool)
{
struct page *page;
if (!page_pool_put(pool))
return;
+ page_pool_free_frag(pool);
+
if (!page_pool_release(pool))
return;
* The page will be returned to the pool here regardless of the
* 'flipped' fragment being in use or not.
*/
- page->pp = NULL;
page_pool_put_full_page(pp, page, false);
return true;
* pktgen_xmit() is called
*/
pkt_dev->last_ok = 1;
-
- /* override clone_skb if user passed default value
- * at module loading time
- */
- pkt_dev->clone_skb = 0;
} else if (strcmp(f, "queue_xmit") == 0) {
pkt_dev->xmit_mode = M_QUEUE_XMIT;
pkt_dev->last_ok = 1;
return false;
master = netdev_master_upper_dev_get(dev);
+
+ /* 0 is already used to denote IFLA_MASTER wasn't passed, therefore need
+ * another invalid value for ifindex to denote "no master".
+ */
+ if (master_idx == -1)
+ return !!master;
+
if (!master || master->ifindex != master_idx)
return true;
return -EINVAL;
}
-static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
+static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[],
+ struct netlink_ext_ack *extack)
{
if (dev) {
if (tb[IFLA_ADDRESS] &&
return -EOPNOTSUPP;
if (af_ops->validate_link_af) {
- err = af_ops->validate_link_af(dev, af);
+ err = af_ops->validate_link_af(dev, af, extack);
if (err < 0)
return err;
}
const struct net_device_ops *ops = dev->netdev_ops;
int err;
- err = validate_linkmsg(dev, tb);
+ err = validate_linkmsg(dev, tb, extack);
if (err < 0)
return err;
m_ops = master_dev->rtnl_link_ops;
}
- err = validate_linkmsg(dev, tb);
+ err = validate_linkmsg(dev, tb, extack);
if (err < 0)
return err;
}
EXPORT_SYMBOL(skb_realloc_headroom);
+/**
+ * skb_expand_head - reallocate header of &sk_buff
+ * @skb: buffer to reallocate
+ * @headroom: needed headroom
+ *
+ * Unlike skb_realloc_headroom, this one does not allocate a new skb
+ * if possible; copies skb->sk to new skb as needed
+ * and frees original skb in case of failures.
+ *
+ * It expect increased headroom and generates warning otherwise.
+ */
+
+struct sk_buff *skb_expand_head(struct sk_buff *skb, unsigned int headroom)
+{
+ int delta = headroom - skb_headroom(skb);
+
+ if (WARN_ONCE(delta <= 0,
+ "%s is expecting an increase in the headroom", __func__))
+ return skb;
+
+ /* pskb_expand_head() might crash, if skb is shared */
+ if (skb_shared(skb)) {
+ struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
+
+ if (likely(nskb)) {
+ if (skb->sk)
+ skb_set_owner_w(nskb, skb->sk);
+ consume_skb(skb);
+ } else {
+ kfree_skb(skb);
+ }
+ skb = nskb;
+ }
+ if (skb &&
+ pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
+ kfree_skb(skb);
+ skb = NULL;
+ }
+ return skb;
+}
+EXPORT_SYMBOL(skb_expand_head);
+
/**
* skb_copy_expand - copy and expand sk_buff
* @skb: buffer to copy
memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags);
/* We dont need to clear skbinfo->nr_frags here */
- new_truesize = SKB_TRUESIZE(sizeof(struct sk_buff));
+ new_truesize = SKB_DATA_ALIGN(sizeof(struct sk_buff));
delta_truesize = skb->truesize - new_truesize;
skb->truesize = new_truesize;
NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD;
return sk_psock_skb_ingress(psock, skb);
}
-static void sock_drop(struct sock *sk, struct sk_buff *skb)
+static void sk_psock_skb_state(struct sk_psock *psock,
+ struct sk_psock_work_state *state,
+ struct sk_buff *skb,
+ int len, int off)
{
- sk_drops_add(sk, skb);
- kfree_skb(skb);
+ spin_lock_bh(&psock->ingress_lock);
+ if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
+ state->skb = skb;
+ state->len = len;
+ state->off = off;
+ } else {
+ sock_drop(psock->sk, skb);
+ }
+ spin_unlock_bh(&psock->ingress_lock);
}
static void sk_psock_backlog(struct work_struct *work)
{
struct sk_psock *psock = container_of(work, struct sk_psock, work);
struct sk_psock_work_state *state = &psock->work_state;
- struct sk_buff *skb;
+ struct sk_buff *skb = NULL;
bool ingress;
u32 len, off;
int ret;
mutex_lock(&psock->work_mutex);
- if (state->skb) {
+ if (unlikely(state->skb)) {
+ spin_lock_bh(&psock->ingress_lock);
skb = state->skb;
len = state->len;
off = state->off;
state->skb = NULL;
- goto start;
+ spin_unlock_bh(&psock->ingress_lock);
}
+ if (skb)
+ goto start;
while ((skb = skb_dequeue(&psock->ingress_skb))) {
len = skb->len;
len, ingress);
if (ret <= 0) {
if (ret == -EAGAIN) {
- state->skb = skb;
- state->len = len;
- state->off = off;
+ sk_psock_skb_state(psock, state, skb,
+ len, off);
goto end;
}
/* Hard errors break pipe and stop xmit. */
skb_bpf_redirect_clear(skb);
sock_drop(psock->sk, skb);
}
+ kfree_skb(psock->work_state.skb);
+ /* We null the skb here to ensure that calls to sk_psock_backlog
+ * do not pick up the free'd skb.
+ */
+ psock->work_state.skb = NULL;
__sk_psock_purge_ingress_msg(psock);
}
void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
{
- sk_psock_stop(psock, false);
-
write_lock_bh(&sk->sk_callback_lock);
sk_psock_restore_proto(sk, psock);
rcu_assign_sk_user_data(sk, NULL);
sk_psock_stop_verdict(sk, psock);
write_unlock_bh(&sk->sk_callback_lock);
+ sk_psock_stop(psock, false);
+
INIT_RCU_WORK(&psock->rwork, sk_psock_destroy);
queue_rcu_work(system_wq, &psock->rwork);
}
ret = sock_bindtoindex_locked(sk, val);
break;
+ case SO_BUF_LOCK:
+ if (val & ~SOCK_BUF_LOCK_MASK) {
+ ret = -EINVAL;
+ break;
+ }
+ sk->sk_userlocks = val | (sk->sk_userlocks &
+ ~SOCK_BUF_LOCK_MASK);
+ break;
+
default:
ret = -ENOPROTOOPT;
break;
v.val64 = sock_net(sk)->net_cookie;
break;
+ case SO_BUF_LOCK:
+ v.val = sk->sk_userlocks & SOCK_BUF_LOCK_MASK;
+ break;
+
default:
/* We implement the SO_SNDLOWAT etc to not be settable
* (1003.1g 7).
}
spin_unlock(&dndev_lock);
- if (old)
- dev_put(old);
+ dev_put(old);
return rv;
}
}
spin_unlock(&dndev_lock);
- if (dev)
- dev_put(dev);
+ dev_put(dev);
}
/*
}
change_nexthops(fi) {
- if (nh->nh_dev)
- dev_put(nh->nh_dev);
+ dev_put(nh->nh_dev);
nh->nh_dev = NULL;
} endfor_nexthops(fi);
kfree(fi);
return ofi;
}
- refcount_inc(&fi->fib_treeref);
+ refcount_set(&fi->fib_treeref, 1);
refcount_set(&fi->fib_clntref, 1);
spin_lock(&dn_fib_info_lock);
fi->fib_next = dn_fib_info_list;
if (!fld.daddr) {
fld.daddr = fld.saddr;
- if (dev_out)
- dev_put(dev_out);
+ dev_put(dev_out);
err = -EINVAL;
dev_out = init_net.loopback_dev;
if (!dev_out->dn_ptr)
neigh_release(neigh);
neigh = NULL;
} else {
- if (dev_out)
- dev_put(dev_out);
+ dev_put(dev_out);
if (dn_dev_islocal(neigh->dev, fld.daddr)) {
dev_out = init_net.loopback_dev;
res.type = RTN_LOCAL;
if (res.type == RTN_LOCAL) {
if (!fld.saddr)
fld.saddr = fld.daddr;
- if (dev_out)
- dev_put(dev_out);
+ dev_put(dev_out);
dev_out = init_net.loopback_dev;
dev_hold(dev_out);
if (!dev_out->dn_ptr)
if (!fld.saddr)
fld.saddr = DN_FIB_RES_PREFSRC(res);
- if (dev_out)
- dev_put(dev_out);
+ dev_put(dev_out);
dev_out = DN_FIB_RES_DEV(res);
dev_hold(dev_out);
fld.flowidn_oif = dev_out->ifindex;
neigh_release(neigh);
if (free_res)
dn_fib_res_put(&res);
- if (dev_out)
- dev_put(dev_out);
+ dev_put(dev_out);
out:
return err;
if (free_res)
dn_fib_res_put(&res);
dev_put(in_dev);
- if (out_dev)
- dev_put(out_dev);
+ dev_put(out_dev);
out:
return err;
config NET_DSA_TAG_SJA1105
tristate "Tag driver for NXP SJA1105 switches"
+ depends on (NET_DSA_SJA1105 && NET_DSA_SJA1105_PTP) || !NET_DSA_SJA1105 || !NET_DSA_SJA1105_PTP
select PACKING
help
Say Y or M if you want to enable support for tagging frames with the
if (!skb)
return 0;
- nskb = cpu_dp->rcv(skb, dev, pt);
+ nskb = cpu_dp->rcv(skb, dev);
if (!nskb) {
kfree_skb(skb);
return 0;
return NULL;
}
+/* Assign the default CPU port (the first one in the tree) to all ports of the
+ * fabric which don't already have one as part of their own switch.
+ */
static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst)
{
struct dsa_port *cpu_dp, *dp;
return -EINVAL;
}
- /* Assign the default CPU port to all ports of the fabric */
- list_for_each_entry(dp, &dst->ports, list)
+ list_for_each_entry(dp, &dst->ports, list) {
+ if (dp->cpu_dp)
+ continue;
+
if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp))
dp->cpu_dp = cpu_dp;
+ }
return 0;
}
-static void dsa_tree_teardown_default_cpu(struct dsa_switch_tree *dst)
+/* Perform initial assignment of CPU ports to user ports and DSA links in the
+ * fabric, giving preference to CPU ports local to each switch. Default to
+ * using the first CPU port in the switch tree if the port does not have a CPU
+ * port local to this switch.
+ */
+static int dsa_tree_setup_cpu_ports(struct dsa_switch_tree *dst)
+{
+ struct dsa_port *cpu_dp, *dp;
+
+ list_for_each_entry(cpu_dp, &dst->ports, list) {
+ if (!dsa_port_is_cpu(cpu_dp))
+ continue;
+
+ list_for_each_entry(dp, &dst->ports, list) {
+ /* Prefer a local CPU port */
+ if (dp->ds != cpu_dp->ds)
+ continue;
+
+ /* Prefer the first local CPU port found */
+ if (dp->cpu_dp)
+ continue;
+
+ if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp))
+ dp->cpu_dp = cpu_dp;
+ }
+ }
+
+ return dsa_tree_setup_default_cpu(dst);
+}
+
+static void dsa_tree_teardown_cpu_ports(struct dsa_switch_tree *dst)
{
struct dsa_port *dp;
/* Add the switch to devlink before calling setup, so that setup can
* add dpipe tables
*/
- ds->devlink = devlink_alloc(&dsa_devlink_ops, sizeof(*dl_priv));
+ ds->devlink =
+ devlink_alloc(&dsa_devlink_ops, sizeof(*dl_priv), ds->dev);
if (!ds->devlink)
return -ENOMEM;
dl_priv = devlink_priv(ds->devlink);
dl_priv->ds = ds;
- err = devlink_register(ds->devlink, ds->dev);
+ err = devlink_register(ds->devlink);
if (err)
goto free_devlink;
if (!complete)
return 0;
- err = dsa_tree_setup_default_cpu(dst);
+ err = dsa_tree_setup_cpu_ports(dst);
if (err)
return err;
err = dsa_tree_setup_switches(dst);
if (err)
- goto teardown_default_cpu;
+ goto teardown_cpu_ports;
err = dsa_tree_setup_master(dst);
if (err)
dsa_tree_teardown_master(dst);
teardown_switches:
dsa_tree_teardown_switches(dst);
-teardown_default_cpu:
- dsa_tree_teardown_default_cpu(dst);
+teardown_cpu_ports:
+ dsa_tree_teardown_cpu_ports(dst);
return err;
}
dsa_tree_teardown_switches(dst);
- dsa_tree_teardown_default_cpu(dst);
+ dsa_tree_teardown_cpu_ports(dst);
list_for_each_entry_safe(dl, next, &dst->rtable, list) {
list_del(&dl->list);
/* port.c */
void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp,
const struct dsa_device_ops *tag_ops);
-int dsa_port_set_state(struct dsa_port *dp, u8 state);
+int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age);
int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy);
int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy);
void dsa_port_disable_rt(struct dsa_port *dp);
int dsa_port_pre_bridge_flags(const struct dsa_port *dp,
struct switchdev_brport_flags flags,
struct netlink_ext_ack *extack);
-int dsa_port_bridge_flags(const struct dsa_port *dp,
+int dsa_port_bridge_flags(struct dsa_port *dp,
struct switchdev_brport_flags flags,
struct netlink_ext_ack *extack);
-int dsa_port_mrouter(struct dsa_port *dp, bool mrouter,
- struct netlink_ext_ack *extack);
int dsa_port_vlan_add(struct dsa_port *dp,
const struct switchdev_obj_port_vlan *vlan,
struct netlink_ext_ack *extack);
return dsa_tree_notify(dp->ds->dst, e, v);
}
-int dsa_port_set_state(struct dsa_port *dp, u8 state)
+static void dsa_port_notify_bridge_fdb_flush(const struct dsa_port *dp)
+{
+ struct net_device *brport_dev = dsa_port_to_bridge_port(dp);
+ struct switchdev_notifier_fdb_info info = {
+ /* flush all VLANs */
+ .vid = 0,
+ };
+
+ /* When the port becomes standalone it has already left the bridge.
+ * Don't notify the bridge in that case.
+ */
+ if (!brport_dev)
+ return;
+
+ call_switchdev_notifiers(SWITCHDEV_FDB_FLUSH_TO_BRIDGE,
+ brport_dev, &info.info, NULL);
+}
+
+static void dsa_port_fast_age(const struct dsa_port *dp)
+{
+ struct dsa_switch *ds = dp->ds;
+
+ if (!ds->ops->port_fast_age)
+ return;
+
+ ds->ops->port_fast_age(ds, dp->index);
+
+ dsa_port_notify_bridge_fdb_flush(dp);
+}
+
+static bool dsa_port_can_configure_learning(struct dsa_port *dp)
+{
+ struct switchdev_brport_flags flags = {
+ .mask = BR_LEARNING,
+ };
+ struct dsa_switch *ds = dp->ds;
+ int err;
+
+ if (!ds->ops->port_bridge_flags || !ds->ops->port_pre_bridge_flags)
+ return false;
+
+ err = ds->ops->port_pre_bridge_flags(ds, dp->index, flags, NULL);
+ return !err;
+}
+
+int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age)
{
struct dsa_switch *ds = dp->ds;
int port = dp->index;
ds->ops->port_stp_state_set(ds, port, state);
- if (ds->ops->port_fast_age) {
+ if (!dsa_port_can_configure_learning(dp) ||
+ (do_fast_age && dp->learning)) {
/* Fast age FDB entries or flush appropriate forwarding database
* for the given port, if we are moving it from Learning or
* Forwarding state, to Disabled or Blocking or Listening state.
+ * Ports that were standalone before the STP state change don't
+ * need to fast age the FDB, since address learning is off in
+ * standalone mode.
*/
if ((dp->stp_state == BR_STATE_LEARNING ||
(state == BR_STATE_DISABLED ||
state == BR_STATE_BLOCKING ||
state == BR_STATE_LISTENING))
- ds->ops->port_fast_age(ds, port);
+ dsa_port_fast_age(dp);
}
dp->stp_state = state;
return 0;
}
-static void dsa_port_set_state_now(struct dsa_port *dp, u8 state)
+static void dsa_port_set_state_now(struct dsa_port *dp, u8 state,
+ bool do_fast_age)
{
int err;
- err = dsa_port_set_state(dp, state);
+ err = dsa_port_set_state(dp, state, do_fast_age);
if (err)
pr_err("DSA: failed to set STP state %u (%d)\n", state, err);
}
}
if (!dp->bridge_dev)
- dsa_port_set_state_now(dp, BR_STATE_FORWARDING);
+ dsa_port_set_state_now(dp, BR_STATE_FORWARDING, false);
if (dp->pl)
phylink_start(dp->pl);
phylink_stop(dp->pl);
if (!dp->bridge_dev)
- dsa_port_set_state_now(dp, BR_STATE_DISABLED);
+ dsa_port_set_state_now(dp, BR_STATE_DISABLED, false);
if (ds->ops->port_disable)
ds->ops->port_disable(ds, port);
if (err)
return err;
- err = dsa_port_set_state(dp, br_port_get_stp_state(brport_dev));
+ err = dsa_port_set_state(dp, br_port_get_stp_state(brport_dev), false);
if (err && err != -EOPNOTSUPP)
return err;
if (err && err != -EOPNOTSUPP)
return err;
- err = dsa_port_mrouter(dp->cpu_dp, br_multicast_router(br), extack);
- if (err && err != -EOPNOTSUPP)
- return err;
-
err = dsa_port_ageing_time(dp, br_get_ageing_time(br));
if (err && err != -EOPNOTSUPP)
return err;
/* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer,
* so allow it to be in BR_STATE_FORWARDING to be kept functional
*/
- dsa_port_set_state_now(dp, BR_STATE_FORWARDING);
+ dsa_port_set_state_now(dp, BR_STATE_FORWARDING, true);
/* VLAN filtering is handled by dsa_switch_bridge_leave */
- /* Some drivers treat the notification for having a local multicast
- * router by allowing multicast to be flooded to the CPU, so we should
- * allow this in standalone mode too.
- */
- dsa_port_mrouter(dp->cpu_dp, true, NULL);
-
/* Ageing time may be global to the switch chip, so don't change it
* here because we have no good reason (or value) to change it to.
*/
return ds->ops->port_pre_bridge_flags(ds, dp->index, flags, extack);
}
-int dsa_port_bridge_flags(const struct dsa_port *dp,
+int dsa_port_bridge_flags(struct dsa_port *dp,
struct switchdev_brport_flags flags,
struct netlink_ext_ack *extack)
{
struct dsa_switch *ds = dp->ds;
+ int err;
if (!ds->ops->port_bridge_flags)
return -EOPNOTSUPP;
- return ds->ops->port_bridge_flags(ds, dp->index, flags, extack);
-}
+ err = ds->ops->port_bridge_flags(ds, dp->index, flags, extack);
+ if (err)
+ return err;
-int dsa_port_mrouter(struct dsa_port *dp, bool mrouter,
- struct netlink_ext_ack *extack)
-{
- struct dsa_switch *ds = dp->ds;
+ if (flags.mask & BR_LEARNING) {
+ bool learning = flags.val & BR_LEARNING;
- if (!ds->ops->port_set_mrouter)
- return -EOPNOTSUPP;
+ if (learning == dp->learning)
+ return 0;
+
+ if ((dp->learning && !learning) &&
+ (dp->stp_state == BR_STATE_LEARNING ||
+ dp->stp_state == BR_STATE_FORWARDING))
+ dsa_port_fast_age(dp);
- return ds->ops->port_set_mrouter(ds, dp->index, mrouter, extack);
+ dp->learning = learning;
+ }
+
+ return 0;
}
int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu,
if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev))
return -EOPNOTSUPP;
- ret = dsa_port_set_state(dp, attr->u.stp_state);
+ ret = dsa_port_set_state(dp, attr->u.stp_state, true);
break;
case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
if (!dsa_port_offloads_bridge(dp, attr->orig_dev))
ret = dsa_port_bridge_flags(dp, attr->u.brport_flags, extack);
break;
- case SWITCHDEV_ATTR_ID_BRIDGE_MROUTER:
- if (!dsa_port_offloads_bridge(dp, attr->orig_dev))
- return -EOPNOTSUPP;
-
- ret = dsa_port_mrouter(dp->cpu_dp, attr->u.mrouter, extack);
- break;
default:
ret = -EOPNOTSUPP;
break;
}
static struct sk_buff *ar9331_tag_rcv(struct sk_buff *skb,
- struct net_device *ndev,
- struct packet_type *pt)
+ struct net_device *ndev)
{
u8 ver, port;
u16 hdr;
*/
static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb,
struct net_device *dev,
- struct packet_type *pt,
unsigned int offset)
{
int source_port;
}
-static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt)
+static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev)
{
struct sk_buff *nskb;
/* skb->data points to the EtherType, the tag is right before it */
- nskb = brcm_tag_rcv_ll(skb, dev, pt, 2);
+ nskb = brcm_tag_rcv_ll(skb, dev, 2);
if (!nskb)
return nskb;
}
static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb,
- struct net_device *dev,
- struct packet_type *pt)
+ struct net_device *dev)
{
int source_port;
u8 *brcm_tag;
}
static struct sk_buff *brcm_tag_rcv_prepend(struct sk_buff *skb,
- struct net_device *dev,
- struct packet_type *pt)
+ struct net_device *dev)
{
/* tag is prepended to the packet */
- return brcm_tag_rcv_ll(skb, dev, pt, ETH_HLEN);
+ return brcm_tag_rcv_ll(skb, dev, ETH_HLEN);
}
static const struct dsa_device_ops brcm_prepend_netdev_ops = {
return dsa_xmit_ll(skb, dev, 0);
}
-static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt)
+static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev)
{
if (unlikely(!pskb_may_pull(skb, DSA_HLEN)))
return NULL;
return skb;
}
-static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt)
+static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev)
{
if (unlikely(!pskb_may_pull(skb, EDSA_HLEN)))
return NULL;
}
static struct sk_buff *gswip_tag_rcv(struct sk_buff *skb,
- struct net_device *dev,
- struct packet_type *pt)
+ struct net_device *dev)
{
int port;
u8 *gswip_tag;
}
static struct sk_buff *hellcreek_rcv(struct sk_buff *skb,
- struct net_device *dev,
- struct packet_type *pt)
+ struct net_device *dev)
{
/* Tag decoding */
u8 *tag = skb_tail_pointer(skb) - HELLCREEK_TAG_LEN;
return skb;
}
-static struct sk_buff *ksz8795_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt)
+static struct sk_buff *ksz8795_rcv(struct sk_buff *skb, struct net_device *dev)
{
u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
return skb;
}
-static struct sk_buff *ksz9477_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt)
+static struct sk_buff *ksz9477_rcv(struct sk_buff *skb, struct net_device *dev)
{
/* Tag decoding */
u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
return skb;
}
-static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt)
+static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev)
{
__be16 *lan9303_tag;
u16 lan9303_tag1;
return skb;
}
-static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt)
+static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev)
{
u16 hdr;
int port;
}
static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
- struct net_device *netdev,
- struct packet_type *pt)
+ struct net_device *netdev)
{
u64 src_port, qos_class;
u64 vlan_tci, tag_type;
}
static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
- struct net_device *netdev,
- struct packet_type *pt)
+ struct net_device *netdev)
{
int src_port, switch_id;
return skb;
}
-static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt)
+static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev)
{
u8 ver;
u16 hdr;
}
static struct sk_buff *rtl4a_tag_rcv(struct sk_buff *skb,
- struct net_device *dev,
- struct packet_type *pt)
+ struct net_device *dev)
{
u16 protport;
__be16 *p;
}
static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
- struct net_device *netdev,
- struct packet_type *pt)
+ struct net_device *netdev)
{
int source_port = -1, switch_id = -1;
struct sja1105_meta meta = {0};
}
static struct sk_buff *sja1110_rcv(struct sk_buff *skb,
- struct net_device *netdev,
- struct packet_type *pt)
+ struct net_device *netdev)
{
int source_port = -1, switch_id = -1;
bool host_only = false;
- u16 vid;
+ u16 vid = 0;
if (sja1110_skb_has_inband_control_extension(skb)) {
skb = sja1110_rcv_inband_control_extension(skb, &source_port,
return skb;
}
-static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt)
+static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev)
{
u8 *trailer;
int source_port;
return skb;
}
-static struct sk_buff *xrs700x_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt)
+static struct sk_buff *xrs700x_rcv(struct sk_buff *skb, struct net_device *dev)
{
int source_port;
u8 *trailer;
#include <linux/uaccess.h>
#include <net/pkt_sched.h>
-__setup("ether=", netdev_boot_setup);
-
/**
* eth_header - create the Ethernet header
* @skb: buffer to alter
#include <linux/rtnetlink.h>
#include <linux/sched/signal.h>
#include <linux/net.h>
+#include <linux/pm_runtime.h>
#include <net/devlink.h>
#include <net/xdp_sock_drv.h>
#include <net/flow_offload.h>
int rc;
netdev_features_t old_features;
- if (!dev || !netif_device_present(dev))
+ if (!dev)
return -ENODEV;
if (copy_from_user(ðcmd, useraddr, sizeof(ethcmd)))
return -EPERM;
}
+ if (dev->dev.parent)
+ pm_runtime_get_sync(dev->dev.parent);
+
+ if (!netif_device_present(dev)) {
+ rc = -ENODEV;
+ goto out;
+ }
+
if (dev->ethtool_ops->begin) {
rc = dev->ethtool_ops->begin(dev);
- if (rc < 0)
- return rc;
+ if (rc < 0)
+ goto out;
}
old_features = dev->features;
if (old_features != dev->features)
netdev_features_change(dev);
+out:
+ if (dev->dev.parent)
+ pm_runtime_put(dev->dev.parent);
return rc;
}
#include <net/sock.h>
#include <linux/ethtool_netlink.h>
+#include <linux/pm_runtime.h>
#include "netlink.h"
static struct genl_family ethtool_genl_family;
ETHTOOL_FLAGS_STATS),
};
+int ethnl_ops_begin(struct net_device *dev)
+{
+ int ret;
+
+ if (!dev)
+ return -ENODEV;
+
+ if (dev->dev.parent)
+ pm_runtime_get_sync(dev->dev.parent);
+
+ if (!netif_device_present(dev)) {
+ ret = -ENODEV;
+ goto err;
+ }
+
+ if (dev->ethtool_ops->begin) {
+ ret = dev->ethtool_ops->begin(dev);
+ if (ret)
+ goto err;
+ }
+
+ return 0;
+err:
+ if (dev->dev.parent)
+ pm_runtime_put(dev->dev.parent);
+
+ return ret;
+}
+
+void ethnl_ops_complete(struct net_device *dev)
+{
+ if (dev->ethtool_ops->complete)
+ dev->ethtool_ops->complete(dev);
+
+ if (dev->dev.parent)
+ pm_runtime_put(dev->dev.parent);
+}
+
/**
* ethnl_parse_header_dev_get() - parse request header
* @req_info: structure to put results into
return -EINVAL;
}
- if (dev && !netif_device_present(dev)) {
- dev_put(dev);
- NL_SET_ERR_MSG(extack, "device not present");
- return -ENODEV;
- }
-
req_info->dev = dev;
req_info->flags = flags;
return 0;
ops->cleanup_data(reply_data);
genlmsg_end(rskb, reply_payload);
- if (req_info->dev)
- dev_put(req_info->dev);
+ dev_put(req_info->dev);
kfree(reply_data);
kfree(req_info);
return genlmsg_reply(rskb, info);
if (ops->cleanup_data)
ops->cleanup_data(reply_data);
err_dev:
- if (req_info->dev)
- dev_put(req_info->dev);
+ dev_put(req_info->dev);
kfree(reply_data);
kfree(req_info);
return ret;
struct net_device *dev;
};
-static inline int ethnl_ops_begin(struct net_device *dev)
-{
- if (dev && dev->ethtool_ops->begin)
- return dev->ethtool_ops->begin(dev);
- else
- return 0;
-}
-
-static inline void ethnl_ops_complete(struct net_device *dev)
-{
- if (dev && dev->ethtool_ops->complete)
- dev->ethtool_ops->complete(dev);
-}
+int ethnl_ops_begin(struct net_device *dev);
+void ethnl_ops_complete(struct net_device *dev);
/**
* struct ethnl_request_ops - unified handling of GET requests
out_dev:
wpan_phy_put(phy);
out:
- if (dev)
- dev_put(dev);
+ dev_put(dev);
return rc;
}
if (ops->internal_flags & NL802154_FLAG_NEED_WPAN_DEV) {
struct wpan_dev *wpan_dev = info->user_ptr[1];
- if (wpan_dev->netdev)
- dev_put(wpan_dev->netdev);
+ dev_put(wpan_dev->netdev);
} else {
dev_put(info->user_ptr[1]);
}
ieee802154_devaddr_to_raw(hwaddr, addr->extended_addr);
rcu_read_lock();
dev = dev_getbyhwaddr_rcu(net, ARPHRD_IEEE802154, hwaddr);
- if (dev)
- dev_hold(dev);
+ dev_hold(dev);
rcu_read_unlock();
break;
case IEEE802154_ADDR_SHORT:
};
static int inet_validate_link_af(const struct net_device *dev,
- const struct nlattr *nla)
+ const struct nlattr *nla,
+ struct netlink_ext_ack *extack)
{
struct nlattr *a, *tb[IFLA_INET_MAX+1];
int err, rem;
return -EAFNOSUPPORT;
err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
- inet_af_policy, NULL);
+ inet_af_policy, extack);
if (err < 0)
return err;
void fib_nh_common_release(struct fib_nh_common *nhc)
{
- if (nhc->nhc_dev)
- dev_put(nhc->nhc_dev);
-
+ dev_put(nhc->nhc_dev);
lwtstate_put(nhc->nhc_lwtstate);
rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output);
rt_fibinfo_free(&nhc->nhc_rth_input);
return ofi;
}
- refcount_inc(&fi->fib_treeref);
+ refcount_set(&fi->fib_treeref, 1);
refcount_set(&fi->fib_clntref, 1);
spin_lock_bh(&fib_info_lock);
hlist_add_head(&fi->fib_hash,
sizeof(struct in6_addr))
goto send_mal_query;
dev = ipv6_stub->ipv6_dev_find(net, &iio->ident.addr.ip_addr.ipv6_addr, dev);
- if (dev)
- dev_hold(dev);
+ dev_hold(dev);
break;
#endif
default:
iml->sfmode, psf->sl_count, psf->sl_addr, 0);
RCU_INIT_POINTER(iml->sflist, NULL);
/* decrease mem now to avoid the memleak warning */
- atomic_sub(IP_SFLSIZE(psf->sl_max), &sk->sk_omem_alloc);
+ atomic_sub(struct_size(psf, sl_addr, psf->sl_max), &sk->sk_omem_alloc);
kfree_rcu(psf, rcu);
return err;
}
if (psl)
count += psl->sl_max;
- newpsl = sock_kmalloc(sk, IP_SFLSIZE(count), GFP_KERNEL);
+ newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr, count),
+ GFP_KERNEL);
if (!newpsl) {
err = -ENOBUFS;
goto done;
for (i = 0; i < psl->sl_count; i++)
newpsl->sl_addr[i] = psl->sl_addr[i];
/* decrease mem now to avoid the memleak warning */
- atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
+ atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
+ &sk->sk_omem_alloc);
kfree_rcu(psl, rcu);
}
rcu_assign_pointer(pmc->sflist, newpsl);
goto done;
}
if (msf->imsf_numsrc) {
- newpsl = sock_kmalloc(sk, IP_SFLSIZE(msf->imsf_numsrc),
- GFP_KERNEL);
+ newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr,
+ msf->imsf_numsrc),
+ GFP_KERNEL);
if (!newpsl) {
err = -ENOBUFS;
goto done;
}
newpsl->sl_max = newpsl->sl_count = msf->imsf_numsrc;
- memcpy(newpsl->sl_addr, msf->imsf_slist,
- msf->imsf_numsrc * sizeof(msf->imsf_slist[0]));
+ memcpy(newpsl->sl_addr, msf->imsf_slist_flex,
+ flex_array_size(msf, imsf_slist_flex, msf->imsf_numsrc));
err = ip_mc_add_src(in_dev, &msf->imsf_multiaddr,
msf->imsf_fmode, newpsl->sl_count, newpsl->sl_addr, 0);
if (err) {
- sock_kfree_s(sk, newpsl, IP_SFLSIZE(newpsl->sl_max));
+ sock_kfree_s(sk, newpsl,
+ struct_size(newpsl, sl_addr,
+ newpsl->sl_max));
goto done;
}
} else {
(void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
psl->sl_count, psl->sl_addr, 0);
/* decrease mem now to avoid the memleak warning */
- atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
+ atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
+ &sk->sk_omem_alloc);
kfree_rcu(psl, rcu);
} else
(void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
count = psl->sl_count;
}
copycount = count < msf->imsf_numsrc ? count : msf->imsf_numsrc;
- len = copycount * sizeof(psl->sl_addr[0]);
+ len = flex_array_size(psl, sl_addr, copycount);
msf->imsf_numsrc = count;
if (put_user(IP_MSFILTER_SIZE(copycount), optlen) ||
copy_to_user(optval, msf, IP_MSFILTER_SIZE(0))) {
return -EFAULT;
}
if (len &&
- copy_to_user(&optval->imsf_slist[0], psl->sl_addr, len))
+ copy_to_user(&optval->imsf_slist_flex[0], psl->sl_addr, len))
return -EFAULT;
return 0;
done:
} else if (rt->rt_type == RTN_BROADCAST)
IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTBCAST, skb->len);
- /* Be paranoid, rather than too clever. */
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
- struct sk_buff *skb2;
-
- skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
- if (!skb2) {
- kfree_skb(skb);
+ skb = skb_expand_head(skb, hh_len);
+ if (!skb)
return -ENOMEM;
- }
- if (skb->sk)
- skb_set_owner_w(skb2, skb->sk);
- consume_skb(skb);
- skb = skb2;
}
if (lwtunnel_xmit_redirect(dst->lwtstate)) {
struct sockaddr_storage *group,
struct sockaddr_storage *list)
{
- int msize = IP_MSFILTER_SIZE(numsrc);
struct ip_msfilter *msf;
struct sockaddr_in *psin;
int err, i;
- msf = kmalloc(msize, GFP_KERNEL);
+ msf = kmalloc(IP_MSFILTER_SIZE(numsrc), GFP_KERNEL);
if (!msf)
return -ENOBUFS;
if (psin->sin_family != AF_INET)
goto Eaddrnotavail;
- msf->imsf_slist[i] = psin->sin_addr.s_addr;
+ msf->imsf_slist_flex[i] = psin->sin_addr.s_addr;
}
err = ip_mc_msfilter(sk, msf, ifindex);
kfree(msf);
goto out_free_gsf;
err = set_mcast_msfilter(sk, gsf->gf_interface, gsf->gf_numsrc,
- gsf->gf_fmode, &gsf->gf_group, gsf->gf_slist);
+ gsf->gf_fmode, &gsf->gf_group,
+ gsf->gf_slist_flex);
out_free_gsf:
kfree(gsf);
return err;
static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
int optlen)
{
- const int size0 = offsetof(struct compat_group_filter, gf_slist);
+ const int size0 = offsetof(struct compat_group_filter, gf_slist_flex);
struct compat_group_filter *gf32;
unsigned int n;
void *p;
p = kmalloc(optlen + 4, GFP_KERNEL);
if (!p)
return -ENOMEM;
- gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */
+ gf32 = p + 4; /* we want ->gf_group and ->gf_slist_flex aligned */
err = -EFAULT;
if (copy_from_sockptr(gf32, optval, optlen))
goto out_free_gsf;
err = -EINVAL;
- if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen)
+ if (offsetof(struct compat_group_filter, gf_slist_flex[n]) > optlen)
goto out_free_gsf;
/* numsrc >= (4G-140)/128 overflow in 32 bits */
if (n > sock_net(sk)->ipv4.sysctl_igmp_max_msf)
goto out_free_gsf;
err = set_mcast_msfilter(sk, gf32->gf_interface, n, gf32->gf_fmode,
- &gf32->gf_group, gf32->gf_slist);
+ &gf32->gf_group, gf32->gf_slist_flex);
out_free_gsf:
kfree(p);
return err;
static int ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
int __user *optlen, int len)
{
- const int size0 = offsetof(struct group_filter, gf_slist);
+ const int size0 = offsetof(struct group_filter, gf_slist_flex);
struct group_filter __user *p = optval;
struct group_filter gsf;
int num;
return -EFAULT;
num = gsf.gf_numsrc;
- err = ip_mc_gsfget(sk, &gsf, p->gf_slist);
+ err = ip_mc_gsfget(sk, &gsf, p->gf_slist_flex);
if (err)
return err;
if (gsf.gf_numsrc < num)
static int compat_ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
int __user *optlen, int len)
{
- const int size0 = offsetof(struct compat_group_filter, gf_slist);
+ const int size0 = offsetof(struct compat_group_filter, gf_slist_flex);
struct compat_group_filter __user *p = optval;
struct compat_group_filter gf32;
struct group_filter gf;
num = gf.gf_numsrc = gf32.gf_numsrc;
gf.gf_group = gf32.gf_group;
- err = ip_mc_gsfget(sk, &gf, p->gf_slist);
+ err = ip_mc_gsfget(sk, &gf, p->gf_slist_flex);
if (err)
return err;
if (gf.gf_numsrc < num)
tunnel->i_seqno = ntohl(tpi->seq) + 1;
}
- skb_reset_network_header(skb);
+ skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0);
err = IP_ECN_decapsulate(iph, skb);
if (unlikely(err)) {
struct rt_cache_stat *st = v;
if (v == SEQ_START_TOKEN) {
- seq_printf(seq, "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
+ seq_puts(seq, "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
return 0;
}
- seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x "
- " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
+ seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x "
+ "%08x %08x %08x %08x %08x %08x "
+ "%08x %08x %08x %08x\n",
dst_entries_get_slow(&ipv4_dst_ops),
0, /* st->in_hit */
st->in_slow_tot,
new->output = dst_discard_out;
new->dev = net->loopback_dev;
- if (new->dev)
- dev_hold(new->dev);
+ dev_hold(new->dev);
rt->rt_is_input = ort->rt_is_input;
rt->rt_iif = ort->rt_iif;
#ifdef CONFIG_PROC_FS
/* Proc filesystem TCP sock list dumping. */
-/*
- * Get next listener socket follow cur. If cur is NULL, get first socket
- * starting from bucket given in st->bucket; when st->bucket is zero the
- * very first socket in the hash table is returned.
+static unsigned short seq_file_family(const struct seq_file *seq);
+
+static bool seq_sk_match(struct seq_file *seq, const struct sock *sk)
+{
+ unsigned short family = seq_file_family(seq);
+
+ /* AF_UNSPEC is used as a match all */
+ return ((family == AF_UNSPEC || family == sk->sk_family) &&
+ net_eq(sock_net(sk), seq_file_net(seq)));
+}
+
+/* Find a non empty bucket (starting from st->bucket)
+ * and return the first sk from it.
*/
-static void *listening_get_next(struct seq_file *seq, void *cur)
+static void *listening_get_first(struct seq_file *seq)
{
- struct tcp_seq_afinfo *afinfo;
struct tcp_iter_state *st = seq->private;
- struct net *net = seq_file_net(seq);
- struct inet_listen_hashbucket *ilb;
- struct hlist_nulls_node *node;
- struct sock *sk = cur;
- if (st->bpf_seq_afinfo)
- afinfo = st->bpf_seq_afinfo;
- else
- afinfo = PDE_DATA(file_inode(seq->file));
+ st->offset = 0;
+ for (; st->bucket <= tcp_hashinfo.lhash2_mask; st->bucket++) {
+ struct inet_listen_hashbucket *ilb2;
+ struct inet_connection_sock *icsk;
+ struct sock *sk;
- if (!sk) {
-get_head:
- ilb = &tcp_hashinfo.listening_hash[st->bucket];
- spin_lock(&ilb->lock);
- sk = sk_nulls_head(&ilb->nulls_head);
- st->offset = 0;
- goto get_sk;
+ ilb2 = &tcp_hashinfo.lhash2[st->bucket];
+ if (hlist_empty(&ilb2->head))
+ continue;
+
+ spin_lock(&ilb2->lock);
+ inet_lhash2_for_each_icsk(icsk, &ilb2->head) {
+ sk = (struct sock *)icsk;
+ if (seq_sk_match(seq, sk))
+ return sk;
+ }
+ spin_unlock(&ilb2->lock);
}
- ilb = &tcp_hashinfo.listening_hash[st->bucket];
+
+ return NULL;
+}
+
+/* Find the next sk of "cur" within the same bucket (i.e. st->bucket).
+ * If "cur" is the last one in the st->bucket,
+ * call listening_get_first() to return the first sk of the next
+ * non empty bucket.
+ */
+static void *listening_get_next(struct seq_file *seq, void *cur)
+{
+ struct tcp_iter_state *st = seq->private;
+ struct inet_listen_hashbucket *ilb2;
+ struct inet_connection_sock *icsk;
+ struct sock *sk = cur;
+
++st->num;
++st->offset;
- sk = sk_nulls_next(sk);
-get_sk:
- sk_nulls_for_each_from(sk, node) {
- if (!net_eq(sock_net(sk), net))
- continue;
- if (afinfo->family == AF_UNSPEC ||
- sk->sk_family == afinfo->family)
+ icsk = inet_csk(sk);
+ inet_lhash2_for_each_icsk_continue(icsk) {
+ sk = (struct sock *)icsk;
+ if (seq_sk_match(seq, sk))
return sk;
}
- spin_unlock(&ilb->lock);
- st->offset = 0;
- if (++st->bucket < INET_LHTABLE_SIZE)
- goto get_head;
- return NULL;
+
+ ilb2 = &tcp_hashinfo.lhash2[st->bucket];
+ spin_unlock(&ilb2->lock);
+ ++st->bucket;
+ return listening_get_first(seq);
}
static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
st->bucket = 0;
st->offset = 0;
- rc = listening_get_next(seq, NULL);
+ rc = listening_get_first(seq);
while (rc && *pos) {
rc = listening_get_next(seq, rc);
*/
static void *established_get_first(struct seq_file *seq)
{
- struct tcp_seq_afinfo *afinfo;
struct tcp_iter_state *st = seq->private;
- struct net *net = seq_file_net(seq);
- void *rc = NULL;
-
- if (st->bpf_seq_afinfo)
- afinfo = st->bpf_seq_afinfo;
- else
- afinfo = PDE_DATA(file_inode(seq->file));
st->offset = 0;
for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
spin_lock_bh(lock);
sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
- if ((afinfo->family != AF_UNSPEC &&
- sk->sk_family != afinfo->family) ||
- !net_eq(sock_net(sk), net)) {
- continue;
- }
- rc = sk;
- goto out;
+ if (seq_sk_match(seq, sk))
+ return sk;
}
spin_unlock_bh(lock);
}
-out:
- return rc;
+
+ return NULL;
}
static void *established_get_next(struct seq_file *seq, void *cur)
{
- struct tcp_seq_afinfo *afinfo;
struct sock *sk = cur;
struct hlist_nulls_node *node;
struct tcp_iter_state *st = seq->private;
- struct net *net = seq_file_net(seq);
-
- if (st->bpf_seq_afinfo)
- afinfo = st->bpf_seq_afinfo;
- else
- afinfo = PDE_DATA(file_inode(seq->file));
++st->num;
++st->offset;
sk = sk_nulls_next(sk);
sk_nulls_for_each_from(sk, node) {
- if ((afinfo->family == AF_UNSPEC ||
- sk->sk_family == afinfo->family) &&
- net_eq(sock_net(sk), net))
+ if (seq_sk_match(seq, sk))
return sk;
}
static void *tcp_seek_last_pos(struct seq_file *seq)
{
struct tcp_iter_state *st = seq->private;
+ int bucket = st->bucket;
int offset = st->offset;
int orig_num = st->num;
void *rc = NULL;
switch (st->state) {
case TCP_SEQ_STATE_LISTENING:
- if (st->bucket >= INET_LHTABLE_SIZE)
+ if (st->bucket > tcp_hashinfo.lhash2_mask)
break;
st->state = TCP_SEQ_STATE_LISTENING;
- rc = listening_get_next(seq, NULL);
- while (offset-- && rc)
+ rc = listening_get_first(seq);
+ while (offset-- && rc && bucket == st->bucket)
rc = listening_get_next(seq, rc);
if (rc)
break;
if (st->bucket > tcp_hashinfo.ehash_mask)
break;
rc = established_get_first(seq);
- while (offset-- && rc)
+ while (offset-- && rc && bucket == st->bucket)
rc = established_get_next(seq, rc);
}
switch (st->state) {
case TCP_SEQ_STATE_LISTENING:
if (v != SEQ_START_TOKEN)
- spin_unlock(&tcp_hashinfo.listening_hash[st->bucket].lock);
+ spin_unlock(&tcp_hashinfo.lhash2[st->bucket].lock);
break;
case TCP_SEQ_STATE_ESTABLISHED:
if (v)
}
#ifdef CONFIG_BPF_SYSCALL
+struct bpf_tcp_iter_state {
+ struct tcp_iter_state state;
+ unsigned int cur_sk;
+ unsigned int end_sk;
+ unsigned int max_sk;
+ struct sock **batch;
+ bool st_bucket_done;
+};
+
struct bpf_iter__tcp {
__bpf_md_ptr(struct bpf_iter_meta *, meta);
__bpf_md_ptr(struct sock_common *, sk_common);
return bpf_iter_run_prog(prog, &ctx);
}
+static void bpf_iter_tcp_put_batch(struct bpf_tcp_iter_state *iter)
+{
+ while (iter->cur_sk < iter->end_sk)
+ sock_put(iter->batch[iter->cur_sk++]);
+}
+
+static int bpf_iter_tcp_realloc_batch(struct bpf_tcp_iter_state *iter,
+ unsigned int new_batch_sz)
+{
+ struct sock **new_batch;
+
+ new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz,
+ GFP_USER | __GFP_NOWARN);
+ if (!new_batch)
+ return -ENOMEM;
+
+ bpf_iter_tcp_put_batch(iter);
+ kvfree(iter->batch);
+ iter->batch = new_batch;
+ iter->max_sk = new_batch_sz;
+
+ return 0;
+}
+
+static unsigned int bpf_iter_tcp_listening_batch(struct seq_file *seq,
+ struct sock *start_sk)
+{
+ struct bpf_tcp_iter_state *iter = seq->private;
+ struct tcp_iter_state *st = &iter->state;
+ struct inet_connection_sock *icsk;
+ unsigned int expected = 1;
+ struct sock *sk;
+
+ sock_hold(start_sk);
+ iter->batch[iter->end_sk++] = start_sk;
+
+ icsk = inet_csk(start_sk);
+ inet_lhash2_for_each_icsk_continue(icsk) {
+ sk = (struct sock *)icsk;
+ if (seq_sk_match(seq, sk)) {
+ if (iter->end_sk < iter->max_sk) {
+ sock_hold(sk);
+ iter->batch[iter->end_sk++] = sk;
+ }
+ expected++;
+ }
+ }
+ spin_unlock(&tcp_hashinfo.lhash2[st->bucket].lock);
+
+ return expected;
+}
+
+static unsigned int bpf_iter_tcp_established_batch(struct seq_file *seq,
+ struct sock *start_sk)
+{
+ struct bpf_tcp_iter_state *iter = seq->private;
+ struct tcp_iter_state *st = &iter->state;
+ struct hlist_nulls_node *node;
+ unsigned int expected = 1;
+ struct sock *sk;
+
+ sock_hold(start_sk);
+ iter->batch[iter->end_sk++] = start_sk;
+
+ sk = sk_nulls_next(start_sk);
+ sk_nulls_for_each_from(sk, node) {
+ if (seq_sk_match(seq, sk)) {
+ if (iter->end_sk < iter->max_sk) {
+ sock_hold(sk);
+ iter->batch[iter->end_sk++] = sk;
+ }
+ expected++;
+ }
+ }
+ spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
+
+ return expected;
+}
+
+static struct sock *bpf_iter_tcp_batch(struct seq_file *seq)
+{
+ struct bpf_tcp_iter_state *iter = seq->private;
+ struct tcp_iter_state *st = &iter->state;
+ unsigned int expected;
+ bool resized = false;
+ struct sock *sk;
+
+ /* The st->bucket is done. Directly advance to the next
+ * bucket instead of having the tcp_seek_last_pos() to skip
+ * one by one in the current bucket and eventually find out
+ * it has to advance to the next bucket.
+ */
+ if (iter->st_bucket_done) {
+ st->offset = 0;
+ st->bucket++;
+ if (st->state == TCP_SEQ_STATE_LISTENING &&
+ st->bucket > tcp_hashinfo.lhash2_mask) {
+ st->state = TCP_SEQ_STATE_ESTABLISHED;
+ st->bucket = 0;
+ }
+ }
+
+again:
+ /* Get a new batch */
+ iter->cur_sk = 0;
+ iter->end_sk = 0;
+ iter->st_bucket_done = false;
+
+ sk = tcp_seek_last_pos(seq);
+ if (!sk)
+ return NULL; /* Done */
+
+ if (st->state == TCP_SEQ_STATE_LISTENING)
+ expected = bpf_iter_tcp_listening_batch(seq, sk);
+ else
+ expected = bpf_iter_tcp_established_batch(seq, sk);
+
+ if (iter->end_sk == expected) {
+ iter->st_bucket_done = true;
+ return sk;
+ }
+
+ if (!resized && !bpf_iter_tcp_realloc_batch(iter, expected * 3 / 2)) {
+ resized = true;
+ goto again;
+ }
+
+ return sk;
+}
+
+static void *bpf_iter_tcp_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ /* bpf iter does not support lseek, so it always
+ * continue from where it was stop()-ped.
+ */
+ if (*pos)
+ return bpf_iter_tcp_batch(seq);
+
+ return SEQ_START_TOKEN;
+}
+
+static void *bpf_iter_tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct bpf_tcp_iter_state *iter = seq->private;
+ struct tcp_iter_state *st = &iter->state;
+ struct sock *sk;
+
+ /* Whenever seq_next() is called, the iter->cur_sk is
+ * done with seq_show(), so advance to the next sk in
+ * the batch.
+ */
+ if (iter->cur_sk < iter->end_sk) {
+ /* Keeping st->num consistent in tcp_iter_state.
+ * bpf_iter_tcp does not use st->num.
+ * meta.seq_num is used instead.
+ */
+ st->num++;
+ /* Move st->offset to the next sk in the bucket such that
+ * the future start() will resume at st->offset in
+ * st->bucket. See tcp_seek_last_pos().
+ */
+ st->offset++;
+ sock_put(iter->batch[iter->cur_sk++]);
+ }
+
+ if (iter->cur_sk < iter->end_sk)
+ sk = iter->batch[iter->cur_sk];
+ else
+ sk = bpf_iter_tcp_batch(seq);
+
+ ++*pos;
+ /* Keeping st->last_pos consistent in tcp_iter_state.
+ * bpf iter does not do lseek, so st->last_pos always equals to *pos.
+ */
+ st->last_pos = *pos;
+ return sk;
+}
+
static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v)
{
struct bpf_iter_meta meta;
struct bpf_prog *prog;
struct sock *sk = v;
+ bool slow;
uid_t uid;
+ int ret;
if (v == SEQ_START_TOKEN)
return 0;
+ if (sk_fullsock(sk))
+ slow = lock_sock_fast(sk);
+
+ if (unlikely(sk_unhashed(sk))) {
+ ret = SEQ_SKIP;
+ goto unlock;
+ }
+
if (sk->sk_state == TCP_TIME_WAIT) {
uid = 0;
} else if (sk->sk_state == TCP_NEW_SYN_RECV) {
meta.seq = seq;
prog = bpf_iter_get_info(&meta, false);
- return tcp_prog_seq_show(prog, &meta, v, uid);
+ ret = tcp_prog_seq_show(prog, &meta, v, uid);
+
+unlock:
+ if (sk_fullsock(sk))
+ unlock_sock_fast(sk, slow);
+ return ret;
+
}
static void bpf_iter_tcp_seq_stop(struct seq_file *seq, void *v)
{
+ struct bpf_tcp_iter_state *iter = seq->private;
struct bpf_iter_meta meta;
struct bpf_prog *prog;
(void)tcp_prog_seq_show(prog, &meta, v, 0);
}
- tcp_seq_stop(seq, v);
+ if (iter->cur_sk < iter->end_sk) {
+ bpf_iter_tcp_put_batch(iter);
+ iter->st_bucket_done = false;
+ }
}
static const struct seq_operations bpf_iter_tcp_seq_ops = {
.show = bpf_iter_tcp_seq_show,
- .start = tcp_seq_start,
- .next = tcp_seq_next,
+ .start = bpf_iter_tcp_seq_start,
+ .next = bpf_iter_tcp_seq_next,
.stop = bpf_iter_tcp_seq_stop,
};
+#endif
+static unsigned short seq_file_family(const struct seq_file *seq)
+{
+ const struct tcp_seq_afinfo *afinfo;
+
+#ifdef CONFIG_BPF_SYSCALL
+ /* Iterated from bpf_iter. Let the bpf prog to filter instead. */
+ if (seq->op == &bpf_iter_tcp_seq_ops)
+ return AF_UNSPEC;
#endif
+ /* Iterated from proc fs */
+ afinfo = PDE_DATA(file_inode(seq->file));
+ return afinfo->family;
+}
+
static const struct seq_operations tcp4_seq_ops = {
.show = tcp4_seq_show,
.start = tcp_seq_start,
DEFINE_BPF_ITER_FUNC(tcp, struct bpf_iter_meta *meta,
struct sock_common *sk_common, uid_t uid)
+#define INIT_BATCH_SZ 16
+
static int bpf_iter_init_tcp(void *priv_data, struct bpf_iter_aux_info *aux)
{
- struct tcp_iter_state *st = priv_data;
- struct tcp_seq_afinfo *afinfo;
- int ret;
+ struct bpf_tcp_iter_state *iter = priv_data;
+ int err;
- afinfo = kmalloc(sizeof(*afinfo), GFP_USER | __GFP_NOWARN);
- if (!afinfo)
- return -ENOMEM;
+ err = bpf_iter_init_seq_net(priv_data, aux);
+ if (err)
+ return err;
- afinfo->family = AF_UNSPEC;
- st->bpf_seq_afinfo = afinfo;
- ret = bpf_iter_init_seq_net(priv_data, aux);
- if (ret)
- kfree(afinfo);
- return ret;
+ err = bpf_iter_tcp_realloc_batch(iter, INIT_BATCH_SZ);
+ if (err) {
+ bpf_iter_fini_seq_net(priv_data);
+ return err;
+ }
+
+ return 0;
}
static void bpf_iter_fini_tcp(void *priv_data)
{
- struct tcp_iter_state *st = priv_data;
+ struct bpf_tcp_iter_state *iter = priv_data;
- kfree(st->bpf_seq_afinfo);
bpf_iter_fini_seq_net(priv_data);
+ kvfree(iter->batch);
}
static const struct bpf_iter_seq_info tcp_seq_info = {
.seq_ops = &bpf_iter_tcp_seq_ops,
.init_seq_private = bpf_iter_init_tcp,
.fini_seq_private = bpf_iter_fini_tcp,
- .seq_priv_size = sizeof(struct tcp_iter_state),
+ .seq_priv_size = sizeof(struct bpf_tcp_iter_state),
};
+static const struct bpf_func_proto *
+bpf_iter_tcp_get_func_proto(enum bpf_func_id func_id,
+ const struct bpf_prog *prog)
+{
+ switch (func_id) {
+ case BPF_FUNC_setsockopt:
+ return &bpf_sk_setsockopt_proto;
+ case BPF_FUNC_getsockopt:
+ return &bpf_sk_getsockopt_proto;
+ default:
+ return NULL;
+ }
+}
+
static struct bpf_iter_reg tcp_reg_info = {
.target = "tcp",
.ctx_arg_info_size = 1,
{ offsetof(struct bpf_iter__tcp, sk_common),
PTR_TO_BTF_ID_OR_NULL },
},
+ .get_func_proto = bpf_iter_tcp_get_func_proto,
.seq_info = &tcp_seq_info,
};
if (th->cwr)
skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
+ if (skb->encapsulation)
+ skb->inner_transport_header = skb->transport_header;
+
return 0;
}
EXPORT_SYMBOL(tcp_gro_complete);
skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4;
+
+ if (skb->encapsulation)
+ skb->inner_transport_header = skb->transport_header;
+
return 0;
}
errout:
if (in6_dev)
in6_dev_put(in6_dev);
- if (dev)
- dev_put(dev);
+ dev_put(dev);
return err;
}
errout_ifa:
in6_ifa_put(ifa);
errout:
- if (dev)
- dev_put(dev);
+ dev_put(dev);
if (fillargs.netnsid >= 0)
put_net(tgt_net);
}
static int inet6_validate_link_af(const struct net_device *dev,
- const struct nlattr *nla)
+ const struct nlattr *nla,
+ struct netlink_ext_ack *extack)
{
struct nlattr *tb[IFLA_INET6_MAX + 1];
struct inet6_dev *idev = NULL;
}
err = nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla,
- inet6_af_policy, NULL);
+ inet6_af_policy, extack);
if (err)
return err;
#include <linux/uaccess.h>
-/*
- * Parsing tlv encoded headers.
- *
- * Parsing function "func" returns true, if parsing succeed
- * and false, if it failed.
- * It MUST NOT touch skb->h.
- */
-
-struct tlvtype_proc {
- int type;
- bool (*func)(struct sk_buff *skb, int offset);
-};
-
/*********************
Generic functions
*********************/
return false;
}
+static bool ipv6_hop_ra(struct sk_buff *skb, int optoff);
+static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff);
+static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff);
+static bool ipv6_hop_calipso(struct sk_buff *skb, int optoff);
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+static bool ipv6_dest_hao(struct sk_buff *skb, int optoff);
+#endif
+
/* Parse tlv encoded option header (hop-by-hop or destination) */
-static bool ip6_parse_tlv(const struct tlvtype_proc *procs,
+static bool ip6_parse_tlv(bool hopbyhop,
struct sk_buff *skb,
int max_count)
{
int len = (skb_transport_header(skb)[1] + 1) << 3;
const unsigned char *nh = skb_network_header(skb);
int off = skb_network_header_len(skb);
- const struct tlvtype_proc *curr;
bool disallow_unknowns = false;
int tlv_count = 0;
int padlen = 0;
if (tlv_count > max_count)
goto bad;
- for (curr = procs; curr->type >= 0; curr++) {
- if (curr->type == nh[off]) {
- /* type specific length/alignment
- checks will be performed in the
- func(). */
- if (curr->func(skb, off) == false)
+ if (hopbyhop) {
+ switch (nh[off]) {
+ case IPV6_TLV_ROUTERALERT:
+ if (!ipv6_hop_ra(skb, off))
+ return false;
+ break;
+ case IPV6_TLV_IOAM:
+ if (!ipv6_hop_ioam(skb, off))
+ return false;
+ break;
+ case IPV6_TLV_JUMBO:
+ if (!ipv6_hop_jumbo(skb, off))
+ return false;
+ break;
+ case IPV6_TLV_CALIPSO:
+ if (!ipv6_hop_calipso(skb, off))
+ return false;
+ break;
+ default:
+ if (!ip6_tlvopt_unknown(skb, off,
+ disallow_unknowns))
+ return false;
+ break;
+ }
+ } else {
+ switch (nh[off]) {
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+ case IPV6_TLV_HAO:
+ if (!ipv6_dest_hao(skb, off))
+ return false;
+ break;
+#endif
+ default:
+ if (!ip6_tlvopt_unknown(skb, off,
+ disallow_unknowns))
return false;
break;
}
}
- if (curr->type < 0 &&
- !ip6_tlvopt_unknown(skb, off, disallow_unknowns))
- return false;
-
padlen = 0;
}
off += optlen;
}
#endif
-static const struct tlvtype_proc tlvprocdestopt_lst[] = {
-#if IS_ENABLED(CONFIG_IPV6_MIP6)
- {
- .type = IPV6_TLV_HAO,
- .func = ipv6_dest_hao,
- },
-#endif
- {-1, NULL}
-};
-
static int ipv6_destopt_rcv(struct sk_buff *skb)
{
struct inet6_dev *idev = __in6_dev_get(skb->dev);
dstbuf = opt->dst1;
#endif
- if (ip6_parse_tlv(tlvprocdestopt_lst, skb,
- net->ipv6.sysctl.max_dst_opts_cnt)) {
+ if (ip6_parse_tlv(false, skb, net->ipv6.sysctl.max_dst_opts_cnt)) {
skb->transport_header += extlen;
opt = IP6CB(skb);
#if IS_ENABLED(CONFIG_IPV6_MIP6)
return false;
}
-static const struct tlvtype_proc tlvprochopopt_lst[] = {
- {
- .type = IPV6_TLV_ROUTERALERT,
- .func = ipv6_hop_ra,
- },
- {
- .type = IPV6_TLV_IOAM,
- .func = ipv6_hop_ioam,
- },
- {
- .type = IPV6_TLV_JUMBO,
- .func = ipv6_hop_jumbo,
- },
- {
- .type = IPV6_TLV_CALIPSO,
- .func = ipv6_hop_calipso,
- },
- { -1, }
-};
-
int ipv6_parse_hopopts(struct sk_buff *skb)
{
struct inet6_skb_parm *opt = IP6CB(skb);
goto fail_and_free;
opt->flags |= IP6SKB_HOPBYHOP;
- if (ip6_parse_tlv(tlvprochopopt_lst, skb,
- net->ipv6.sysctl.max_hbh_opts_cnt)) {
+ if (ip6_parse_tlv(true, skb, net->ipv6.sysctl.max_hbh_opts_cnt)) {
skb->transport_header += extlen;
opt = IP6CB(skb);
opt->nhoff = sizeof(struct ipv6hdr);
{
struct dst_entry *dst = skb_dst(skb);
struct net_device *dev = dst->dev;
+ struct inet6_dev *idev = ip6_dst_idev(dst);
unsigned int hh_len = LL_RESERVED_SPACE(dev);
- int delta = hh_len - skb_headroom(skb);
- const struct in6_addr *nexthop;
+ const struct in6_addr *daddr, *nexthop;
+ struct ipv6hdr *hdr;
struct neighbour *neigh;
int ret;
/* Be paranoid, rather than too clever. */
- if (unlikely(delta > 0) && dev->header_ops) {
- /* pskb_expand_head() might crash, if skb is shared */
- if (skb_shared(skb)) {
- struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
-
- if (likely(nskb)) {
- if (skb->sk)
- skb_set_owner_w(nskb, skb->sk);
- consume_skb(skb);
- } else {
- kfree_skb(skb);
- }
- skb = nskb;
- }
- if (skb &&
- pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
- kfree_skb(skb);
- skb = NULL;
- }
+ if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) {
+ skb = skb_expand_head(skb, hh_len);
if (!skb) {
- IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
return -ENOMEM;
}
}
- if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
- struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
-
+ hdr = ipv6_hdr(skb);
+ daddr = &hdr->daddr;
+ if (ipv6_addr_is_multicast(daddr)) {
if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
((mroute6_is_socket(net, skb) &&
!(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
- ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
- &ipv6_hdr(skb)->saddr))) {
+ ipv6_chk_mcast_addr(dev, daddr, &hdr->saddr))) {
struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
/* Do not check for IFF_ALLMULTI; multicast routing
net, sk, newskb, NULL, newskb->dev,
dev_loopback_xmit);
- if (ipv6_hdr(skb)->hop_limit == 0) {
+ if (hdr->hop_limit == 0) {
IP6_INC_STATS(net, idev,
IPSTATS_MIB_OUTDISCARDS);
kfree_skb(skb);
}
IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
-
- if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
- IPV6_ADDR_SCOPE_NODELOCAL &&
+ if (IPV6_ADDR_MC_SCOPE(daddr) <= IPV6_ADDR_SCOPE_NODELOCAL &&
!(dev->flags & IFF_LOOPBACK)) {
kfree_skb(skb);
return 0;
}
rcu_read_lock_bh();
- nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
- neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
+ nexthop = rt6_nexthop((struct rt6_info *)dst, daddr);
+ neigh = __ipv6_neigh_lookup_noref(dev, nexthop);
if (unlikely(!neigh))
- neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
+ neigh = __neigh_create(&nd_tbl, nexthop, dev, false);
if (!IS_ERR(neigh)) {
sock_confirm_neigh(skb, neigh);
ret = neigh_output(neigh, skb, false);
}
rcu_read_unlock_bh();
- IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES);
kfree_skb(skb);
return -EINVAL;
}
const struct ipv6_pinfo *np = inet6_sk(sk);
struct in6_addr *first_hop = &fl6->daddr;
struct dst_entry *dst = skb_dst(skb);
+ struct net_device *dev = dst->dev;
+ struct inet6_dev *idev = ip6_dst_idev(dst);
unsigned int head_room;
struct ipv6hdr *hdr;
u8 proto = fl6->flowi6_proto;
int hlimit = -1;
u32 mtu;
- head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
+ head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dev);
if (opt)
head_room += opt->opt_nflen + opt->opt_flen;
- if (unlikely(skb_headroom(skb) < head_room)) {
- struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
- if (!skb2) {
- IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_OUTDISCARDS);
- kfree_skb(skb);
+ if (unlikely(head_room > skb_headroom(skb))) {
+ skb = skb_expand_head(skb, head_room);
+ if (!skb) {
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
return -ENOBUFS;
}
- if (skb->sk)
- skb_set_owner_w(skb2, skb->sk);
- consume_skb(skb);
- skb = skb2;
}
if (opt) {
mtu = dst_mtu(dst);
if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
- IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_OUT, skb->len);
+ IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
/* if egress device is enslaved to an L3 master device pass the
* skb to its handler for processing
* we promote our socket to non const
*/
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
- net, (struct sock *)sk, skb, NULL, dst->dev,
+ net, (struct sock *)sk, skb, NULL, dev,
dst_output);
}
- skb->dev = dst->dev;
+ skb->dev = dev;
/* ipv6_local_error() does not require socket lock,
* we promote our socket to non const
*/
ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
- IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS);
kfree_skb(skb);
return -EMSGSIZE;
}
if (net->ipv6.devconf_all->proxy_ndp &&
pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
int proxied = ip6_forward_proxy_check(skb);
- if (proxied > 0)
+ if (proxied > 0) {
+ hdr->hop_limit--;
return ip6_input(skb);
- else if (proxied < 0) {
+ } else if (proxied < 0) {
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
goto drop;
}
read_lock(&mrt_lock);
if (reg_vif_num >= 0)
reg_dev = mrt->vif_table[reg_vif_num].dev;
- if (reg_dev)
- dev_hold(reg_dev);
+ dev_hold(reg_dev);
read_unlock(&mrt_lock);
if (!reg_dev)
if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen)
goto out_free_gsf;
- ret = ip6_mc_msfilter(sk, gsf, gsf->gf_slist);
+ ret = ip6_mc_msfilter(sk, gsf, gsf->gf_slist_flex);
out_free_gsf:
kfree(gsf);
return ret;
static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
int optlen)
{
- const int size0 = offsetof(struct compat_group_filter, gf_slist);
+ const int size0 = offsetof(struct compat_group_filter, gf_slist_flex);
struct compat_group_filter *gf32;
void *p;
int ret;
if (!p)
return -ENOMEM;
- gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */
+ gf32 = p + 4; /* we want ->gf_group and ->gf_slist_flex aligned */
ret = -EFAULT;
if (copy_from_sockptr(gf32, optval, optlen))
goto out_free_p;
goto out_free_p;
ret = -EINVAL;
- if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen)
+ if (offsetof(struct compat_group_filter, gf_slist_flex[n]) > optlen)
goto out_free_p;
ret = ip6_mc_msfilter(sk, &(struct group_filter){
.gf_interface = gf32->gf_interface,
.gf_group = gf32->gf_group,
.gf_fmode = gf32->gf_fmode,
- .gf_numsrc = gf32->gf_numsrc}, gf32->gf_slist);
+ .gf_numsrc = gf32->gf_numsrc}, gf32->gf_slist_flex);
out_free_p:
kfree(p);
static int ipv6_get_msfilter(struct sock *sk, void __user *optval,
int __user *optlen, int len)
{
- const int size0 = offsetof(struct group_filter, gf_slist);
+ const int size0 = offsetof(struct group_filter, gf_slist_flex);
struct group_filter __user *p = optval;
struct group_filter gsf;
int num;
return -EADDRNOTAVAIL;
num = gsf.gf_numsrc;
lock_sock(sk);
- err = ip6_mc_msfget(sk, &gsf, p->gf_slist);
+ err = ip6_mc_msfget(sk, &gsf, p->gf_slist_flex);
if (!err) {
if (num > gsf.gf_numsrc)
num = gsf.gf_numsrc;
static int compat_ipv6_get_msfilter(struct sock *sk, void __user *optval,
int __user *optlen)
{
- const int size0 = offsetof(struct compat_group_filter, gf_slist);
+ const int size0 = offsetof(struct compat_group_filter, gf_slist_flex);
struct compat_group_filter __user *p = optval;
struct compat_group_filter gf32;
struct group_filter gf;
return -EADDRNOTAVAIL;
lock_sock(sk);
- err = ip6_mc_msfget(sk, &gf, p->gf_slist);
+ err = ip6_mc_msfget(sk, &gf, p->gf_slist_flex);
release_sock(sk);
if (err)
return err;
if (psl)
count += psl->sl_max;
- newpsl = sock_kmalloc(sk, IP6_SFLSIZE(count), GFP_KERNEL);
+ newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr, count),
+ GFP_KERNEL);
if (!newpsl) {
err = -ENOBUFS;
goto done;
if (psl) {
for (i = 0; i < psl->sl_count; i++)
newpsl->sl_addr[i] = psl->sl_addr[i];
- atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
+ atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
+ &sk->sk_omem_alloc);
kfree_rcu(psl, rcu);
}
psl = newpsl;
goto done;
}
if (gsf->gf_numsrc) {
- newpsl = sock_kmalloc(sk, IP6_SFLSIZE(gsf->gf_numsrc),
- GFP_KERNEL);
+ newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr,
+ gsf->gf_numsrc),
+ GFP_KERNEL);
if (!newpsl) {
err = -ENOBUFS;
goto done;
newpsl->sl_count, newpsl->sl_addr, 0);
if (err) {
mutex_unlock(&idev->mc_lock);
- sock_kfree_s(sk, newpsl, IP6_SFLSIZE(newpsl->sl_max));
+ sock_kfree_s(sk, newpsl, struct_size(newpsl, sl_addr,
+ newpsl->sl_max));
goto done;
}
mutex_unlock(&idev->mc_lock);
if (psl) {
ip6_mc_del_src(idev, group, pmc->sfmode,
psl->sl_count, psl->sl_addr, 0);
- atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
+ atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
+ &sk->sk_omem_alloc);
kfree_rcu(psl, rcu);
} else {
ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0);
err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode,
psl->sl_count, psl->sl_addr, 0);
RCU_INIT_POINTER(iml->sflist, NULL);
- atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
+ atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
+ &sk->sk_omem_alloc);
kfree_rcu(psl, rcu);
}
if (err) {
lwtstate_put(fib6_nh->fib_nh_lws);
fib6_nh->fib_nh_lws = NULL;
- if (dev)
- dev_put(dev);
+ dev_put(dev);
}
return err;
if (err == 0) {
atomic_dec(&iucv->skbs_in_xmit);
skb_unlink(skb, &iucv->send_skb_q);
- kfree_skb(skb);
+ consume_skb(skb);
}
/* this error should never happen since the */
}
}
- kfree_skb(skb);
+ consume_skb(skb);
if (iucv->transport == AF_IUCV_TRANS_HIPER) {
atomic_inc(&iucv->msg_recv);
if (atomic_read(&iucv->msg_recv) > iucv->msglimit) {
spin_unlock_irqrestore(&list->lock, flags);
if (this) {
- kfree_skb(this);
+ consume_skb(this);
/* wake up any process waiting for sending */
iucv_sock_wake_msglim(sk);
}
{
struct iucv_sock *iucv = iucv_sk(sk);
- if (!iucv)
- goto out;
- if (sk->sk_state != IUCV_BOUND)
- goto out;
+ if (!iucv || sk->sk_state != IUCV_BOUND) {
+ kfree_skb(skb);
+ return NET_RX_SUCCESS;
+ }
+
bh_lock_sock(sk);
iucv->msglimit_peer = iucv_trans_hdr(skb)->window;
sk->sk_state = IUCV_CONNECTED;
sk->sk_state_change(sk);
bh_unlock_sock(sk);
-out:
- kfree_skb(skb);
+ consume_skb(skb);
return NET_RX_SUCCESS;
}
{
struct iucv_sock *iucv = iucv_sk(sk);
- if (!iucv)
- goto out;
- if (sk->sk_state != IUCV_BOUND)
- goto out;
+ if (!iucv || sk->sk_state != IUCV_BOUND) {
+ kfree_skb(skb);
+ return NET_RX_SUCCESS;
+ }
+
bh_lock_sock(sk);
sk->sk_state = IUCV_DISCONN;
sk->sk_state_change(sk);
bh_unlock_sock(sk);
-out:
- kfree_skb(skb);
+ consume_skb(skb);
return NET_RX_SUCCESS;
}
struct iucv_sock *iucv = iucv_sk(sk);
/* other end of connection closed */
- if (!iucv)
- goto out;
+ if (!iucv) {
+ kfree_skb(skb);
+ return NET_RX_SUCCESS;
+ }
+
bh_lock_sock(sk);
if (sk->sk_state == IUCV_CONNECTED) {
sk->sk_state = IUCV_DISCONN;
sk->sk_state_change(sk);
}
bh_unlock_sock(sk);
-out:
- kfree_skb(skb);
+ consume_skb(skb);
return NET_RX_SUCCESS;
}
case (AF_IUCV_FLAG_WIN):
err = afiucv_hs_callback_win(sk, skb);
if (skb->len == sizeof(struct af_iucv_trans_hdr)) {
- kfree_skb(skb);
+ consume_skb(skb);
break;
}
fallthrough; /* and receive non-zero length data */
.func = afiucv_hs_rcv,
};
-static int afiucv_iucv_init(void)
-{
- return pr_iucv->iucv_register(&af_iucv_handler, 0);
-}
-
-static void afiucv_iucv_exit(void)
-{
- pr_iucv->iucv_unregister(&af_iucv_handler, 0);
-}
-
static int __init afiucv_init(void)
{
int err;
- if (MACHINE_IS_VM) {
+ if (MACHINE_IS_VM && IS_ENABLED(CONFIG_IUCV)) {
cpcmd("QUERY USERID", iucv_userid, sizeof(iucv_userid), &err);
if (unlikely(err)) {
WARN_ON(err);
goto out;
}
- pr_iucv = try_then_request_module(symbol_get(iucv_if), "iucv");
- if (!pr_iucv) {
- printk(KERN_WARNING "iucv_if lookup failed\n");
- memset(&iucv_userid, 0, sizeof(iucv_userid));
- }
+ pr_iucv = &iucv_if;
} else {
memset(&iucv_userid, 0, sizeof(iucv_userid));
pr_iucv = NULL;
goto out_proto;
if (pr_iucv) {
- err = afiucv_iucv_init();
+ err = pr_iucv->iucv_register(&af_iucv_handler, 0);
if (err)
goto out_sock;
}
out_notifier:
if (pr_iucv)
- afiucv_iucv_exit();
+ pr_iucv->iucv_unregister(&af_iucv_handler, 0);
out_sock:
sock_unregister(PF_IUCV);
out_proto:
proto_unregister(&iucv_proto);
out:
- if (pr_iucv)
- symbol_put(iucv_if);
return err;
}
static void __exit afiucv_exit(void)
{
- if (pr_iucv) {
- afiucv_iucv_exit();
- symbol_put(iucv_if);
- }
+ if (pr_iucv)
+ pr_iucv->iucv_unregister(&af_iucv_handler, 0);
unregister_netdevice_notifier(&afiucv_netdev_notifier);
dev_remove_pack(&iucv_packet_type);
*/
static inline int __iucv_call_b2f0(int command, union iucv_param *parm)
{
- register unsigned long reg0 asm ("0");
- register unsigned long reg1 asm ("1");
- int ccode;
+ int cc;
- reg0 = command;
- reg1 = (unsigned long)parm;
asm volatile(
- " .long 0xb2f01000\n"
- " ipm %0\n"
- " srl %0,28\n"
- : "=d" (ccode), "=m" (*parm), "+d" (reg0), "+a" (reg1)
- : "m" (*parm) : "cc");
- return ccode;
+ " lgr 0,%[reg0]\n"
+ " lgr 1,%[reg1]\n"
+ " .long 0xb2f01000\n"
+ " ipm %[cc]\n"
+ " srl %[cc],28\n"
+ : [cc] "=&d" (cc), "+m" (*parm)
+ : [reg0] "d" ((unsigned long)command),
+ [reg1] "d" ((unsigned long)parm)
+ : "cc", "0", "1");
+ return cc;
}
static inline int iucv_call_b2f0(int command, union iucv_param *parm)
*/
static int __iucv_query_maxconn(void *param, unsigned long *max_pathid)
{
- register unsigned long reg0 asm ("0");
- register unsigned long reg1 asm ("1");
- int ccode;
+ unsigned long reg1 = (unsigned long)param;
+ int cc;
- reg0 = IUCV_QUERY;
- reg1 = (unsigned long) param;
asm volatile (
+ " lghi 0,%[cmd]\n"
+ " lgr 1,%[reg1]\n"
" .long 0xb2f01000\n"
- " ipm %0\n"
- " srl %0,28\n"
- : "=d" (ccode), "+d" (reg0), "+d" (reg1) : : "cc");
+ " ipm %[cc]\n"
+ " srl %[cc],28\n"
+ " lgr %[reg1],1\n"
+ : [cc] "=&d" (cc), [reg1] "+&d" (reg1)
+ : [cmd] "K" (IUCV_QUERY)
+ : "cc", "0", "1");
*max_pathid = reg1;
- return ccode;
+ return cc;
}
static int iucv_query_maxconn(void)
{
int cpu;
- get_online_cpus();
+ cpus_read_lock();
for_each_online_cpu(cpu)
/* Enable all cpus with a declared buffer. */
if (cpumask_test_cpu(cpu, &iucv_buffer_cpumask) &&
!cpumask_test_cpu(cpu, &iucv_irq_cpumask))
smp_call_function_single(cpu, iucv_allow_cpu,
NULL, 1);
- put_online_cpus();
+ cpus_read_unlock();
}
/**
size_t alloc_size;
int cpu, rc;
- get_online_cpus();
+ cpus_read_lock();
rc = -ENOMEM;
alloc_size = iucv_max_pathid * sizeof(struct iucv_path);
iucv_path_table = kzalloc(alloc_size, GFP_KERNEL);
if (cpumask_empty(&iucv_buffer_cpumask))
/* No cpu could declare an iucv buffer. */
goto out;
- put_online_cpus();
+ cpus_read_unlock();
return 0;
out:
kfree(iucv_path_table);
iucv_path_table = NULL;
- put_online_cpus();
+ cpus_read_unlock();
return rc;
}
*/
static void iucv_disable(void)
{
- get_online_cpus();
+ cpus_read_lock();
on_each_cpu(iucv_retrieve_cpu, NULL, 1);
kfree(iucv_path_table);
iucv_path_table = NULL;
- put_online_cpus();
+ cpus_read_unlock();
}
static int iucv_cpu_dead(unsigned int cpu)
if (cpumask_empty(&iucv_irq_cpumask))
return NOTIFY_DONE;
- get_online_cpus();
+ cpus_read_lock();
on_each_cpu_mask(&iucv_irq_cpumask, iucv_block_cpu, NULL, 1);
preempt_disable();
for (i = 0; i < iucv_max_pathid; i++) {
iucv_sever_pathid(i, NULL);
}
preempt_enable();
- put_online_cpus();
+ cpus_read_unlock();
iucv_disable();
return NOTIFY_DONE;
}
{
u8 rc = LLC_PDU_LEN_U;
- if (addr->sllc_test || addr->sllc_xid)
+ if (addr->sllc_test)
rc = LLC_PDU_LEN_U;
+ else if (addr->sllc_xid)
+ /* We need to expand header to sizeof(struct llc_xid_info)
+ * since llc_pdu_init_as_xid_cmd() sets 4,5,6 bytes of LLC header
+ * as XID PDU. In llc_ui_sendmsg() we reserved header size and then
+ * filled all other space with user data. If we won't reserve this
+ * bytes, llc_pdu_init_as_xid_cmd() will overwrite user data
+ */
+ rc = LLC_PDU_LEN_U_XID;
else if (sk->sk_type == SOCK_STREAM)
rc = LLC_PDU_LEN_I;
return rc;
} else {
release_sock(sk);
}
- if (llc->dev)
- dev_put(llc->dev);
+ dev_put(llc->dev);
sock_put(sk);
llc_sk_free(sk);
out:
} else
llc->dev = dev_getbyhwaddr_rcu(&init_net, addr->sllc_arphrd,
addr->sllc_mac);
- if (llc->dev)
- dev_hold(llc->dev);
+ dev_hold(llc->dev);
rcu_read_unlock();
if (!llc->dev)
goto out;
struct llc_sap_state_ev *ev = llc_sap_ev(skb);
int rc;
- llc_pdu_header_init(skb, LLC_PDU_TYPE_U, ev->saddr.lsap,
+ llc_pdu_header_init(skb, LLC_PDU_TYPE_U_XID, ev->saddr.lsap,
ev->daddr.lsap, LLC_PDU_CMD);
llc_pdu_init_as_xid_cmd(skb, LLC_XID_NULL_CLASS_2, 0);
rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac);
struct vif_params *params)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct ieee80211_local *local = sdata->local;
+ struct sta_info *sta;
int ret;
ret = ieee80211_if_change_type(sdata, type);
RCU_INIT_POINTER(sdata->u.vlan.sta, NULL);
ieee80211_check_fast_rx_iface(sdata);
} else if (type == NL80211_IFTYPE_STATION && params->use_4addr >= 0) {
+ struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+
+ if (params->use_4addr == ifmgd->use_4addr)
+ return 0;
+
sdata->u.mgd.use_4addr = params->use_4addr;
+ if (!ifmgd->associated)
+ return 0;
+
+ mutex_lock(&local->sta_mtx);
+ sta = sta_info_get(sdata, ifmgd->bssid);
+ if (sta)
+ drv_sta_set_4addr(local, sdata, &sta->sta,
+ params->use_4addr);
+ mutex_unlock(&local->sta_mtx);
+
+ if (params->use_4addr)
+ ieee80211_send_4addr_nullfunc(local, sdata);
}
if (sdata->vif.type == NL80211_IFTYPE_MONITOR) {
void ieee80211_send_nullfunc(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
bool powersave);
+void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata);
void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata,
struct ieee80211_hdr *hdr, bool ack, u16 tx_time);
ieee80211_tx_skb(sdata, skb);
}
-static void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata)
+void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata)
{
struct sk_buff *skb;
struct ieee80211_hdr *nullfunc;
* Need to make a copy and possibly remove radiotap header
* and FCS from the original.
*/
- skb = skb_copy_expand(*origskb, needed_headroom, 0, GFP_ATOMIC);
+ skb = skb_copy_expand(*origskb, needed_headroom + NET_SKB_PAD,
+ 0, GFP_ATOMIC);
if (!skb)
return NULL;
return queued;
}
+static void
+ieee80211_aggr_check(struct ieee80211_sub_if_data *sdata,
+ struct sta_info *sta,
+ struct sk_buff *skb)
+{
+ struct rate_control_ref *ref = sdata->local->rate_ctrl;
+ u16 tid;
+
+ if (!ref || !(ref->ops->capa & RATE_CTRL_CAPA_AMPDU_TRIGGER))
+ return;
+
+ if (!sta || !sta->sta.ht_cap.ht_supported ||
+ !sta->sta.wme || skb_get_queue_mapping(skb) == IEEE80211_AC_VO ||
+ skb->protocol == sdata->control_port_protocol)
+ return;
+
+ tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK;
+ if (likely(sta->ampdu_mlme.tid_tx[tid]))
+ return;
+
+ ieee80211_start_tx_ba_session(&sta->sta, tid, 0);
+}
+
/*
* initialises @tx
* pass %NULL for the station if unknown, a valid pointer if known
struct ieee80211_local *local = sdata->local;
struct ieee80211_hdr *hdr;
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+ bool aggr_check = false;
int tid;
memset(tx, 0, sizeof(*tx));
} else if (tx->sdata->control_port_protocol == tx->skb->protocol) {
tx->sta = sta_info_get_bss(sdata, hdr->addr1);
}
- if (!tx->sta && !is_multicast_ether_addr(hdr->addr1))
+ if (!tx->sta && !is_multicast_ether_addr(hdr->addr1)) {
tx->sta = sta_info_get(sdata, hdr->addr1);
+ aggr_check = true;
+ }
}
if (tx->sta && ieee80211_is_data_qos(hdr->frame_control) &&
struct tid_ampdu_tx *tid_tx;
tid = ieee80211_get_tid(hdr);
-
tid_tx = rcu_dereference(tx->sta->ampdu_mlme.tid_tx[tid]);
+ if (!tid_tx && aggr_check) {
+ ieee80211_aggr_check(sdata, tx->sta, skb);
+ tid_tx = rcu_dereference(tx->sta->ampdu_mlme.tid_tx[tid]);
+ }
+
if (tid_tx) {
bool queued;
}
EXPORT_SYMBOL(ieee80211_txq_schedule_start);
-static void
-ieee80211_aggr_check(struct ieee80211_sub_if_data *sdata,
- struct sta_info *sta,
- struct sk_buff *skb)
-{
- struct rate_control_ref *ref = sdata->local->rate_ctrl;
- u16 tid;
-
- if (!ref || !(ref->ops->capa & RATE_CTRL_CAPA_AMPDU_TRIGGER))
- return;
-
- if (!sta || !sta->sta.ht_cap.ht_supported ||
- !sta->sta.wme || skb_get_queue_mapping(skb) == IEEE80211_AC_VO ||
- skb->protocol == sdata->control_port_protocol)
- return;
-
- tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK;
- if (likely(sta->ampdu_mlme.tid_tx[tid]))
- return;
-
- ieee80211_start_tx_ba_session(&sta->sta, tid, 0);
-}
-
void __ieee80211_subif_start_xmit(struct sk_buff *skb,
struct net_device *dev,
u32 info_flags,
/* TODO: expand mctp_skb_cb for header fields? */
struct mctp_hdr *hdr = mctp_hdr(skb);
- hdr = mctp_hdr(skb);
addr = msg->msg_name;
addr->smctp_family = AF_MCTP;
addr->smctp_network = cb->net;
struct mptcp_addr_info addr;
u8 flags;
int ifindex;
- struct rcu_head rcu;
struct socket *lsk;
};
return false;
tstamp = nf_conn_tstamp_find(ct);
- if (tstamp && tstamp->stop == 0)
+ if (tstamp) {
+ s32 timeout = ct->timeout - nfct_time_stamp;
+
tstamp->stop = ktime_get_real_ns();
+ if (timeout < 0)
+ tstamp->stop -= jiffies_to_nsecs(-timeout);
+ }
if (nf_conntrack_event_report(IPCT_DESTROY, ct,
portid, report) < 0) {
void flow_offload_refresh(struct nf_flowtable *flow_table,
struct flow_offload *flow)
{
- flow->timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
+ u32 timeout;
+
+ timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
+ if (READ_ONCE(flow->timeout) != timeout)
+ WRITE_ONCE(flow->timeout, timeout);
if (likely(!nf_flowtable_hw_offload(flow_table)))
return;
flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
&val, &mask);
- if (dev)
- dev_put(dev);
+ dev_put(dev);
return 0;
}
struct nf_hook_state *state = &entry->state;
/* Release those devices we held, or Alexey will kill me. */
- if (state->in)
- dev_put(state->in);
- if (state->out)
- dev_put(state->out);
+ dev_put(state->in);
+ dev_put(state->out);
if (state->sk)
sock_put(state->sk);
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- if (entry->physin)
- dev_put(entry->physin);
- if (entry->physout)
- dev_put(entry->physout);
+ dev_put(entry->physin);
+ dev_put(entry->physout);
#endif
}
{
struct nf_hook_state *state = &entry->state;
- if (state->in)
- dev_hold(state->in);
- if (state->out)
- dev_hold(state->out);
+ dev_hold(state->in);
+ dev_hold(state->out);
if (state->sk)
sock_hold(state->sk);
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- if (entry->physin)
- dev_hold(entry->physin);
- if (entry->physout)
- dev_hold(entry->physout);
+ dev_hold(entry->physin);
+ dev_hold(entry->physout);
#endif
}
EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
return 0;
}
+static void nf_tables_commit_audit_free(struct list_head *adl)
+{
+ struct nft_audit_data *adp, *adn;
+
+ list_for_each_entry_safe(adp, adn, adl, list) {
+ list_del(&adp->list);
+ kfree(adp);
+ }
+}
+
static void nf_tables_commit_audit_collect(struct list_head *adl,
struct nft_table *table, u32 op)
{
ret = nf_tables_commit_audit_alloc(&adl, trans->ctx.table);
if (ret) {
nf_tables_commit_chain_prepare_cancel(net);
+ nf_tables_commit_audit_free(&adl);
return ret;
}
if (trans->msg_type == NFT_MSG_NEWRULE ||
ret = nf_tables_commit_chain_prepare(net, chain);
if (ret < 0) {
nf_tables_commit_chain_prepare_cancel(net);
+ nf_tables_commit_audit_free(&adl);
return ret;
}
}
nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *dev)
{
const struct nf_hook_entries *hook_head = NULL;
+#ifdef CONFIG_NETFILTER_INGRESS
struct net_device *netdev;
+#endif
switch (pf) {
case NFPROTO_IPV4:
{
struct nft_last_priv *priv = nft_expr_priv(expr);
- priv->last_jiffies = jiffies;
- priv->last_set = 1;
+ if (READ_ONCE(priv->last_jiffies) != jiffies)
+ WRITE_ONCE(priv->last_jiffies, jiffies);
+ if (READ_ONCE(priv->last_set) == 0)
+ WRITE_ONCE(priv->last_set, 1);
}
static int nft_last_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
struct nft_last_priv *priv = nft_expr_priv(expr);
+ unsigned long last_jiffies = READ_ONCE(priv->last_jiffies);
+ u32 last_set = READ_ONCE(priv->last_set);
__be64 msecs;
- if (time_before(jiffies, priv->last_jiffies))
- priv->last_set = 0;
+ if (time_before(jiffies, last_jiffies)) {
+ WRITE_ONCE(priv->last_set, 0);
+ last_set = 0;
+ }
- if (priv->last_set)
- msecs = nf_jiffies64_to_msecs(jiffies - priv->last_jiffies);
+ if (last_set)
+ msecs = nf_jiffies64_to_msecs(jiffies - last_jiffies);
else
msecs = 0;
- if (nla_put_be32(skb, NFTA_LAST_SET, htonl(priv->last_set)) ||
+ if (nla_put_be32(skb, NFTA_LAST_SET, htonl(last_set)) ||
nla_put_be64(skb, NFTA_LAST_MSECS, msecs, NFTA_LAST_PAD))
goto nla_put_failure;
alen = sizeof_field(struct nf_nat_range, min_addr.ip6);
break;
default:
- return -EAFNOSUPPORT;
+ if (tb[NFTA_NAT_REG_ADDR_MIN])
+ return -EAFNOSUPPORT;
+ break;
}
priv->family = family;
netlbl_af4list_audit_addr(audit_buf, 1,
(dev != NULL ? dev->name : NULL),
addr->s_addr, mask->s_addr);
- if (dev != NULL)
- dev_put(dev);
+ dev_put(dev);
if (entry != NULL &&
security_secid_to_secctx(entry->secid,
&secctx, &secctx_len) == 0) {
netlbl_af6list_audit_addr(audit_buf, 1,
(dev != NULL ? dev->name : NULL),
addr, mask);
- if (dev != NULL)
- dev_put(dev);
+ dev_put(dev);
if (entry != NULL &&
security_secid_to_secctx(entry->secid,
&secctx, &secctx_len) == 0) {
if (dev == NULL || nr_rx_frame(skb, dev) == 0)
kfree_skb(skb);
- if (dev != NULL)
- dev_put(dev);
+ dev_put(dev);
if (!skb_queue_empty(&loopback_queue) && !nr_loopback_running())
mod_timer(&loopback_timer, jiffies + 10);
if (first == NULL || strncmp(dev->name, first->name, 3) < 0)
first = dev;
}
- if (first)
- dev_hold(first);
+ dev_hold(first);
rcu_read_unlock();
return first;
/* Execute request and wait for completion. */
static int __nci_request(struct nci_dev *ndev,
- void (*req)(struct nci_dev *ndev, unsigned long opt),
- unsigned long opt, __u32 timeout)
+ void (*req)(struct nci_dev *ndev, const void *opt),
+ const void *opt, __u32 timeout)
{
int rc = 0;
long completion_rc;
inline int nci_request(struct nci_dev *ndev,
void (*req)(struct nci_dev *ndev,
- unsigned long opt),
- unsigned long opt, __u32 timeout)
+ const void *opt),
+ const void *opt, __u32 timeout)
{
int rc;
return rc;
}
-static void nci_reset_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_reset_req(struct nci_dev *ndev, const void *opt)
{
struct nci_core_reset_cmd cmd;
nci_send_cmd(ndev, NCI_OP_CORE_RESET_CMD, 1, &cmd);
}
-static void nci_init_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_init_req(struct nci_dev *ndev, const void *opt)
{
u8 plen = 0;
if (opt)
plen = sizeof(struct nci_core_init_v2_cmd);
- nci_send_cmd(ndev, NCI_OP_CORE_INIT_CMD, plen, (void *)opt);
+ nci_send_cmd(ndev, NCI_OP_CORE_INIT_CMD, plen, opt);
}
-static void nci_init_complete_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_init_complete_req(struct nci_dev *ndev, const void *opt)
{
struct nci_rf_disc_map_cmd cmd;
struct disc_map_config *cfg = cmd.mapping_configs;
const __u8 *val;
};
-static void nci_set_config_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_set_config_req(struct nci_dev *ndev, const void *opt)
{
- const struct nci_set_config_param *param =
- (struct nci_set_config_param *)opt;
+ const struct nci_set_config_param *param = opt;
struct nci_core_set_config_cmd cmd;
BUG_ON(param->len > NCI_MAX_PARAM_LEN);
__u32 tm_protocols;
};
-static void nci_rf_discover_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_rf_discover_req(struct nci_dev *ndev, const void *opt)
{
- const struct nci_rf_discover_param *param =
- (struct nci_rf_discover_param *)opt;
+ const struct nci_rf_discover_param *param = opt;
struct nci_rf_disc_cmd cmd;
cmd.num_disc_configs = 0;
__u8 rf_protocol;
};
-static void nci_rf_discover_select_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_rf_discover_select_req(struct nci_dev *ndev, const void *opt)
{
- const struct nci_rf_discover_select_param *param =
- (struct nci_rf_discover_select_param *)opt;
+ const struct nci_rf_discover_select_param *param = opt;
struct nci_rf_discover_select_cmd cmd;
cmd.rf_discovery_id = param->rf_discovery_id;
sizeof(struct nci_rf_discover_select_cmd), &cmd);
}
-static void nci_rf_deactivate_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_rf_deactivate_req(struct nci_dev *ndev, const void *opt)
{
struct nci_rf_deactivate_cmd cmd;
- cmd.type = opt;
+ cmd.type = (unsigned long)opt;
nci_send_cmd(ndev, NCI_OP_RF_DEACTIVATE_CMD,
sizeof(struct nci_rf_deactivate_cmd), &cmd);
const __u8 *payload;
};
-static void nci_generic_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_generic_req(struct nci_dev *ndev, const void *opt)
{
- const struct nci_cmd_param *param =
- (struct nci_cmd_param *)opt;
+ const struct nci_cmd_param *param = opt;
nci_send_cmd(ndev, param->opcode, param->len, param->payload);
}
param.len = len;
param.payload = payload;
- return __nci_request(ndev, nci_generic_req, (unsigned long)¶m,
+ return __nci_request(ndev, nci_generic_req, ¶m,
msecs_to_jiffies(NCI_CMD_TIMEOUT));
}
EXPORT_SYMBOL(nci_prop_cmd);
param.len = len;
param.payload = payload;
- return __nci_request(ndev, nci_generic_req, (unsigned long)¶m,
+ return __nci_request(ndev, nci_generic_req, ¶m,
msecs_to_jiffies(NCI_CMD_TIMEOUT));
}
EXPORT_SYMBOL(nci_core_cmd);
int nci_core_reset(struct nci_dev *ndev)
{
- return __nci_request(ndev, nci_reset_req, 0,
+ return __nci_request(ndev, nci_reset_req, (void *)0,
msecs_to_jiffies(NCI_RESET_TIMEOUT));
}
EXPORT_SYMBOL(nci_core_reset);
int nci_core_init(struct nci_dev *ndev)
{
- return __nci_request(ndev, nci_init_req, 0,
+ return __nci_request(ndev, nci_init_req, (void *)0,
msecs_to_jiffies(NCI_INIT_TIMEOUT));
}
EXPORT_SYMBOL(nci_core_init);
struct sk_buff *data;
};
-static void nci_send_data_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_send_data_req(struct nci_dev *ndev, const void *opt)
{
- const struct nci_loopback_data *data = (struct nci_loopback_data *)opt;
+ const struct nci_loopback_data *data = opt;
nci_send_data(ndev, data->conn_id, data->data);
}
loopback_data.data = skb;
ndev->cur_conn_id = conn_id;
- r = nci_request(ndev, nci_send_data_req, (unsigned long)&loopback_data,
+ r = nci_request(ndev, nci_send_data_req, &loopback_data,
msecs_to_jiffies(NCI_DATA_TIMEOUT));
if (r == NCI_STATUS_OK && resp)
*resp = conn_info->rx_skb;
rc = ndev->ops->init(ndev);
if (!rc) {
- rc = __nci_request(ndev, nci_reset_req, 0,
+ rc = __nci_request(ndev, nci_reset_req, (void *)0,
msecs_to_jiffies(NCI_RESET_TIMEOUT));
}
.feature1 = NCI_FEATURE_DISABLE,
.feature2 = NCI_FEATURE_DISABLE
};
- unsigned long opt = 0;
+ const void *opt = NULL;
if (ndev->nci_ver & NCI_VER_2_MASK)
- opt = (unsigned long)&nci_init_v2_cmd;
+ opt = &nci_init_v2_cmd;
rc = __nci_request(ndev, nci_init_req, opt,
msecs_to_jiffies(NCI_INIT_TIMEOUT));
rc = ndev->ops->post_setup(ndev);
if (!rc) {
- rc = __nci_request(ndev, nci_init_complete_req, 0,
+ rc = __nci_request(ndev, nci_init_complete_req, (void *)0,
msecs_to_jiffies(NCI_INIT_TIMEOUT));
}
atomic_set(&ndev->cmd_cnt, 1);
set_bit(NCI_INIT, &ndev->flags);
- __nci_request(ndev, nci_reset_req, 0,
+ __nci_request(ndev, nci_reset_req, (void *)0,
msecs_to_jiffies(NCI_RESET_TIMEOUT));
/* After this point our queues are empty
param.len = len;
param.val = val;
- return __nci_request(ndev, nci_set_config_req, (unsigned long)¶m,
+ return __nci_request(ndev, nci_set_config_req, ¶m,
msecs_to_jiffies(NCI_SET_CONFIG_TIMEOUT));
}
EXPORT_SYMBOL(nci_set_config);
-static void nci_nfcee_discover_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_nfcee_discover_req(struct nci_dev *ndev, const void *opt)
{
struct nci_nfcee_discover_cmd cmd;
- __u8 action = opt;
+ __u8 action = (unsigned long)opt;
cmd.discovery_action = action;
int nci_nfcee_discover(struct nci_dev *ndev, u8 action)
{
- return __nci_request(ndev, nci_nfcee_discover_req, action,
+ unsigned long opt = action;
+
+ return __nci_request(ndev, nci_nfcee_discover_req, (void *)opt,
msecs_to_jiffies(NCI_CMD_TIMEOUT));
}
EXPORT_SYMBOL(nci_nfcee_discover);
-static void nci_nfcee_mode_set_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_nfcee_mode_set_req(struct nci_dev *ndev, const void *opt)
{
- const struct nci_nfcee_mode_set_cmd *cmd =
- (struct nci_nfcee_mode_set_cmd *)opt;
+ const struct nci_nfcee_mode_set_cmd *cmd = opt;
nci_send_cmd(ndev, NCI_OP_NFCEE_MODE_SET_CMD,
sizeof(struct nci_nfcee_mode_set_cmd), cmd);
cmd.nfcee_id = nfcee_id;
cmd.nfcee_mode = nfcee_mode;
- return __nci_request(ndev, nci_nfcee_mode_set_req,
- (unsigned long)&cmd,
+ return __nci_request(ndev, nci_nfcee_mode_set_req, &cmd,
msecs_to_jiffies(NCI_CMD_TIMEOUT));
}
EXPORT_SYMBOL(nci_nfcee_mode_set);
-static void nci_core_conn_create_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_core_conn_create_req(struct nci_dev *ndev, const void *opt)
{
- const struct core_conn_create_data *data =
- (struct core_conn_create_data *)opt;
+ const struct core_conn_create_data *data = opt;
nci_send_cmd(ndev, NCI_OP_CORE_CONN_CREATE_CMD, data->length, data->cmd);
}
}
ndev->cur_dest_type = destination_type;
- r = __nci_request(ndev, nci_core_conn_create_req, (unsigned long)&data,
+ r = __nci_request(ndev, nci_core_conn_create_req, &data,
msecs_to_jiffies(NCI_CMD_TIMEOUT));
kfree(cmd);
return r;
}
EXPORT_SYMBOL(nci_core_conn_create);
-static void nci_core_conn_close_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_core_conn_close_req(struct nci_dev *ndev, const void *opt)
{
- __u8 conn_id = opt;
+ __u8 conn_id = (unsigned long)opt;
nci_send_cmd(ndev, NCI_OP_CORE_CONN_CLOSE_CMD, 1, &conn_id);
}
int nci_core_conn_close(struct nci_dev *ndev, u8 conn_id)
{
+ unsigned long opt = conn_id;
+
ndev->cur_conn_id = conn_id;
- return __nci_request(ndev, nci_core_conn_close_req, conn_id,
+ return __nci_request(ndev, nci_core_conn_close_req, (void *)opt,
msecs_to_jiffies(NCI_CMD_TIMEOUT));
}
EXPORT_SYMBOL(nci_core_conn_close);
param.id = NCI_PN_ATR_REQ_GEN_BYTES;
- rc = nci_request(ndev, nci_set_config_req, (unsigned long)¶m,
+ rc = nci_request(ndev, nci_set_config_req, ¶m,
msecs_to_jiffies(NCI_SET_CONFIG_TIMEOUT));
if (rc)
return rc;
param.id = NCI_LN_ATR_RES_GEN_BYTES;
- return nci_request(ndev, nci_set_config_req, (unsigned long)¶m,
+ return nci_request(ndev, nci_set_config_req, ¶m,
msecs_to_jiffies(NCI_SET_CONFIG_TIMEOUT));
}
pr_debug("target active or w4 select, implicitly deactivate\n");
rc = nci_request(ndev, nci_rf_deactivate_req,
- NCI_DEACTIVATE_TYPE_IDLE_MODE,
+ (void *)NCI_DEACTIVATE_TYPE_IDLE_MODE,
msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT));
if (rc)
return -EBUSY;
param.im_protocols = im_protocols;
param.tm_protocols = tm_protocols;
- rc = nci_request(ndev, nci_rf_discover_req, (unsigned long)¶m,
+ rc = nci_request(ndev, nci_rf_discover_req, ¶m,
msecs_to_jiffies(NCI_RF_DISC_TIMEOUT));
if (!rc)
return;
}
- nci_request(ndev, nci_rf_deactivate_req, NCI_DEACTIVATE_TYPE_IDLE_MODE,
+ nci_request(ndev, nci_rf_deactivate_req,
+ (void *)NCI_DEACTIVATE_TYPE_IDLE_MODE,
msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT));
}
else
param.rf_protocol = NCI_RF_PROTOCOL_NFC_DEP;
- rc = nci_request(ndev, nci_rf_discover_select_req,
- (unsigned long)¶m,
+ rc = nci_request(ndev, nci_rf_discover_select_req, ¶m,
msecs_to_jiffies(NCI_RF_DISC_SELECT_TIMEOUT));
}
__u8 mode)
{
struct nci_dev *ndev = nfc_get_drvdata(nfc_dev);
- u8 nci_mode = NCI_DEACTIVATE_TYPE_IDLE_MODE;
+ unsigned long nci_mode = NCI_DEACTIVATE_TYPE_IDLE_MODE;
pr_debug("entry\n");
}
if (atomic_read(&ndev->state) == NCI_POLL_ACTIVE) {
- nci_request(ndev, nci_rf_deactivate_req, nci_mode,
+ nci_request(ndev, nci_rf_deactivate_req, (void *)nci_mode,
msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT));
}
}
} else {
if (atomic_read(&ndev->state) == NCI_LISTEN_ACTIVE ||
atomic_read(&ndev->state) == NCI_DISCOVERY) {
- nci_request(ndev, nci_rf_deactivate_req, 0,
- msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT));
+ nci_request(ndev, nci_rf_deactivate_req, (void *)0,
+ msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT));
}
rc = nfc_tm_deactivated(nfc_dev);
return i;
}
-static void nci_hci_send_data_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_hci_send_data_req(struct nci_dev *ndev, const void *opt)
{
- const struct nci_data *data = (struct nci_data *)opt;
+ const struct nci_data *data = opt;
nci_hci_send_data(ndev, data->pipe, data->cmd,
data->data, data->data_len);
data.data = param;
data.data_len = param_len;
- r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data,
+ r = nci_request(ndev, nci_hci_send_data_req, &data,
msecs_to_jiffies(NCI_DATA_TIMEOUT));
if (r == NCI_STATUS_OK) {
message = (struct nci_hcp_message *)conn_info->rx_skb->data;
data.data = NULL;
data.data_len = 0;
- return nci_request(ndev, nci_hci_send_data_req,
- (unsigned long)&data,
- msecs_to_jiffies(NCI_DATA_TIMEOUT));
+ return nci_request(ndev, nci_hci_send_data_req, &data,
+ msecs_to_jiffies(NCI_DATA_TIMEOUT));
}
EXPORT_SYMBOL(nci_hci_open_pipe);
data.data = tmp;
data.data_len = param_len + 1;
- r = nci_request(ndev, nci_hci_send_data_req,
- (unsigned long)&data,
+ r = nci_request(ndev, nci_hci_send_data_req, &data,
msecs_to_jiffies(NCI_DATA_TIMEOUT));
if (r == NCI_STATUS_OK) {
message = (struct nci_hcp_message *)conn_info->rx_skb->data;
data.data = &idx;
data.data_len = 1;
- r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data,
+ r = nci_request(ndev, nci_hci_send_data_req, &data,
msecs_to_jiffies(NCI_DATA_TIMEOUT));
if (r == NCI_STATUS_OK) {
rcu_read_lock();
dev = rcu_dereference(po->cached_dev);
- if (likely(dev))
- dev_hold(dev);
+ dev_hold(dev);
rcu_read_unlock();
return dev;
out_free:
kfree_skb(skb);
out_unlock:
- if (dev)
- dev_put(dev);
+ dev_put(dev);
out:
return err;
}
}
}
- if (dev)
- dev_hold(dev);
+ dev_hold(dev);
proto_curr = po->prot_hook.type;
dev_curr = po->prot_hook.dev;
packet_cached_dev_assign(po, dev);
}
}
- if (dev_curr)
- dev_put(dev_curr);
+ dev_put(dev_curr);
if (proto == 0 || !need_rehook)
goto out_unlock;
if (msg == NETDEV_UNREGISTER) {
packet_cached_dev_reset(po);
WRITE_ONCE(po->ifindex, -1);
- if (po->prot_hook.dev)
- dev_put(po->prot_hook.dev);
+ dev_put(po->prot_hook.dev);
po->prot_hook.dev = NULL;
}
spin_unlock(&po->bind_lock);
drop:
kfree_skb(skb);
- if (dev)
- dev_put(dev);
+ dev_put(dev);
return err;
}
EXPORT_SYMBOL(pn_skb_send);
break;
dev = NULL;
}
- if (dev)
- dev_hold(dev);
+ dev_hold(dev);
rcu_read_unlock();
return dev;
}
daddr >>= 2;
rcu_read_lock();
dev = rcu_dereference(routes->table[daddr]);
- if (dev)
- dev_hold(dev);
+ dev_hold(dev);
rcu_read_unlock();
if (!dev)
saddr = PN_NO_ADDR;
release_sock(sk);
- if (dev)
- dev_put(dev);
+ dev_put(dev);
if (saddr == PN_NO_ADDR)
return -EHOSTUNREACH;
struct qrtr_endpoint ep;
struct mhi_device *mhi_dev;
struct device *dev;
+ struct completion ready;
};
/* From MHI to QRTR */
struct qrtr_mhi_dev *qdev = container_of(ep, struct qrtr_mhi_dev, ep);
int rc;
+ rc = wait_for_completion_interruptible(&qdev->ready);
+ if (rc)
+ goto free_skb;
+
if (skb->sk)
sock_hold(skb->sk);
int rc;
/* start channels */
- rc = mhi_prepare_for_transfer(mhi_dev);
+ rc = mhi_prepare_for_transfer(mhi_dev, 0);
if (rc)
return rc;
if (rc)
return rc;
+ /* start channels */
+ rc = mhi_prepare_for_transfer(mhi_dev, MHI_CH_INBOUND_ALLOC_BUFS);
+ if (rc) {
+ qrtr_endpoint_unregister(&qdev->ep);
+ dev_set_drvdata(&mhi_dev->dev, NULL);
+ return rc;
+ }
+
+ complete_all(&qdev->ready);
dev_dbg(qdev->dev, "Qualcomm MHI QRTR driver probed\n");
return 0;
if (!ipc)
goto err;
- if (sock_queue_rcv_skb(&ipc->sk, skb))
+ if (sock_queue_rcv_skb(&ipc->sk, skb)) {
+ qrtr_port_put(ipc);
goto err;
+ }
qrtr_port_put(ipc);
}
ipc = qrtr_port_lookup(to->sq_port);
if (!ipc || &ipc->sk == skb->sk) { /* do not send to self */
+ if (ipc)
+ qrtr_port_put(ipc);
kfree_skb(skb);
return -ENODEV;
}
p->tcfa_tm.install = jiffies;
p->tcfa_tm.lastuse = jiffies;
p->tcfa_tm.firstuse = 0;
- p->tcfa_flags = flags;
+ p->tcfa_flags = flags & TCA_ACT_FLAGS_USER_MASK;
if (est) {
err = gen_new_estimator(&p->tcfa_bstats, p->cpu_bstats,
&p->tcfa_rate_est,
}
}
-struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla,
+struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, bool police,
bool rtnl_held,
struct netlink_ext_ack *extack)
{
struct nlattr *kind;
int err;
- if (name == NULL) {
+ if (!police) {
err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla,
tcf_action_policy, extack);
if (err < 0)
return ERR_PTR(err);
}
} else {
- if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ) {
+ if (strlcpy(act_name, "police", IFNAMSIZ) >= IFNAMSIZ) {
NL_SET_ERR_MSG(extack, "TC action name too long");
return ERR_PTR(-EINVAL);
}
struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
struct nlattr *nla, struct nlattr *est,
- char *name, int ovr, int bind,
struct tc_action_ops *a_o, int *init_res,
- bool rtnl_held,
- struct netlink_ext_ack *extack)
+ u32 flags, struct netlink_ext_ack *extack)
{
- struct nla_bitfield32 flags = { 0, 0 };
+ bool police = flags & TCA_ACT_FLAGS_POLICE;
+ struct nla_bitfield32 userflags = { 0, 0 };
u8 hw_stats = TCA_ACT_HW_STATS_ANY;
struct nlattr *tb[TCA_ACT_MAX + 1];
struct tc_cookie *cookie = NULL;
int err;
/* backward compatibility for policer */
- if (name == NULL) {
+ if (!police) {
err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla,
tcf_action_policy, extack);
if (err < 0)
}
hw_stats = tcf_action_hw_stats_get(tb[TCA_ACT_HW_STATS]);
if (tb[TCA_ACT_FLAGS])
- flags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]);
+ userflags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]);
- err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind,
- rtnl_held, tp, flags.value, extack);
+ err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, tp,
+ userflags.value | flags, extack);
} else {
- err = a_o->init(net, nla, est, &a, ovr, bind, rtnl_held,
- tp, flags.value, extack);
+ err = a_o->init(net, nla, est, &a, tp, userflags.value | flags,
+ extack);
}
if (err < 0)
goto err_out;
*init_res = err;
- if (!name && tb[TCA_ACT_COOKIE])
+ if (!police && tb[TCA_ACT_COOKIE])
tcf_set_action_cookie(&a->act_cookie, cookie);
- if (!name)
+ if (!police)
a->hw_stats = hw_stats;
return a;
/* Returns numbers of initialized actions or negative error. */
int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
- struct nlattr *est, char *name, int ovr, int bind,
- struct tc_action *actions[], int init_res[], size_t *attr_size,
- bool rtnl_held, struct netlink_ext_ack *extack)
+ struct nlattr *est, struct tc_action *actions[],
+ int init_res[], size_t *attr_size, u32 flags,
+ struct netlink_ext_ack *extack)
{
struct tc_action_ops *ops[TCA_ACT_MAX_PRIO] = {};
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
struct tc_action_ops *a_o;
- a_o = tc_action_load_ops(name, tb[i], rtnl_held, extack);
+ a_o = tc_action_load_ops(tb[i], flags & TCA_ACT_FLAGS_POLICE,
+ !(flags & TCA_ACT_FLAGS_NO_RTNL),
+ extack);
if (IS_ERR(a_o)) {
err = PTR_ERR(a_o);
goto err_mod;
}
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
- act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind,
- ops[i - 1], &init_res[i - 1], rtnl_held,
- extack);
+ act = tcf_action_init_1(net, tp, tb[i], est, ops[i - 1],
+ &init_res[i - 1], flags, extack);
if (IS_ERR(act)) {
err = PTR_ERR(act);
goto err;
goto err_mod;
err:
- tcf_action_destroy(actions, bind);
+ tcf_action_destroy(actions, flags & TCA_ACT_FLAGS_BIND);
err_mod:
for (i = 0; i < TCA_ACT_MAX_PRIO; i++) {
if (ops[i])
}
static int tcf_action_add(struct net *net, struct nlattr *nla,
- struct nlmsghdr *n, u32 portid, int ovr,
+ struct nlmsghdr *n, u32 portid, u32 flags,
struct netlink_ext_ack *extack)
{
size_t attr_size = 0;
int init_res[TCA_ACT_MAX_PRIO] = {};
for (loop = 0; loop < 10; loop++) {
- ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0,
- actions, init_res, &attr_size, true, extack);
+ ret = tcf_action_init(net, NULL, nla, NULL, actions, init_res,
+ &attr_size, flags, extack);
if (ret != -EAGAIN)
break;
}
struct net *net = sock_net(skb->sk);
struct nlattr *tca[TCA_ROOT_MAX + 1];
u32 portid = NETLINK_CB(skb).portid;
- int ret = 0, ovr = 0;
+ u32 flags = 0;
+ int ret = 0;
if ((n->nlmsg_type != RTM_GETACTION) &&
!netlink_capable(skb, CAP_NET_ADMIN))
* is zero) then just set this
*/
if (n->nlmsg_flags & NLM_F_REPLACE)
- ovr = 1;
- ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr,
+ flags = TCA_ACT_FLAGS_REPLACE;
+ ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, flags,
extack);
break;
case RTM_DELACTION:
static int tcf_bpf_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **act,
- int replace, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, bpf_net_id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
struct tcf_chain *goto_ch = NULL;
struct tcf_bpf_cfg cfg, old;
if (bind)
return 0;
- if (!replace) {
+ if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*act, bind);
return -EEXIST;
}
static int tcf_connmark_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, connmark_net_id);
struct nlattr *tb[TCA_CONNMARK_MAX + 1];
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct tcf_chain *goto_ch = NULL;
struct tcf_connmark_info *ci;
struct tc_connmark *parm;
ci = to_connmark(*a);
if (bind)
return 0;
- if (!ovr) {
+ if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
static struct tc_action_ops act_csum_ops;
static int tcf_csum_init(struct net *net, struct nlattr *nla,
- struct nlattr *est, struct tc_action **a, int ovr,
- int bind, bool rtnl_held, struct tcf_proto *tp,
+ struct nlattr *est, struct tc_action **a,
+ struct tcf_proto *tp,
u32 flags, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, csum_net_id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct tcf_csum_params *params_new;
struct nlattr *tb[TCA_CSUM_MAX + 1];
struct tcf_chain *goto_ch = NULL;
} else if (err > 0) {
if (bind)/* dont override defaults */
return 0;
- if (!ovr) {
+ if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
static int tcf_ct_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int replace, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ct_net_id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct tcf_ct_params *params = NULL;
struct nlattr *tb[TCA_CT_MAX + 1];
struct tcf_chain *goto_ch = NULL;
if (bind)
return 0;
- if (!replace) {
+ if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
static int tcf_ctinfo_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ctinfo_net_id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
u32 dscpmask = 0, dscpstatemask, index;
struct nlattr *tb[TCA_CTINFO_MAX + 1];
struct tcf_ctinfo_params *cp_new;
} else if (err > 0) {
if (bind) /* don't override defaults */
return 0;
- if (!ovr) {
+ if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
static int tcf_gact_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, gact_net_id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_GACT_MAX + 1];
struct tcf_chain *goto_ch = NULL;
struct tc_gact *parm;
} else if (err > 0) {
if (bind)/* dont override defaults */
return 0;
- if (!ovr) {
+ if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
static int tcf_gate_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, gate_net_id);
enum tk_offsets tk_offset = TK_OFFS_TAI;
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_GATE_MAX + 1];
struct tcf_chain *goto_ch = NULL;
u64 cycletime = 0, basetime = 0;
}
ret = ACT_P_CREATED;
- } else if (!ovr) {
+ } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
static int tcf_ife_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ife_net_id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_IFE_MAX + 1];
struct nlattr *tb2[IFE_META_MAX + 1];
struct tcf_chain *goto_ch = NULL;
kfree(p);
return err;
}
- err = load_metalist(tb2, rtnl_held);
+ err = load_metalist(tb2, !(flags & TCA_ACT_FLAGS_NO_RTNL));
if (err) {
kfree(p);
return err;
return ret;
}
ret = ACT_P_CREATED;
- } else if (!ovr) {
+ } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
kfree(p);
return -EEXIST;
}
if (tb[TCA_IFE_METALST]) {
- err = populate_metalist(ife, tb2, exists, rtnl_held);
+ err = populate_metalist(ife, tb2, exists,
+ !(flags & TCA_ACT_FLAGS_NO_RTNL));
if (err)
goto metadata_parse_err;
} else {
static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- const struct tc_action_ops *ops, int ovr, int bind,
+ const struct tc_action_ops *ops,
struct tcf_proto *tp, u32 flags)
{
struct tc_action_net *tn = net_generic(net, id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_IPT_MAX + 1];
struct tcf_ipt *ipt;
struct xt_entry_target *td, *t;
if (bind)/* dont override defaults */
return 0;
- if (!ovr) {
+ if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
}
static int tcf_ipt_init(struct net *net, struct nlattr *nla,
- struct nlattr *est, struct tc_action **a, int ovr,
- int bind, bool rtnl_held, struct tcf_proto *tp,
+ struct nlattr *est, struct tc_action **a,
+ struct tcf_proto *tp,
u32 flags, struct netlink_ext_ack *extack)
{
- return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, ovr,
- bind, tp, flags);
+ return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops,
+ tp, flags);
}
static int tcf_xt_init(struct net *net, struct nlattr *nla,
- struct nlattr *est, struct tc_action **a, int ovr,
- int bind, bool unlocked, struct tcf_proto *tp,
+ struct nlattr *est, struct tc_action **a,
+ struct tcf_proto *tp,
u32 flags, struct netlink_ext_ack *extack)
{
- return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, ovr,
- bind, tp, flags);
+ return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops,
+ tp, flags);
}
static int tcf_ipt_act(struct sk_buff *skb, const struct tc_action *a,
/* last reference to action, no need to lock */
dev = rcu_dereference_protected(m->tcfm_dev, 1);
- if (dev)
- dev_put(dev);
+ dev_put(dev);
}
static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = {
static int tcf_mirred_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp,
u32 flags, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, mirred_net_id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_MIRRED_MAX + 1];
struct tcf_chain *goto_ch = NULL;
bool mac_header_xmit = false;
return ret;
}
ret = ACT_P_CREATED;
- } else if (!ovr) {
+ } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
mac_header_xmit = dev_is_mac_header_xmit(dev);
dev = rcu_replace_pointer(m->tcfm_dev, dev,
lockdep_is_held(&m->tcf_lock));
- if (dev)
- dev_put(dev);
+ dev_put(dev);
m->tcfm_mac_header_xmit = mac_header_xmit;
}
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
static int tcf_mpls_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, mpls_net_id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_MPLS_MAX + 1];
struct tcf_chain *goto_ch = NULL;
struct tcf_mpls_params *p;
}
ret = ACT_P_CREATED;
- } else if (!ovr) {
+ } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
};
static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
- struct tc_action **a, int ovr, int bind,
- bool rtnl_held, struct tcf_proto *tp,
+ struct tc_action **a, struct tcf_proto *tp,
u32 flags, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, nat_net_id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_NAT_MAX + 1];
struct tcf_chain *goto_ch = NULL;
struct tc_nat *parm;
} else if (err > 0) {
if (bind)
return 0;
- if (!ovr) {
+ if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
static int tcf_pedit_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, pedit_net_id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_PEDIT_MAX + 1];
struct tcf_chain *goto_ch = NULL;
struct tc_pedit_key *keys = NULL;
} else if (err > 0) {
if (bind)
goto out_free;
- if (!ovr) {
+ if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
ret = -EEXIST;
goto out_release;
}
static int tcf_police_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
int ret = 0, tcfp_result = TC_ACT_OK, err, size;
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_POLICE_MAX + 1];
struct tcf_chain *goto_ch = NULL;
struct tc_police *parm;
}
ret = ACT_P_CREATED;
spin_lock_init(&(to_police(*a)->tcfp_lock));
- } else if (!ovr) {
+ } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
};
static int tcf_sample_init(struct net *net, struct nlattr *nla,
- struct nlattr *est, struct tc_action **a, int ovr,
- int bind, bool rtnl_held, struct tcf_proto *tp,
+ struct nlattr *est, struct tc_action **a,
+ struct tcf_proto *tp,
u32 flags, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, sample_net_id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_SAMPLE_MAX + 1];
struct psample_group *psample_group;
u32 psample_group_num, rate, index;
return ret;
}
ret = ACT_P_CREATED;
- } else if (!ovr) {
+ } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
static int tcf_simp_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, simp_net_id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_DEF_MAX + 1];
struct tcf_chain *goto_ch = NULL;
struct tc_defact *parm;
tcf_action_set_ctrlact(*a, parm->action, goto_ch);
ret = ACT_P_CREATED;
} else {
- if (!ovr) {
+ if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
err = -EEXIST;
goto release_idr;
}
static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 act_flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbedit_net_id);
+ bool bind = act_flags & TCA_ACT_FLAGS_BIND;
struct tcf_skbedit_params *params_new;
struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
struct tcf_chain *goto_ch = NULL;
ret = ACT_P_CREATED;
} else {
d = to_skbedit(*a);
- if (!ovr) {
+ if (!(act_flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+ bool ovr = flags & TCA_ACT_FLAGS_REPLACE;
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_SKBMOD_MAX + 1];
struct tcf_skbmod_params *p, *p_old;
struct tcf_chain *goto_ch = NULL;
static int tunnel_key_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 act_flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
+ bool bind = act_flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1];
struct tcf_tunnel_key_params *params_new;
struct metadata_dst *metadata = NULL;
}
ret = ACT_P_CREATED;
- } else if (!ovr) {
+ } else if (!(act_flags & TCA_ACT_FLAGS_REPLACE)) {
NL_SET_ERR_MSG(extack, "TC IDR already exists");
ret = -EEXIST;
goto release_tun_meta;
static int tcf_vlan_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, vlan_net_id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_VLAN_MAX + 1];
struct tcf_chain *goto_ch = NULL;
bool push_prio_exists = false;
}
ret = ACT_P_CREATED;
- } else if (!ovr) {
+ } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
int err;
int tp_created;
bool rtnl_held = false;
+ u32 flags;
if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
return -EPERM;
tp = NULL;
cl = 0;
block = NULL;
+ flags = 0;
if (prio == 0) {
/* If no priority is provided by the user,
goto errout;
}
+ if (!(n->nlmsg_flags & NLM_F_CREATE))
+ flags |= TCA_ACT_FLAGS_REPLACE;
+ if (!rtnl_held)
+ flags |= TCA_ACT_FLAGS_NO_RTNL;
err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
- n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE,
- rtnl_held, extack);
+ flags, extack);
if (err == 0) {
tfilter_notify(net, skb, n, tp, block, q, parent, fh,
RTM_NEWTFILTER, false, rtnl_held);
EXPORT_SYMBOL(tcf_exts_destroy);
int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
- struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr,
- bool rtnl_held, struct netlink_ext_ack *extack)
+ struct nlattr *rate_tlv, struct tcf_exts *exts,
+ u32 flags, struct netlink_ext_ack *extack)
{
#ifdef CONFIG_NET_CLS_ACT
{
if (exts->police && tb[exts->police]) {
struct tc_action_ops *a_o;
- a_o = tc_action_load_ops("police", tb[exts->police], rtnl_held, extack);
+ a_o = tc_action_load_ops(tb[exts->police], true,
+ !(flags & TCA_ACT_FLAGS_NO_RTNL),
+ extack);
if (IS_ERR(a_o))
return PTR_ERR(a_o);
+ flags |= TCA_ACT_FLAGS_POLICE | TCA_ACT_FLAGS_BIND;
act = tcf_action_init_1(net, tp, tb[exts->police],
- rate_tlv, "police", ovr,
- TCA_ACT_BIND, a_o, init_res,
- rtnl_held, extack);
+ rate_tlv, a_o, init_res, flags,
+ extack);
module_put(a_o->owner);
if (IS_ERR(act))
return PTR_ERR(act);
} else if (exts->action && tb[exts->action]) {
int err;
+ flags |= TCA_ACT_FLAGS_BIND;
err = tcf_action_init(net, tp, tb[exts->action],
- rate_tlv, NULL, ovr, TCA_ACT_BIND,
- exts->actions, init_res,
- &attr_size, rtnl_held, extack);
+ rate_tlv, exts->actions, init_res,
+ &attr_size, flags, extack);
if (err < 0)
return err;
exts->nr_actions = err;
static int basic_set_parms(struct net *net, struct tcf_proto *tp,
struct basic_filter *f, unsigned long base,
struct nlattr **tb,
- struct nlattr *est, bool ovr,
+ struct nlattr *est, u32 flags,
struct netlink_ext_ack *extack)
{
int err;
- err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, extack);
+ err = tcf_exts_validate(net, tp, tb, est, &f->exts, flags, extack);
if (err < 0)
return err;
static int basic_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
- struct nlattr **tca, void **arg, bool ovr,
- bool rtnl_held, struct netlink_ext_ack *extack)
+ struct nlattr **tca, void **arg,
+ u32 flags, struct netlink_ext_ack *extack)
{
int err;
struct basic_head *head = rtnl_dereference(tp->root);
goto errout;
}
- err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr,
+ err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], flags,
extack);
if (err < 0) {
if (!fold)
static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
struct cls_bpf_prog *prog, unsigned long base,
- struct nlattr **tb, struct nlattr *est, bool ovr,
+ struct nlattr **tb, struct nlattr *est, u32 flags,
struct netlink_ext_ack *extack)
{
bool is_bpf, is_ebpf, have_exts = false;
if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf))
return -EINVAL;
- ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr, true,
+ ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, flags,
extack);
if (ret < 0)
return ret;
static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- void **arg, bool ovr, bool rtnl_held,
+ void **arg, u32 flags,
struct netlink_ext_ack *extack)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
goto errout;
prog->handle = handle;
- ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], ovr,
+ ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], flags,
extack);
if (ret < 0)
goto errout_idr;
static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- void **arg, bool ovr, bool rtnl_held,
+ void **arg, u32 flags,
struct netlink_ext_ack *extack)
{
struct nlattr *tb[TCA_CGROUP_MAX + 1];
if (err < 0)
goto errout;
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, ovr,
- true, extack);
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, flags,
+ extack);
if (err < 0)
goto errout;
static int flow_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- void **arg, bool ovr, bool rtnl_held,
+ void **arg, u32 flags,
struct netlink_ext_ack *extack)
{
struct flow_head *head = rtnl_dereference(tp->root);
if (err < 0)
goto err2;
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, ovr,
- true, extack);
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, flags,
+ extack);
if (err < 0)
goto err2;
static int fl_set_parms(struct net *net, struct tcf_proto *tp,
struct cls_fl_filter *f, struct fl_flow_mask *mask,
unsigned long base, struct nlattr **tb,
- struct nlattr *est, bool ovr,
- struct fl_flow_tmplt *tmplt, bool rtnl_held,
+ struct nlattr *est,
+ struct fl_flow_tmplt *tmplt, u32 flags,
struct netlink_ext_ack *extack)
{
int err;
- err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, rtnl_held,
- extack);
+ err = tcf_exts_validate(net, tp, tb, est, &f->exts, flags, extack);
if (err < 0)
return err;
if (tb[TCA_FLOWER_CLASSID]) {
f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
- if (!rtnl_held)
+ if (flags & TCA_ACT_FLAGS_NO_RTNL)
rtnl_lock();
tcf_bind_filter(tp, &f->res, base);
- if (!rtnl_held)
+ if (flags & TCA_ACT_FLAGS_NO_RTNL)
rtnl_unlock();
}
static int fl_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- void **arg, bool ovr, bool rtnl_held,
+ void **arg, u32 flags,
struct netlink_ext_ack *extack)
{
struct cls_fl_head *head = fl_head_dereference(tp);
+ bool rtnl_held = !(flags & TCA_ACT_FLAGS_NO_RTNL);
struct cls_fl_filter *fold = *arg;
struct cls_fl_filter *fnew;
struct fl_flow_mask *mask;
}
}
- err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE], ovr,
- tp->chain->tmplt_priv, rtnl_held, extack);
+ err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE],
+ tp->chain->tmplt_priv, flags, extack);
if (err)
goto errout;
static int fw_set_parms(struct net *net, struct tcf_proto *tp,
struct fw_filter *f, struct nlattr **tb,
- struct nlattr **tca, unsigned long base, bool ovr,
+ struct nlattr **tca, unsigned long base, u32 flags,
struct netlink_ext_ack *extack)
{
struct fw_head *head = rtnl_dereference(tp->root);
u32 mask;
int err;
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, ovr,
- true, extack);
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, flags,
+ extack);
if (err < 0)
return err;
static int fw_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca, void **arg,
- bool ovr, bool rtnl_held,
- struct netlink_ext_ack *extack)
+ u32 flags, struct netlink_ext_ack *extack)
{
struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f = *arg;
return err;
}
- err = fw_set_parms(net, tp, fnew, tb, tca, base, ovr, extack);
+ err = fw_set_parms(net, tp, fnew, tb, tca, base, flags, extack);
if (err < 0) {
tcf_exts_destroy(&fnew->exts);
kfree(fnew);
f->id = handle;
f->tp = tp;
- err = fw_set_parms(net, tp, f, tb, tca, base, ovr, extack);
+ err = fw_set_parms(net, tp, f, tb, tca, base, flags, extack);
if (err < 0)
goto errout;
static int mall_set_parms(struct net *net, struct tcf_proto *tp,
struct cls_mall_head *head,
unsigned long base, struct nlattr **tb,
- struct nlattr *est, bool ovr,
+ struct nlattr *est, u32 flags,
struct netlink_ext_ack *extack)
{
int err;
- err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr, true,
- extack);
+ err = tcf_exts_validate(net, tp, tb, est, &head->exts, flags, extack);
if (err < 0)
return err;
static int mall_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- void **arg, bool ovr, bool rtnl_held,
+ void **arg, u32 flags,
struct netlink_ext_ack *extack)
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
struct nlattr *tb[TCA_MATCHALL_MAX + 1];
struct cls_mall_head *new;
- u32 flags = 0;
+ u32 userflags = 0;
int err;
if (!tca[TCA_OPTIONS])
return err;
if (tb[TCA_MATCHALL_FLAGS]) {
- flags = nla_get_u32(tb[TCA_MATCHALL_FLAGS]);
- if (!tc_flags_valid(flags))
+ userflags = nla_get_u32(tb[TCA_MATCHALL_FLAGS]);
+ if (!tc_flags_valid(userflags))
return -EINVAL;
}
if (!handle)
handle = 1;
new->handle = handle;
- new->flags = flags;
+ new->flags = userflags;
new->pf = alloc_percpu(struct tc_matchall_pcnt);
if (!new->pf) {
err = -ENOMEM;
goto err_alloc_percpu;
}
- err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], ovr,
+ err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], flags,
extack);
if (err)
goto err_set_parms;
unsigned long base, struct route4_filter *f,
u32 handle, struct route4_head *head,
struct nlattr **tb, struct nlattr *est, int new,
- bool ovr, struct netlink_ext_ack *extack)
+ u32 flags, struct netlink_ext_ack *extack)
{
u32 id = 0, to = 0, nhandle = 0x8000;
struct route4_filter *fp;
struct route4_bucket *b;
int err;
- err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, extack);
+ err = tcf_exts_validate(net, tp, tb, est, &f->exts, flags, extack);
if (err < 0)
return err;
static int route4_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
- struct nlattr **tca, void **arg, bool ovr,
- bool rtnl_held, struct netlink_ext_ack *extack)
+ struct nlattr **tca, void **arg, u32 flags,
+ struct netlink_ext_ack *extack)
{
struct route4_head *head = rtnl_dereference(tp->root);
struct route4_filter __rcu **fp;
}
err = route4_set_parms(net, tp, base, f, handle, head, tb,
- tca[TCA_RATE], new, ovr, extack);
+ tca[TCA_RATE], new, flags, extack);
if (err < 0)
goto errout;
static int rsvp_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
- u32 handle,
- struct nlattr **tca,
- void **arg, bool ovr, bool rtnl_held,
+ u32 handle, struct nlattr **tca,
+ void **arg, u32 flags,
struct netlink_ext_ack *extack)
{
struct rsvp_head *data = rtnl_dereference(tp->root);
err = tcf_exts_init(&e, net, TCA_RSVP_ACT, TCA_RSVP_POLICE);
if (err < 0)
return err;
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, true,
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, flags,
extack);
if (err < 0)
goto errout2;
tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
u32 handle, struct tcindex_data *p,
struct tcindex_filter_result *r, struct nlattr **tb,
- struct nlattr *est, bool ovr, struct netlink_ext_ack *extack)
+ struct nlattr *est, u32 flags, struct netlink_ext_ack *extack)
{
struct tcindex_filter_result new_filter_result, *old_r = r;
struct tcindex_data *cp = NULL, *oldp;
err = tcf_exts_init(&e, net, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
if (err < 0)
return err;
- err = tcf_exts_validate(net, tp, tb, est, &e, ovr, true, extack);
+ err = tcf_exts_validate(net, tp, tb, est, &e, flags, extack);
if (err < 0)
goto errout;
static int
tcindex_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
- struct nlattr **tca, void **arg, bool ovr,
- bool rtnl_held, struct netlink_ext_ack *extack)
+ struct nlattr **tca, void **arg, u32 flags,
+ struct netlink_ext_ack *extack)
{
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_TCINDEX_MAX + 1];
return err;
return tcindex_set_parms(net, tp, base, handle, p, r, tb,
- tca[TCA_RATE], ovr, extack);
+ tca[TCA_RATE], flags, extack);
}
static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker,
static int u32_set_parms(struct net *net, struct tcf_proto *tp,
unsigned long base,
struct tc_u_knode *n, struct nlattr **tb,
- struct nlattr *est, bool ovr,
+ struct nlattr *est, u32 flags,
struct netlink_ext_ack *extack)
{
int err;
- err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, true, extack);
+ err = tcf_exts_validate(net, tp, tb, est, &n->exts, flags, extack);
if (err < 0)
return err;
static int u32_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
- struct nlattr **tca, void **arg, bool ovr, bool rtnl_held,
+ struct nlattr **tca, void **arg, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_u_common *tp_c = tp->data;
struct tc_u32_sel *s;
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_U32_MAX + 1];
- u32 htid, flags = 0;
+ u32 htid, userflags = 0;
size_t sel_size;
int err;
return err;
if (tb[TCA_U32_FLAGS]) {
- flags = nla_get_u32(tb[TCA_U32_FLAGS]);
- if (!tc_flags_valid(flags)) {
+ userflags = nla_get_u32(tb[TCA_U32_FLAGS]);
+ if (!tc_flags_valid(userflags)) {
NL_SET_ERR_MSG_MOD(extack, "Invalid filter flags");
return -EINVAL;
}
return -EINVAL;
}
- if ((n->flags ^ flags) &
+ if ((n->flags ^ userflags) &
~(TCA_CLS_FLAGS_IN_HW | TCA_CLS_FLAGS_NOT_IN_HW)) {
NL_SET_ERR_MSG_MOD(extack, "Key node flags do not match passed flags");
return -EINVAL;
return -ENOMEM;
err = u32_set_parms(net, tp, base, new, tb,
- tca[TCA_RATE], ovr, extack);
+ tca[TCA_RATE], flags, extack);
if (err) {
u32_destroy_key(new, false);
ht->handle = handle;
ht->prio = tp->prio;
idr_init(&ht->handle_idr);
- ht->flags = flags;
+ ht->flags = userflags;
- err = u32_replace_hw_hnode(tp, ht, flags, extack);
+ err = u32_replace_hw_hnode(tp, ht, userflags, extack);
if (err) {
idr_remove(&tp_c->handle_idr, handle);
kfree(ht);
RCU_INIT_POINTER(n->ht_up, ht);
n->handle = handle;
n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
- n->flags = flags;
+ n->flags = userflags;
err = tcf_exts_init(&n->exts, net, TCA_U32_ACT, TCA_U32_POLICE);
if (err < 0)
}
#endif
- err = u32_set_parms(net, tp, base, n, tb, tca[TCA_RATE], ovr,
+ err = u32_set_parms(net, tp, base, n, tb, tca[TCA_RATE], flags,
extack);
if (err == 0) {
struct tc_u_knode __rcu **ins;
/* seqlock has the same scope of busylock, for NOLOCK qdisc */
spin_lock_init(&sch->seqlock);
- lockdep_set_class(&sch->busylock,
+ lockdep_set_class(&sch->seqlock,
dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
seqcount_init(&sch->running);
if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
- if (ntx < dev->real_num_tx_queues)
- qdisc_hash_add(qdisc, false);
} else {
old = dev_graft_qdisc(qdisc->dev_queue, sch);
qdisc_refcount_inc(sch);
memcpy(key->data, &auth_key->sca_key[0], auth_key->sca_keylength);
cur_key->key = key;
- if (replace) {
- list_del_init(&shkey->key_list);
- sctp_auth_shkey_release(shkey);
- if (asoc && asoc->active_key_id == auth_key->sca_keynumber)
- sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL);
+ if (!replace) {
+ list_add(&cur_key->key_list, sh_keys);
+ return 0;
}
+
+ list_del_init(&shkey->key_list);
+ sctp_auth_shkey_release(shkey);
list_add(&cur_key->key_list, sh_keys);
+ if (asoc && asoc->active_key_id == auth_key->sca_keynumber)
+ sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL);
+
return 0;
}
if (unlikely(!af))
return NULL;
- if (af->from_addr_param(&paddr, param, peer_port, 0))
+ if (!af->from_addr_param(&paddr, param, peer_port, 0))
return NULL;
return __sctp_lookup_association(net, laddr, &paddr, transportp);
list_for_each_entry_safe(addr, temp,
&net->sctp.local_addr_list, list) {
if (addr->a.sa.sa_family == AF_INET6 &&
- ipv6_addr_equal(&addr->a.v6.sin6_addr,
- &ifa->addr)) {
+ ipv6_addr_equal(&addr->a.v6.sin6_addr,
+ &ifa->addr) &&
+ addr->a.v6.sin6_scope_id == ifa->idev->dev->ifindex) {
sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_DEL);
found = 1;
addr->valid = 0;
if (!sctp_transport_pl_enabled(transport))
return SCTP_DISPOSITION_CONSUME;
- sctp_transport_pl_send(transport);
-
- reply = sctp_make_heartbeat(asoc, transport, transport->pl.probe_size);
- if (!reply)
- return SCTP_DISPOSITION_NOMEM;
- sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply));
+ if (sctp_transport_pl_send(transport)) {
+ reply = sctp_make_heartbeat(asoc, transport, transport->pl.probe_size);
+ if (!reply)
+ return SCTP_DISPOSITION_NOMEM;
+ sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply));
+ }
sctp_add_cmd_sf(commands, SCTP_CMD_PROBE_TIMER_UPDATE,
SCTP_TRANSPORT(transport));
!sctp_transport_pl_enabled(link))
return SCTP_DISPOSITION_DISCARD;
- sctp_transport_pl_recv(link);
- if (link->pl.state == SCTP_PL_COMPLETE)
+ if (sctp_transport_pl_recv(link))
return SCTP_DISPOSITION_CONSUME;
return sctp_sf_send_probe(net, ep, asoc, type, link, commands);
sctp_transport_pl_update(transport);
}
-void sctp_transport_pl_send(struct sctp_transport *t)
+bool sctp_transport_pl_send(struct sctp_transport *t)
{
- pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n",
- __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high);
-
- if (t->pl.probe_count < SCTP_MAX_PROBES) {
- t->pl.probe_count++;
- return;
- }
+ if (t->pl.probe_count < SCTP_MAX_PROBES)
+ goto out;
+ t->pl.last_rtx_chunks = t->asoc->rtx_data_chunks;
+ t->pl.probe_count = 0;
if (t->pl.state == SCTP_PL_BASE) {
if (t->pl.probe_size == SCTP_BASE_PLPMTU) { /* BASE_PLPMTU Confirmation Failed */
t->pl.state = SCTP_PL_ERROR; /* Base -> Error */
sctp_assoc_sync_pmtu(t->asoc);
}
}
- t->pl.probe_count = 1;
+
+out:
+ if (t->pl.state == SCTP_PL_COMPLETE && t->pl.raise_count < 30 &&
+ !t->pl.probe_count && t->pl.last_rtx_chunks == t->asoc->rtx_data_chunks) {
+ t->pl.raise_count++;
+ return false;
+ }
+
+ pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n",
+ __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high);
+
+ t->pl.probe_count++;
+ return true;
}
-void sctp_transport_pl_recv(struct sctp_transport *t)
+bool sctp_transport_pl_recv(struct sctp_transport *t)
{
pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n",
__func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high);
+ t->pl.last_rtx_chunks = t->asoc->rtx_data_chunks;
t->pl.pmtu = t->pl.probe_size;
t->pl.probe_count = 0;
if (t->pl.state == SCTP_PL_BASE) {
if (!t->pl.probe_high) {
t->pl.probe_size = min(t->pl.probe_size + SCTP_PL_BIG_STEP,
SCTP_MAX_PLPMTU);
- return;
+ return false;
}
t->pl.probe_size += SCTP_PL_MIN_STEP;
if (t->pl.probe_size >= t->pl.probe_high) {
t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
sctp_assoc_sync_pmtu(t->asoc);
}
- } else if (t->pl.state == SCTP_PL_COMPLETE) {
- t->pl.raise_count++;
- if (t->pl.raise_count == 30) {
- /* Raise probe_size again after 30 * interval in Search Complete */
- t->pl.state = SCTP_PL_SEARCH; /* Search Complete -> Search */
- t->pl.probe_size += SCTP_PL_MIN_STEP;
- }
+ } else if (t->pl.state == SCTP_PL_COMPLETE && t->pl.raise_count == 30) {
+ /* Raise probe_size again after 30 * interval in Search Complete */
+ t->pl.state = SCTP_PL_SEARCH; /* Search Complete -> Search */
+ t->pl.probe_size += SCTP_PL_MIN_STEP;
}
+
+ return t->pl.state == SCTP_PL_COMPLETE;
}
static bool sctp_transport_pl_toobig(struct sctp_transport *t, u32 pmtu)
return rc;
}
-/* convert the RMB size into the compressed notation - minimum 16K.
+#define SMCD_DMBE_SIZES 6 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
+#define SMCR_RMBE_SIZES 5 /* 0 -> 16KB, 1 -> 32KB, .. 5 -> 512KB */
+
+/* convert the RMB size into the compressed notation (minimum 16K, see
+ * SMCD/R_DMBE_SIZES.
* In contrast to plain ilog2, this rounds towards the next power of 2,
* so the socket application gets at least its desired sndbuf / rcvbuf size.
*/
-static u8 smc_compress_bufsize(int size)
+static u8 smc_compress_bufsize(int size, bool is_smcd, bool is_rmb)
{
+ const unsigned int max_scat = SG_MAX_SINGLE_ALLOC * PAGE_SIZE;
u8 compressed;
if (size <= SMC_BUF_MIN_SIZE)
return 0;
- size = (size - 1) >> 14;
- compressed = ilog2(size) + 1;
- if (compressed >= SMC_RMBE_SIZES)
- compressed = SMC_RMBE_SIZES - 1;
+ size = (size - 1) >> 14; /* convert to 16K multiple */
+ compressed = min_t(u8, ilog2(size) + 1,
+ is_smcd ? SMCD_DMBE_SIZES : SMCR_RMBE_SIZES);
+
+ if (!is_smcd && is_rmb)
+ /* RMBs are backed by & limited to max size of scatterlists */
+ compressed = min_t(u8, compressed, ilog2(max_scat >> 14));
+
return compressed;
}
return rc;
}
-#define SMCD_DMBE_SIZES 6 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
-
static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
bool is_dmb, int bufsize)
{
struct smc_buf_desc *buf_desc;
int rc;
- if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES)
- return ERR_PTR(-EAGAIN);
-
/* try to alloc a new DMB */
buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
if (!buf_desc)
/* use socket send buffer size (w/o overhead) as start value */
sk_buf_size = smc->sk.sk_sndbuf / 2;
- for (bufsize_short = smc_compress_bufsize(sk_buf_size);
+ for (bufsize_short = smc_compress_bufsize(sk_buf_size, is_smcd, is_rmb);
bufsize_short >= 0; bufsize_short--) {
-
if (is_rmb) {
lock = &lgr->rmbs_lock;
buf_list = &lgr->rmbs[bufsize_short];
buf_list = &lgr->sndbufs[bufsize_short];
}
bufsize = smc_uncompress_bufsize(bufsize_short);
- if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
- continue;
/* check for reusable slot in the link group */
buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
if (!libdev->ops.get_netdev)
continue;
lndev = libdev->ops.get_netdev(libdev, i + 1);
- if (lndev)
- dev_put(lndev);
+ dev_put(lndev);
if (lndev != ndev)
continue;
if (event == NETDEV_REGISTER)
return 0;
out_put:
- if (ndev)
- dev_put(ndev);
+ dev_put(ndev);
return rc;
}
return err;
}
EXPORT_SYMBOL_GPL(switchdev_handle_port_attr_set);
+
+int switchdev_bridge_port_offload(struct net_device *brport_dev,
+ struct net_device *dev, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ bool tx_fwd_offload,
+ struct netlink_ext_ack *extack)
+{
+ struct switchdev_notifier_brport_info brport_info = {
+ .brport = {
+ .dev = dev,
+ .ctx = ctx,
+ .atomic_nb = atomic_nb,
+ .blocking_nb = blocking_nb,
+ .tx_fwd_offload = tx_fwd_offload,
+ },
+ };
+ int err;
+
+ ASSERT_RTNL();
+
+ err = call_switchdev_blocking_notifiers(SWITCHDEV_BRPORT_OFFLOADED,
+ brport_dev, &brport_info.info,
+ extack);
+ return notifier_to_errno(err);
+}
+EXPORT_SYMBOL_GPL(switchdev_bridge_port_offload);
+
+void switchdev_bridge_port_unoffload(struct net_device *brport_dev,
+ const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb)
+{
+ struct switchdev_notifier_brport_info brport_info = {
+ .brport = {
+ .ctx = ctx,
+ .atomic_nb = atomic_nb,
+ .blocking_nb = blocking_nb,
+ },
+ };
+
+ ASSERT_RTNL();
+
+ call_switchdev_blocking_notifiers(SWITCHDEV_BRPORT_UNOFFLOADED,
+ brport_dev, &brport_info.info,
+ NULL);
+}
+EXPORT_SYMBOL_GPL(switchdev_bridge_port_unoffload);
if (unlikely(!aead))
return -ENOKEY;
- /* Cow skb data if needed */
- if (likely(!skb_cloned(skb) &&
- (!skb_is_nonlinear(skb) || !skb_has_frag_list(skb)))) {
- nsg = 1 + skb_shinfo(skb)->nr_frags;
- } else {
- nsg = skb_cow_data(skb, 0, &unused);
- if (unlikely(nsg < 0)) {
- pr_err("RX: skb_cow_data() returned %d\n", nsg);
- return nsg;
- }
+ nsg = skb_cow_data(skb, 0, &unused);
+ if (unlikely(nsg < 0)) {
+ pr_err("RX: skb_cow_data() returned %d\n", nsg);
+ return nsg;
}
/* Allocate memory for the AEAD operation */
static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz);
static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz);
static void tipc_sk_push_backlog(struct tipc_sock *tsk, bool nagle_ack);
+static int tipc_wait_for_connect(struct socket *sock, long *timeo_p);
static const struct proto_ops packet_ops;
static const struct proto_ops stream_ops;
rc = 0;
}
- if (unlikely(syn && !rc))
+ if (unlikely(syn && !rc)) {
tipc_set_sk_state(sk, TIPC_CONNECTING);
+ if (timeout) {
+ timeout = msecs_to_jiffies(timeout);
+ tipc_wait_for_connect(sock, &timeout);
+ }
+ }
return rc ? rc : dlen;
}
return -EMSGSIZE;
/* Handle implicit connection setup */
- if (unlikely(dest)) {
+ if (unlikely(dest && sk->sk_state == TIPC_OPEN)) {
rc = __tipc_sendmsg(sock, m, dlen);
if (dlen && dlen == rc) {
tsk->peer_caps = tipc_node_get_capabilities(net, dnode);
static int tipc_wait_for_accept(struct socket *sock, long timeo)
{
struct sock *sk = sock->sk;
- DEFINE_WAIT(wait);
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
int err;
/* True wake-one mechanism for incoming connections: only
* anymore, the common case will execute the loop only once.
*/
for (;;) {
- prepare_to_wait_exclusive(sk_sleep(sk), &wait,
- TASK_INTERRUPTIBLE);
if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
+ add_wait_queue(sk_sleep(sk), &wait);
release_sock(sk);
- timeo = schedule_timeout(timeo);
+ timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
lock_sock(sk);
+ remove_wait_queue(sk_sleep(sk), &wait);
}
err = 0;
if (!skb_queue_empty(&sk->sk_receive_queue))
if (signal_pending(current))
break;
}
- finish_wait(sk_sleep(sk), &wait);
return err;
}
bool kern)
{
struct sock *new_sk, *sk = sock->sk;
- struct sk_buff *buf;
struct tipc_sock *new_tsock;
+ struct msghdr m = {NULL,};
struct tipc_msg *msg;
+ struct sk_buff *buf;
long timeo;
int res;
}
/*
- * Respond to 'SYN-' by discarding it & returning 'ACK'-.
- * Respond to 'SYN+' by queuing it on new socket.
+ * Respond to 'SYN-' by discarding it & returning 'ACK'.
+ * Respond to 'SYN+' by queuing it on new socket & returning 'ACK'.
*/
if (!msg_data_sz(msg)) {
- struct msghdr m = {NULL,};
-
tsk_advance_rx_queue(sk);
- __tipc_sendstream(new_sock, &m, 0);
} else {
__skb_dequeue(&sk->sk_receive_queue);
__skb_queue_head(&new_sk->sk_receive_queue, buf);
skb_set_owner_r(buf, new_sk);
}
+ __tipc_sendstream(new_sock, &m, 0);
release_sock(new_sk);
exit:
release_sock(sk);
depends on UNIX
default y
+config AF_UNIX_OOB
+ bool
+ depends on UNIX
+ default y
+
config UNIX_DIAG
tristate "UNIX: socket monitoring interface"
depends on UNIX
skb_queue_purge(&sk->sk_receive_queue);
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+ if (u->oob_skb) {
+ kfree_skb(u->oob_skb);
+ u->oob_skb = NULL;
+ }
+#endif
WARN_ON(refcount_read(&sk->sk_wmem_alloc));
WARN_ON(!sk_unhashed(sk));
WARN_ON(sk->sk_socket);
return err;
}
+static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
+{
+ scm->fp = scm_fp_dup(UNIXCB(skb).fp);
+
+ /*
+ * Garbage collection of unix sockets starts by selecting a set of
+ * candidate sockets which have reference only from being in flight
+ * (total_refs == inflight_refs). This condition is checked once during
+ * the candidate collection phase, and candidates are marked as such, so
+ * that non-candidates can later be ignored. While inflight_refs is
+ * protected by unix_gc_lock, total_refs (file count) is not, hence this
+ * is an instantaneous decision.
+ *
+ * Once a candidate, however, the socket must not be reinstalled into a
+ * file descriptor while the garbage collection is in progress.
+ *
+ * If the above conditions are met, then the directed graph of
+ * candidates (*) does not change while unix_gc_lock is held.
+ *
+ * Any operations that changes the file count through file descriptors
+ * (dup, close, sendmsg) does not change the graph since candidates are
+ * not installed in fds.
+ *
+ * Dequeing a candidate via recvmsg would install it into an fd, but
+ * that takes unix_gc_lock to decrement the inflight count, so it's
+ * serialized with garbage collection.
+ *
+ * MSG_PEEK is special in that it does not change the inflight count,
+ * yet does install the socket into an fd. The following lock/unlock
+ * pair is to ensure serialization with garbage collection. It must be
+ * done between incrementing the file count and installing the file into
+ * an fd.
+ *
+ * If garbage collection starts after the barrier provided by the
+ * lock/unlock, then it will see the elevated refcount and not mark this
+ * as a candidate. If a garbage collection is already in progress
+ * before the file count was incremented, then the lock/unlock pair will
+ * ensure that garbage collection is finished before progressing to
+ * installing the fd.
+ *
+ * (*) A -> B where B is on the queue of A or B is on the queue of C
+ * which is on the queue of listening socket A.
+ */
+ spin_lock(&unix_gc_lock);
+ spin_unlock(&unix_gc_lock);
+}
+
static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
int err = 0;
*/
#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
+#if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
+static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other)
+{
+ struct unix_sock *ousk = unix_sk(other);
+ struct sk_buff *skb;
+ int err = 0;
+
+ skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err);
+
+ if (!skb)
+ return err;
+
+ skb_put(skb, 1);
+ skb->len = 1;
+ err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1);
+
+ if (err) {
+ kfree_skb(skb);
+ return err;
+ }
+
+ unix_state_lock(other);
+ maybe_add_creds(skb, sock, other);
+ skb_get(skb);
+
+ if (ousk->oob_skb)
+ kfree_skb(ousk->oob_skb);
+
+ ousk->oob_skb = skb;
+
+ scm_stat_add(other, skb);
+ skb_queue_tail(&other->sk_receive_queue, skb);
+ sk_send_sigurg(other);
+ unix_state_unlock(other);
+ other->sk_data_ready(other);
+
+ return err;
+}
+#endif
+
static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
size_t len)
{
return err;
err = -EOPNOTSUPP;
- if (msg->msg_flags&MSG_OOB)
- goto out_err;
+ if (msg->msg_flags & MSG_OOB) {
+#if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
+ if (len)
+ len--;
+ else
+#endif
+ goto out_err;
+ }
if (msg->msg_namelen) {
err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
sent += size;
}
+#if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
+ if (msg->msg_flags & MSG_OOB) {
+ err = queue_oob(sock, msg, other);
+ if (err)
+ goto out_err;
+ sent++;
+ }
+#endif
+
scm_destroy(&scm);
return sent;
sk_peek_offset_fwd(sk, size);
if (UNIXCB(skb).fp)
- scm.fp = scm_fp_dup(UNIXCB(skb).fp);
+ unix_peek_fds(&scm, skb);
}
err = (flags & MSG_TRUNC) ? skb->len - skip : size;
unsigned int splice_flags;
};
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+static int unix_stream_recv_urg(struct unix_stream_read_state *state)
+{
+ struct socket *sock = state->socket;
+ struct sock *sk = sock->sk;
+ struct unix_sock *u = unix_sk(sk);
+ int chunk = 1;
+
+ if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb)
+ return -EINVAL;
+
+ chunk = state->recv_actor(u->oob_skb, 0, chunk, state);
+ if (chunk < 0)
+ return -EFAULT;
+
+ if (!(state->flags & MSG_PEEK)) {
+ UNIXCB(u->oob_skb).consumed += 1;
+ kfree_skb(u->oob_skb);
+ u->oob_skb = NULL;
+ }
+ state->msg->msg_flags |= MSG_OOB;
+ return 1;
+}
+
+static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
+ int flags, int copied)
+{
+ struct unix_sock *u = unix_sk(sk);
+
+ if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) {
+ skb_unlink(skb, &sk->sk_receive_queue);
+ consume_skb(skb);
+ skb = NULL;
+ } else {
+ if (skb == u->oob_skb) {
+ if (copied) {
+ skb = NULL;
+ } else if (sock_flag(sk, SOCK_URGINLINE)) {
+ if (!(flags & MSG_PEEK)) {
+ u->oob_skb = NULL;
+ consume_skb(skb);
+ }
+ } else if (!(flags & MSG_PEEK)) {
+ skb_unlink(skb, &sk->sk_receive_queue);
+ consume_skb(skb);
+ skb = skb_peek(&sk->sk_receive_queue);
+ }
+ }
+ }
+ return skb;
+}
+#endif
+
static int unix_stream_read_generic(struct unix_stream_read_state *state,
bool freezable)
{
if (unlikely(flags & MSG_OOB)) {
err = -EOPNOTSUPP;
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+ mutex_lock(&u->iolock);
+ unix_state_lock(sk);
+
+ err = unix_stream_recv_urg(state);
+
+ unix_state_unlock(sk);
+ mutex_unlock(&u->iolock);
+#endif
goto out;
}
}
last = skb = skb_peek(&sk->sk_receive_queue);
last_len = last ? last->len : 0;
+
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+ if (skb) {
+ skb = manage_oob(skb, sk, flags, copied);
+ if (!skb) {
+ unix_state_unlock(sk);
+ if (copied)
+ break;
+ goto redo;
+ }
+ }
+#endif
again:
if (skb == NULL) {
if (copied >= target)
/* It is questionable, see note in unix_dgram_recvmsg.
*/
if (UNIXCB(skb).fp)
- scm.fp = scm_fp_dup(UNIXCB(skb).fp);
+ unix_peek_fds(&scm, skb);
sk_peek_offset_fwd(sk, chunk);
case SIOCUNIXFILE:
err = unix_open_file(sk);
break;
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+ case SIOCATMARK:
+ {
+ struct sk_buff *skb;
+ struct unix_sock *u = unix_sk(sk);
+ int answ = 0;
+
+ skb = skb_peek(&sk->sk_receive_queue);
+ if (skb && skb == u->oob_skb)
+ answ = 1;
+ err = put_user(answ, (int __user *)arg);
+ }
+ break;
+#endif
default:
err = -ENOIOCTLCMD;
break;
{
struct unix_sock *u = unix_sk(sk);
struct sk_psock *psock;
- int copied, ret;
+ int copied;
psock = sk_psock_get(sk);
if (unlikely(!psock))
mutex_lock(&u->iolock);
if (!skb_queue_empty(&sk->sk_receive_queue) &&
sk_psock_queue_empty(psock)) {
- ret = __unix_dgram_recvmsg(sk, msg, len, flags);
- goto out;
+ mutex_unlock(&u->iolock);
+ sk_psock_put(sk, psock);
+ return __unix_dgram_recvmsg(sk, msg, len, flags);
}
msg_bytes_ready:
if (data) {
if (!sk_psock_queue_empty(psock))
goto msg_bytes_ready;
- ret = __unix_dgram_recvmsg(sk, msg, len, flags);
- goto out;
+ mutex_unlock(&u->iolock);
+ sk_psock_put(sk, psock);
+ return __unix_dgram_recvmsg(sk, msg, len, flags);
}
copied = -EAGAIN;
}
- ret = copied;
-out:
mutex_unlock(&u->iolock);
sk_psock_put(sk, psock);
- return ret;
+ return copied;
}
static struct proto *unix_prot_saved __read_mostly;
int unix_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
{
+ if (sk->sk_type != SOCK_DGRAM)
+ return -EOPNOTSUPP;
+
if (restore) {
sk->sk_write_space = psock->saved_write_space;
WRITE_ONCE(sk->sk_prot, psock->sk_proto);
virtio_transport_recv_enqueue(vsk, pkt);
sk->sk_data_ready(sk);
return err;
+ case VIRTIO_VSOCK_OP_CREDIT_REQUEST:
+ virtio_transport_send_credit_update(vsk);
+ break;
case VIRTIO_VSOCK_OP_CREDIT_UPDATE:
sk->sk_write_space(sk);
break;
goto nla_put_failure;
for (band = state->band_start;
- band < NUM_NL80211_BANDS; band++) {
+ band < (state->split ?
+ NUM_NL80211_BANDS :
+ NL80211_BAND_60GHZ + 1);
+ band++) {
struct ieee80211_supported_band *sband;
/* omit higher bands for ancient software */
err = rdev_change_station(rdev, dev, mac_addr, ¶ms);
out_put_vlan:
- if (params.vlan)
- dev_put(params.vlan);
+ dev_put(params.vlan);
return err;
}
err = rdev_add_station(rdev, dev, mac_addr, ¶ms);
- if (params.vlan)
- dev_put(params.vlan);
+ dev_put(params.vlan);
return err;
}
goto out_free;
nl80211_send_scan_start(rdev, wdev);
- if (wdev->netdev)
- dev_hold(wdev->netdev);
+ dev_hold(wdev->netdev);
return 0;
return -ENETDOWN;
}
- if (dev)
- dev_hold(dev);
-
+ dev_hold(dev);
info->user_ptr[0] = rdev;
}
if (ops->internal_flags & NL80211_FLAG_NEED_WDEV) {
struct wireless_dev *wdev = info->user_ptr[1];
- if (wdev->netdev)
- dev_put(wdev->netdev);
+ dev_put(wdev->netdev);
} else {
dev_put(info->user_ptr[1]);
}
}
#endif
- if (wdev->netdev)
- dev_put(wdev->netdev);
+ dev_put(wdev->netdev);
kfree(rdev->int_scan_req);
rdev->int_scan_req = NULL;
* be grouped with this beacon for updates ...
*/
if (!cfg80211_combine_bsses(rdev, new)) {
- kfree(new);
+ bss_ref_put(rdev, new);
goto drop;
}
}
if (rdev->bss_entries >= bss_entries_limit &&
!cfg80211_bss_expire_oldest(rdev)) {
- if (!list_empty(&new->hidden_list))
- list_del(&new->hidden_list);
- kfree(new);
+ bss_ref_put(rdev, new);
goto drop;
}
len = nlmsg_attrlen(nlh_src, xfrm_msg_min[type]);
nla_for_each_attr(nla, attrs, len, remaining) {
- int err = xfrm_xlate64_attr(dst, nla);
+ int err;
+ switch (type) {
+ case XFRM_MSG_NEWSPDINFO:
+ err = xfrm_nla_cpy(dst, nla, nla_len(nla));
+ break;
+ default:
+ err = xfrm_xlate64_attr(dst, nla);
+ break;
+ }
if (err)
return err;
}
/* Calculates len of translated 64-bit message. */
static size_t xfrm_user_rcv_calculate_len64(const struct nlmsghdr *src,
- struct nlattr *attrs[XFRMA_MAX+1])
+ struct nlattr *attrs[XFRMA_MAX + 1],
+ int maxtype)
{
size_t len = nlmsg_len(src);
case XFRM_MSG_POLEXPIRE:
len += 8;
break;
+ case XFRM_MSG_NEWSPDINFO:
+ /* attirbutes are xfrm_spdattr_type_t, not xfrm_attr_type_t */
+ return len;
default:
break;
}
+ /* Unexpected for anything, but XFRM_MSG_NEWSPDINFO, please
+ * correct both 64=>32-bit and 32=>64-bit translators to copy
+ * new attributes.
+ */
+ if (WARN_ON_ONCE(maxtype))
+ return len;
+
if (attrs[XFRMA_SA])
len += 4;
if (attrs[XFRMA_POLICY])
static int xfrm_xlate32(struct nlmsghdr *dst, const struct nlmsghdr *src,
struct nlattr *attrs[XFRMA_MAX+1],
- size_t size, u8 type, struct netlink_ext_ack *extack)
+ size_t size, u8 type, int maxtype,
+ struct netlink_ext_ack *extack)
{
size_t pos;
int i;
}
pos = dst->nlmsg_len;
+ if (maxtype) {
+ /* attirbutes are xfrm_spdattr_type_t, not xfrm_attr_type_t */
+ WARN_ON_ONCE(src->nlmsg_type != XFRM_MSG_NEWSPDINFO);
+
+ for (i = 1; i <= maxtype; i++) {
+ int err;
+
+ if (!attrs[i])
+ continue;
+
+ /* just copy - no need for translation */
+ err = xfrm_attr_cpy32(dst, &pos, attrs[i], size,
+ nla_len(attrs[i]), nla_len(attrs[i]));
+ if (err)
+ return err;
+ }
+ return 0;
+ }
+
for (i = 1; i < XFRMA_MAX + 1; i++) {
int err;
if (err < 0)
return ERR_PTR(err);
- len = xfrm_user_rcv_calculate_len64(h32, attrs);
+ len = xfrm_user_rcv_calculate_len64(h32, attrs, maxtype);
/* The message doesn't need translation */
if (len == nlmsg_len(h32))
return NULL;
if (!h64)
return ERR_PTR(-ENOMEM);
- err = xfrm_xlate32(h64, h32, attrs, len, type, extack);
+ err = xfrm_xlate32(h64, h32, attrs, len, type, maxtype, extack);
if (err < 0) {
kvfree(h64);
return ERR_PTR(err);
break;
}
- WARN_ON(!pos);
+ WARN_ON(list_entry_is_head(pos, &ipcomp_tfms_list, list));
if (--pos->users)
return;
__read_mostly;
static struct kmem_cache *xfrm_dst_cache __ro_after_init;
-static __read_mostly seqcount_mutex_t xfrm_policy_hash_generation;
static struct rhashtable xfrm_policy_inexact_table;
static const struct rhashtable_params xfrm_pol_inexact_params;
return;
spin_lock_bh(&net->xfrm.xfrm_policy_lock);
- write_seqcount_begin(&xfrm_policy_hash_generation);
+ write_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation);
odst = rcu_dereference_protected(net->xfrm.policy_bydst[dir].table,
lockdep_is_held(&net->xfrm.xfrm_policy_lock));
rcu_assign_pointer(net->xfrm.policy_bydst[dir].table, ndst);
net->xfrm.policy_bydst[dir].hmask = nhashmask;
- write_seqcount_end(&xfrm_policy_hash_generation);
+ write_seqcount_end(&net->xfrm.xfrm_policy_hash_generation);
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
synchronize_rcu();
} while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq));
spin_lock_bh(&net->xfrm.xfrm_policy_lock);
- write_seqcount_begin(&xfrm_policy_hash_generation);
+ write_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation);
/* make sure that we can insert the indirect policies again before
* we start with destructive action.
out_unlock:
__xfrm_policy_inexact_flush(net);
- write_seqcount_end(&xfrm_policy_hash_generation);
+ write_seqcount_end(&net->xfrm.xfrm_policy_hash_generation);
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
mutex_unlock(&hash_resize_mutex);
if (unlikely(!daddr || !saddr))
return NULL;
- retry:
- sequence = read_seqcount_begin(&xfrm_policy_hash_generation);
rcu_read_lock();
-
- chain = policy_hash_direct(net, daddr, saddr, family, dir);
- if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)) {
- rcu_read_unlock();
- goto retry;
- }
+ retry:
+ do {
+ sequence = read_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation);
+ chain = policy_hash_direct(net, daddr, saddr, family, dir);
+ } while (read_seqcount_retry(&net->xfrm.xfrm_policy_hash_generation, sequence));
ret = NULL;
hlist_for_each_entry_rcu(pol, chain, bydst) {
}
skip_inexact:
- if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)) {
- rcu_read_unlock();
+ if (read_seqcount_retry(&net->xfrm.xfrm_policy_hash_generation, sequence))
goto retry;
- }
- if (ret && !xfrm_pol_hold_rcu(ret)) {
- rcu_read_unlock();
+ if (ret && !xfrm_pol_hold_rcu(ret))
goto retry;
- }
fail:
rcu_read_unlock();
/* Initialize the per-net locks here */
spin_lock_init(&net->xfrm.xfrm_state_lock);
spin_lock_init(&net->xfrm.xfrm_policy_lock);
+ seqcount_spinlock_init(&net->xfrm.xfrm_policy_hash_generation, &net->xfrm.xfrm_policy_lock);
mutex_init(&net->xfrm.xfrm_cfg_mutex);
rv = xfrm_statistics_init(net);
{
register_pernet_subsys(&xfrm_net_ops);
xfrm_dev_init();
- seqcount_mutex_init(&xfrm_policy_hash_generation, &hash_resize_mutex);
xfrm_input_init();
#ifdef CONFIG_XFRM_ESPINTCP
err = link->doit(skb, nlh, attrs);
+ /* We need to free skb allocated in xfrm_alloc_compat() before
+ * returning from this function, because consume_skb() won't take
+ * care of frag_list since netlink destructor sets
+ * sbk->head to NULL. (see netlink_skb_destructor())
+ */
+ if (skb_has_frag_list(skb)) {
+ kfree_skb(skb_shinfo(skb)->frag_list);
+ skb_shinfo(skb)->frag_list = NULL;
+ }
+
err:
kvfree(nlh64);
return err;
xdp_redirect
xdp_redirect_cpu
xdp_redirect_map
+xdp_redirect_map_multi
xdp_router_ipv4
xdp_rxq_info
xdp_sample_pkts
xdp_tx_iptunnel
xdpsock
+xdpsock_ctrl_proc
xsk_fwd
testfile.img
hbm_out.log
#!/bin/bash
+rm -r tmpmnt
rm -f testfile.img
dd if=/dev/zero of=testfile.img bs=1M seek=1000 count=1
DEVICE=$(losetup --show -f testfile.img)
int ret = 0;
FILE *f;
+ if (!argv[1]) {
+ fprintf(stderr, "ERROR: Run with the btrfs device argument!\n");
+ return 0;
+ }
+
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
obj = bpf_object__open_file(filename, NULL);
if (libbpf_get_error(obj)) {
h_proto = eth->h_proto;
+ /* Handle VLAN tagged packet */
if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
struct vlan_hdr *vhdr;
return rc;
h_proto = vhdr->h_vlan_encapsulated_proto;
}
+ /* Handle double VLAN tagged packet */
if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
struct vlan_hdr *vhdr;
h_proto = eth->h_proto;
+ /* Handle VLAN tagged packet */
if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
struct vlan_hdr *vhdr;
return rc;
h_proto = vhdr->h_vlan_encapsulated_proto;
}
+ /* Handle double VLAN tagged packet */
if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
struct vlan_hdr *vhdr;
memset(cpu, 0, n_cpus * sizeof(int));
/* Parse commands line args */
- while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzFf:e:r:m:",
+ while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzFf:e:r:m:n",
long_options, &longindex)) != -1) {
switch (opt) {
case 'd':
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2017 - 2018 Intel Corporation. */
-#include <asm/barrier.h>
#include <errno.h>
#include <getopt.h>
#include <libgen.h>
#include <linux/bpf.h>
-#include <linux/compiler.h>
#include <linux/if_link.h>
#include <linux/if_xdp.h>
#include <linux/if_ether.h>
return result;
}
-__sum16 ip_fast_csum(const void *iph, unsigned int ihl);
-
/*
* This is a version of ip_compute_csum() optimized for IP headers,
* which always checksum on 4 octet boundaries.
* This function code has been taken from
* Linux kernel lib/checksum.c
*/
-__sum16 ip_fast_csum(const void *iph, unsigned int ihl)
+static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
{
- return (__force __sum16)~do_csum(iph, ihl * 4);
+ return (__sum16)~do_csum(iph, ihl * 4);
}
/*
*/
static inline __sum16 csum_fold(__wsum csum)
{
- u32 sum = (__force u32)csum;
+ u32 sum = (u32)csum;
sum = (sum & 0xffff) + (sum >> 16);
sum = (sum & 0xffff) + (sum >> 16);
- return (__force __sum16)~sum;
+ return (__sum16)~sum;
}
/*
__wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
__u32 len, __u8 proto, __wsum sum)
{
- unsigned long long s = (__force u32)sum;
+ unsigned long long s = (u32)sum;
- s += (__force u32)saddr;
- s += (__force u32)daddr;
+ s += (u32)saddr;
+ s += (u32)daddr;
#ifdef __BIG_ENDIAN__
s += proto + len;
#else
s += (proto + len) << 8;
#endif
- return (__force __wsum)from64to32(s);
+ return (__wsum)from64to32(s);
}
/*
my $mcount_adjust; # Address adjustment to mcount offset
my $alignment; # The .align value to use for $mcount_section
my $section_type; # Section header plus possible alignment command
-my $can_use_local = 0; # If we can use local function references
-
-# Shut up recordmcount if user has older objcopy
-my $quiet_recordmcount = ".tmp_quiet_recordmcount";
-my $print_warning = 1;
-$print_warning = 0 if ( -f $quiet_recordmcount);
-
-##
-# check_objcopy - whether objcopy supports --globalize-symbols
-#
-# --globalize-symbols came out in 2.17, we must test the version
-# of objcopy, and if it is less than 2.17, then we can not
-# record local functions.
-sub check_objcopy
-{
- open (IN, "$objcopy --version |") or die "error running $objcopy";
- while (<IN>) {
- if (/objcopy.*\s(\d+)\.(\d+)/) {
- $can_use_local = 1 if ($1 > 2 || ($1 == 2 && $2 >= 17));
- last;
- }
- }
- close (IN);
-
- if (!$can_use_local && $print_warning) {
- print STDERR "WARNING: could not find objcopy version or version " .
- "is less than 2.17.\n" .
- "\tLocal function references are disabled.\n";
- open (QUIET, ">$quiet_recordmcount");
- printf QUIET "Disables the warning from recordmcount.pl\n";
- close QUIET;
- }
-}
if ($arch =~ /(x86(_64)?)|(i386)/) {
if ($bits == 64) {
my $mcount_s = $dirname . "/.tmp_mc_" . $prefix . ".s";
my $mcount_o = $dirname . "/.tmp_mc_" . $prefix . ".o";
-check_objcopy();
-
#
# Step 1: find all the local (static functions) and weak symbols.
# 't' is local, 'w/W' is weak
# is this function static? If so, note this fact.
if (defined $locals{$ref_func}) {
-
- # only use locals if objcopy supports globalize-symbols
- if (!$can_use_local) {
- return;
- }
$convert{$ref_func} = 1;
}
$ cat /sys/kernel/debug/tracing/trace_pipe > ~/raw_trace_func
Wait some times but not too much, the script is a bit slow.
Break the pipe (Ctrl + Z)
- $ scripts/draw_functrace.py < raw_trace_func > draw_functrace
+ $ scripts/tracing/draw_functrace.py < ~/raw_trace_func > draw_functrace
Then you have your drawn trace in draw_functrace
"""
line = line.strip()
if line.startswith("#"):
raise CommentLineException
- m = re.match("[^]]+?\\] +([0-9.]+): (\\w+) <-(\\w+)", line)
+ m = re.match("[^]]+?\\] +([a-z.]+) +([0-9.]+): (\\w+) <-(\\w+)", line)
if m is None:
raise BrokenLineException
- return (m.group(1), m.group(2), m.group(3))
+ return (m.group(2), m.group(3), m.group(4))
def main():
rc = sidtab_init(s);
if (rc) {
pr_err("SELinux: out of memory on SID table init\n");
- goto out;
+ return rc;
}
head = p->ocontexts[OCON_ISID];
if (sid == SECSID_NULL) {
pr_err("SELinux: SID 0 was assigned a context.\n");
sidtab_destroy(s);
- goto out;
+ return -EINVAL;
}
/* Ignore initial SIDs unused by this kernel. */
pr_err("SELinux: unable to load initial SID %s.\n",
name);
sidtab_destroy(s);
- goto out;
+ return rc;
}
}
- rc = 0;
-out:
- return rc;
+ return 0;
}
int policydb_class_isvalid(struct policydb *p, unsigned int class)
if (!(substream->runtime->hw.info & SNDRV_PCM_INFO_MMAP))
return false;
- if (substream->ops->mmap ||
- (substream->dma_buffer.dev.type != SNDRV_DMA_TYPE_DEV &&
- substream->dma_buffer.dev.type != SNDRV_DMA_TYPE_DEV_UC))
+ if (substream->ops->mmap)
return true;
- return dma_can_mmap(substream->dma_buffer.dev.dev);
+ switch (substream->dma_buffer.dev.type) {
+ case SNDRV_DMA_TYPE_UNKNOWN:
+ return false;
+ case SNDRV_DMA_TYPE_CONTINUOUS:
+ case SNDRV_DMA_TYPE_VMALLOC:
+ return true;
+ default:
+ return dma_can_mmap(substream->dma_buffer.dev.dev);
+ }
}
static int constrain_mask_params(struct snd_pcm_substream *substream,
boundary = 0x7fffffff;
snd_pcm_stream_lock_irq(substream);
/* FIXME: we should consider the boundary for the sync from app */
- if (!(sflags & SNDRV_PCM_SYNC_PTR_APPL))
- control->appl_ptr = scontrol.appl_ptr;
- else
+ if (!(sflags & SNDRV_PCM_SYNC_PTR_APPL)) {
+ err = pcm_lib_apply_appl_ptr(substream,
+ scontrol.appl_ptr);
+ if (err < 0) {
+ snd_pcm_stream_unlock_irq(substream);
+ return err;
+ }
+ } else
scontrol.appl_ptr = control->appl_ptr % boundary;
if (!(sflags & SNDRV_PCM_SYNC_PTR_AVAIL_MIN))
control->avail_min = scontrol.avail_min;
return VM_FAULT_SIGBUS;
if (substream->ops->page)
page = substream->ops->page(substream, offset);
+ else if (!snd_pcm_get_dma_buf(substream))
+ page = virt_to_page(runtime->dma_area + offset);
else
page = snd_sgbuf_get_page(snd_pcm_get_dma_buf(substream), offset);
if (!page)
.flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC,
.device = 0x4b55,
},
+ {
+ .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC,
+ .device = 0x4b58,
+ },
#endif
/* Alder Lake */
mixR = snd_sbmixer_read(p->chip, SB_DSP4_PCM_DEV + 1);
snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV, mixL & 0x7);
snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV + 1, mixR & 0x7);
+ spin_unlock_irqrestore(&p->chip->mixer_lock, flags);
spin_lock(&p->chip->reg_lock);
set_mode_register(p->chip, 0xc0); /* c0 = STOP */
spin_unlock(&p->chip->reg_lock);
/* restore PCM volume */
+ spin_lock_irqsave(&p->chip->mixer_lock, flags);
snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV, mixL);
snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV + 1, mixR);
spin_unlock_irqrestore(&p->chip->mixer_lock, flags);
mixR = snd_sbmixer_read(p->chip, SB_DSP4_PCM_DEV + 1);
snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV, mixL & 0x7);
snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV + 1, mixR & 0x7);
+ spin_unlock_irqrestore(&p->chip->mixer_lock, flags);
spin_lock(&p->chip->reg_lock);
if (p->running & SNDRV_SB_CSP_ST_QSOUND) {
spin_unlock(&p->chip->reg_lock);
/* restore PCM volume */
+ spin_lock_irqsave(&p->chip->mixer_lock, flags);
snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV, mixL);
snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV + 1, mixR);
spin_unlock_irqrestore(&p->chip->mixer_lock, flags);
static const struct snd_pci_quirk force_connect_list[] = {
SND_PCI_QUIRK(0x103c, 0x870f, "HP", 1),
SND_PCI_QUIRK(0x103c, 0x871a, "HP", 1),
+ SND_PCI_QUIRK(0x1462, 0xec94, "MS-7C94", 1),
+ SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", 1),
{}
};
SND_PCI_QUIRK(0x17aa, 0x3151, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC),
SND_PCI_QUIRK(0x17aa, 0x3176, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC),
SND_PCI_QUIRK(0x17aa, 0x3178, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC),
+ SND_PCI_QUIRK(0x17aa, 0x31af, "ThinkCentre Station", ALC623_FIXUP_LENOVO_THINKSTATION_P340),
SND_PCI_QUIRK(0x17aa, 0x3818, "Lenovo C940", ALC298_FIXUP_LENOVO_SPK_VOLUME),
SND_PCI_QUIRK(0x17aa, 0x3827, "Ideapad S740", ALC285_FIXUP_IDEAPAD_S740_COEF),
SND_PCI_QUIRK(0x17aa, 0x3843, "Yoga 9i", ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP),
| SND_SOC_DAIFMT_CBM_CFM,
.init = cz_rt5682_init,
.dpcm_playback = 1,
+ .stop_dma_first = 1,
.ops = &cz_rt5682_play_ops,
SND_SOC_DAILINK_REG(designware1, rt5682, platform),
},
.dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF
| SND_SOC_DAIFMT_CBM_CFM,
.dpcm_capture = 1,
+ .stop_dma_first = 1,
.ops = &cz_rt5682_cap_ops,
SND_SOC_DAILINK_REG(designware2, rt5682, platform),
},
.dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF
| SND_SOC_DAIFMT_CBM_CFM,
.dpcm_playback = 1,
+ .stop_dma_first = 1,
.ops = &cz_rt5682_max_play_ops,
SND_SOC_DAILINK_REG(designware3, mx, platform),
},
.dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF
| SND_SOC_DAIFMT_CBM_CFM,
.dpcm_capture = 1,
+ .stop_dma_first = 1,
.ops = &cz_rt5682_dmic0_cap_ops,
SND_SOC_DAILINK_REG(designware3, adau, platform),
},
.dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF
| SND_SOC_DAIFMT_CBM_CFM,
.dpcm_capture = 1,
+ .stop_dma_first = 1,
.ops = &cz_rt5682_dmic1_cap_ops,
SND_SOC_DAILINK_REG(designware2, adau, platform),
},
high-efficiency mono Class-D audio power amplifiers.
config SND_SOC_SSM2518
- tristate
+ tristate "Analog Devices SSM2518 Class-D Amplifier"
depends on I2C
config SND_SOC_SSM2602
Qualcomm SoCs like SDM845.
config SND_SOC_WCD938X
+ depends on SND_SOC_WCD938X_SDW
tristate
config SND_SOC_WCD938X_SDW
which consists of a Digital Signal Processor (DSP), several Digital
Audio Interfaces (DAIs), analog outputs, and a block of 14 GPIOs.
-config SND_SOC_ZX_AUD96P22
- tristate "ZTE ZX AUD96P22 CODEC"
- depends on I2C
- select REGMAP_I2C
-
# Amp
config SND_SOC_LM4857
tristate
.reg_defaults = rt5631_reg,
.num_reg_defaults = ARRAY_SIZE(rt5631_reg),
.cache_type = REGCACHE_RBTREE,
+ .use_single_read = true,
+ .use_single_write = true,
};
static int rt5631_i2c_probe(struct i2c_client *i2c,
rt5682_enable_push_button_irq(component, false);
snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1,
RT5682_TRIG_JD_MASK, RT5682_TRIG_JD_LOW);
- if (!snd_soc_dapm_get_pin_status(dapm, "MICBIAS"))
+ if (!snd_soc_dapm_get_pin_status(dapm, "MICBIAS") &&
+ !snd_soc_dapm_get_pin_status(dapm, "PLL1") &&
+ !snd_soc_dapm_get_pin_status(dapm, "PLL2B"))
snd_soc_component_update_bits(component,
RT5682_PWR_ANLG_1, RT5682_PWR_MB, 0);
- if (!snd_soc_dapm_get_pin_status(dapm, "Vref2"))
+ if (!snd_soc_dapm_get_pin_status(dapm, "Vref2") &&
+ !snd_soc_dapm_get_pin_status(dapm, "PLL1") &&
+ !snd_soc_dapm_get_pin_status(dapm, "PLL2B"))
snd_soc_component_update_bits(component,
RT5682_PWR_ANLG_1, RT5682_PWR_VREF2, 0);
snd_soc_component_update_bits(component, RT5682_PWR_ANLG_3,
ret);
return ret;
}
+ regcache_cache_only(aic31xx->regmap, true);
+
aic31xx->dev = &i2c->dev;
aic31xx->irq = i2c->irq;
#define AIC31XX_WORD_LEN_24BITS 0x02
#define AIC31XX_WORD_LEN_32BITS 0x03
#define AIC31XX_IFACE1_MASTER_MASK GENMASK(3, 2)
-#define AIC31XX_BCLK_MASTER BIT(2)
-#define AIC31XX_WCLK_MASTER BIT(3)
+#define AIC31XX_BCLK_MASTER BIT(3)
+#define AIC31XX_WCLK_MASTER BIT(2)
/* AIC31XX_DATA_OFFSET */
#define AIC31XX_DATA_OFFSET_MASK GENMASK(7, 0)
static DECLARE_TLV_DB_SCALE(tlv_driver_gain, -600, 100, 0);
/* -12dB min, 0.5dB steps */
static DECLARE_TLV_DB_SCALE(tlv_adc_vol, -1200, 50, 0);
-
-static DECLARE_TLV_DB_LINEAR(tlv_spk_vol, TLV_DB_GAIN_MUTE, 0);
+/* -6dB min, 1dB steps */
+static DECLARE_TLV_DB_SCALE(tlv_tas_driver_gain, -5850, 50, 0);
static DECLARE_TLV_DB_SCALE(tlv_amp_vol, 0, 600, 1);
static const char * const lo_cm_text[] = {
};
static const struct snd_kcontrol_new aic32x4_tas2505_snd_controls[] = {
- SOC_DOUBLE_R_S_TLV("PCM Playback Volume", AIC32X4_LDACVOL,
- AIC32X4_LDACVOL, 0, -0x7f, 0x30, 7, 0, tlv_pcm),
+ SOC_SINGLE_S8_TLV("PCM Playback Volume",
+ AIC32X4_LDACVOL, -0x7f, 0x30, tlv_pcm),
SOC_ENUM("DAC Playback PowerTune Switch", l_ptm_enum),
- SOC_DOUBLE_R_S_TLV("HP Driver Playback Volume", AIC32X4_HPLGAIN,
- AIC32X4_HPLGAIN, 0, -0x6, 0x1d, 5, 0,
- tlv_driver_gain),
- SOC_DOUBLE_R("HP DAC Playback Switch", AIC32X4_HPLGAIN,
- AIC32X4_HPLGAIN, 6, 0x01, 1),
- SOC_SINGLE("Auto-mute Switch", AIC32X4_DACMUTE, 4, 7, 0),
+ SOC_SINGLE_TLV("HP Driver Gain Volume",
+ AIC32X4_HPLGAIN, 0, 0x74, 1, tlv_tas_driver_gain),
+ SOC_SINGLE("HP DAC Playback Switch", AIC32X4_HPLGAIN, 6, 1, 1),
- SOC_SINGLE_RANGE_TLV("Speaker Driver Playback Volume", TAS2505_SPKVOL1,
- 0, 0, 117, 1, tlv_spk_vol),
- SOC_SINGLE_TLV("Speaker Amplifier Playback Volume", TAS2505_SPKVOL2,
- 4, 5, 0, tlv_amp_vol),
+ SOC_SINGLE_TLV("Speaker Driver Playback Volume",
+ TAS2505_SPKVOL1, 0, 0x74, 1, tlv_tas_driver_gain),
+ SOC_SINGLE_TLV("Speaker Amplifier Playback Volume",
+ TAS2505_SPKVOL2, 4, 5, 0, tlv_amp_vol),
+
+ SOC_SINGLE("Auto-mute Switch", AIC32X4_DACMUTE, 4, 7, 0),
};
static const struct snd_kcontrol_new hp_output_mixer_controls[] = {
(WCD938X_DIGITAL_INTR_LEVEL_0 + i), 0);
}
- ret = wcd938x_irq_init(wcd938x, component->dev);
- if (ret) {
- dev_err(component->dev, "%s: IRQ init failed: %d\n",
- __func__, ret);
- return ret;
- }
-
wcd938x->hphr_pdm_wd_int = regmap_irq_get_virq(wcd938x->irq_chip,
WCD938X_IRQ_HPHR_PDM_WD_INT);
wcd938x->hphl_pdm_wd_int = regmap_irq_get_virq(wcd938x->irq_chip,
}
wcd938x->sdw_priv[AIF1_PB] = dev_get_drvdata(wcd938x->rxdev);
wcd938x->sdw_priv[AIF1_PB]->wcd938x = wcd938x;
- wcd938x->sdw_priv[AIF1_PB]->slave_irq = wcd938x->virq;
wcd938x->txdev = wcd938x_sdw_device_get(wcd938x->txnode);
if (!wcd938x->txdev) {
}
wcd938x->sdw_priv[AIF1_CAP] = dev_get_drvdata(wcd938x->txdev);
wcd938x->sdw_priv[AIF1_CAP]->wcd938x = wcd938x;
- wcd938x->sdw_priv[AIF1_CAP]->slave_irq = wcd938x->virq;
wcd938x->tx_sdw_dev = dev_to_sdw_dev(wcd938x->txdev);
if (!wcd938x->tx_sdw_dev) {
dev_err(dev, "could not get txslave with matching of dev\n");
return PTR_ERR(wcd938x->regmap);
}
+ ret = wcd938x_irq_init(wcd938x, dev);
+ if (ret) {
+ dev_err(dev, "%s: IRQ init failed: %d\n", __func__, ret);
+ return ret;
+ }
+
+ wcd938x->sdw_priv[AIF1_PB]->slave_irq = wcd938x->virq;
+ wcd938x->sdw_priv[AIF1_CAP]->slave_irq = wcd938x->virq;
+
ret = wcd938x_set_micbias_data(wcd938x);
if (ret < 0) {
dev_err(dev, "%s: bad micbias pdata\n", __func__);
/*
* HALO_CCM_CORE_CONTROL
*/
+#define HALO_CORE_RESET 0x00000200
#define HALO_CORE_EN 0x00000001
/*
mutex_lock(&ctl->dsp->pwr_lock);
- ret = wm_coeff_read_ctrl_raw(ctl, ctl->cache, size);
+ ret = wm_coeff_read_ctrl(ctl, ctl->cache, size);
if (!ret && copy_to_user(bytes, ctl->cache, size))
ret = -EFAULT;
{
return regmap_update_bits(dsp->regmap,
dsp->base + HALO_CCM_CORE_CONTROL,
- HALO_CORE_EN, HALO_CORE_EN);
+ HALO_CORE_RESET | HALO_CORE_EN,
+ HALO_CORE_RESET | HALO_CORE_EN);
}
static void wm_halo_stop_core(struct wm_adsp *dsp)
return ret;
}
-static int max98373_sdw_trigger(struct snd_pcm_substream *substream, int cmd)
+static int mx8373_enable_spk_pin(struct snd_pcm_substream *substream, bool enable)
{
+ struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream);
+ struct snd_soc_dai *codec_dai;
+ struct snd_soc_dai *cpu_dai;
int ret;
+ int j;
- switch (cmd) {
- case SNDRV_PCM_TRIGGER_START:
- case SNDRV_PCM_TRIGGER_RESUME:
- case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
- /* enable max98373 first */
- ret = max_98373_trigger(substream, cmd);
- if (ret < 0)
- break;
-
- ret = sdw_trigger(substream, cmd);
- break;
- case SNDRV_PCM_TRIGGER_STOP:
- case SNDRV_PCM_TRIGGER_SUSPEND:
- case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
- ret = sdw_trigger(substream, cmd);
- if (ret < 0)
- break;
-
- ret = max_98373_trigger(substream, cmd);
- break;
- default:
- ret = -EINVAL;
- break;
+ /* set spk pin by playback only */
+ if (substream->stream == SNDRV_PCM_STREAM_CAPTURE)
+ return 0;
+
+ cpu_dai = asoc_rtd_to_cpu(rtd, 0);
+ for_each_rtd_codec_dais(rtd, j, codec_dai) {
+ struct snd_soc_dapm_context *dapm =
+ snd_soc_component_get_dapm(cpu_dai->component);
+ char pin_name[16];
+
+ snprintf(pin_name, ARRAY_SIZE(pin_name), "%s Spk",
+ codec_dai->component->name_prefix);
+
+ if (enable)
+ ret = snd_soc_dapm_enable_pin(dapm, pin_name);
+ else
+ ret = snd_soc_dapm_disable_pin(dapm, pin_name);
+
+ if (!ret)
+ snd_soc_dapm_sync(dapm);
}
- return ret;
+ return 0;
+}
+
+static int mx8373_sdw_prepare(struct snd_pcm_substream *substream)
+{
+ int ret = 0;
+
+ /* according to soc_pcm_prepare dai link prepare is called first */
+ ret = sdw_prepare(substream);
+ if (ret < 0)
+ return ret;
+
+ return mx8373_enable_spk_pin(substream, true);
+}
+
+static int mx8373_sdw_hw_free(struct snd_pcm_substream *substream)
+{
+ int ret = 0;
+
+ /* according to soc_pcm_hw_free dai link free is called first */
+ ret = sdw_hw_free(substream);
+ if (ret < 0)
+ return ret;
+
+ return mx8373_enable_spk_pin(substream, false);
}
static const struct snd_soc_ops max_98373_sdw_ops = {
.startup = sdw_startup,
- .prepare = sdw_prepare,
- .trigger = max98373_sdw_trigger,
- .hw_free = sdw_hw_free,
+ .prepare = mx8373_sdw_prepare,
+ .trigger = sdw_trigger,
+ .hw_free = mx8373_sdw_hw_free,
.shutdown = sdw_shutdown,
};
static int soc_pcm_trigger(struct snd_pcm_substream *substream, int cmd)
{
+ struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream);
int ret = -EINVAL, _ret = 0;
int rollback = 0;
case SNDRV_PCM_TRIGGER_STOP:
case SNDRV_PCM_TRIGGER_SUSPEND:
case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
- ret = snd_soc_pcm_dai_trigger(substream, cmd, rollback);
- if (ret < 0)
- break;
+ if (rtd->dai_link->stop_dma_first) {
+ ret = snd_soc_pcm_component_trigger(substream, cmd, rollback);
+ if (ret < 0)
+ break;
- ret = snd_soc_pcm_component_trigger(substream, cmd, rollback);
- if (ret < 0)
- break;
+ ret = snd_soc_pcm_dai_trigger(substream, cmd, rollback);
+ if (ret < 0)
+ break;
+ } else {
+ ret = snd_soc_pcm_dai_trigger(substream, cmd, rollback);
+ if (ret < 0)
+ break;
+ ret = snd_soc_pcm_component_trigger(substream, cmd, rollback);
+ if (ret < 0)
+ break;
+ }
ret = snd_soc_link_trigger(substream, cmd, rollback);
break;
}
static const struct sof_dev_desc adl_desc = {
.machines = snd_soc_acpi_intel_adl_machines,
.alt_machines = snd_soc_acpi_intel_adl_sdw_machines,
+ .use_acpi_target_states = true,
.resindex_lpe_base = 0,
.resindex_pcicfg_base = -1,
.resindex_imr_base = -1,
}
EXPORT_SYMBOL_GPL(tegra_pcm_pointer);
-static int tegra_pcm_preallocate_dma_buffer(struct snd_pcm *pcm, int stream,
+static int tegra_pcm_preallocate_dma_buffer(struct device *dev, struct snd_pcm *pcm, int stream,
size_t size)
{
struct snd_pcm_substream *substream = pcm->streams[stream].substream;
struct snd_dma_buffer *buf = &substream->dma_buffer;
- buf->area = dma_alloc_wc(pcm->card->dev, size, &buf->addr, GFP_KERNEL);
+ buf->area = dma_alloc_wc(dev, size, &buf->addr, GFP_KERNEL);
if (!buf->area)
return -ENOMEM;
buf->private_data = NULL;
buf->dev.type = SNDRV_DMA_TYPE_DEV;
- buf->dev.dev = pcm->card->dev;
+ buf->dev.dev = dev;
buf->bytes = size;
return 0;
if (!buf->area)
return;
- dma_free_wc(pcm->card->dev, buf->bytes, buf->area, buf->addr);
+ dma_free_wc(buf->dev.dev, buf->bytes, buf->area, buf->addr);
buf->area = NULL;
}
-static int tegra_pcm_dma_allocate(struct snd_soc_pcm_runtime *rtd,
+static int tegra_pcm_dma_allocate(struct device *dev, struct snd_soc_pcm_runtime *rtd,
size_t size)
{
- struct snd_card *card = rtd->card->snd_card;
struct snd_pcm *pcm = rtd->pcm;
int ret;
- ret = dma_set_mask_and_coherent(card->dev, DMA_BIT_MASK(32));
+ ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
if (ret < 0)
return ret;
if (pcm->streams[SNDRV_PCM_STREAM_PLAYBACK].substream) {
- ret = tegra_pcm_preallocate_dma_buffer(pcm,
- SNDRV_PCM_STREAM_PLAYBACK, size);
+ ret = tegra_pcm_preallocate_dma_buffer(dev, pcm, SNDRV_PCM_STREAM_PLAYBACK, size);
if (ret)
goto err;
}
if (pcm->streams[SNDRV_PCM_STREAM_CAPTURE].substream) {
- ret = tegra_pcm_preallocate_dma_buffer(pcm,
- SNDRV_PCM_STREAM_CAPTURE, size);
+ ret = tegra_pcm_preallocate_dma_buffer(dev, pcm, SNDRV_PCM_STREAM_CAPTURE, size);
if (ret)
goto err_free_play;
}
int tegra_pcm_construct(struct snd_soc_component *component,
struct snd_soc_pcm_runtime *rtd)
{
- return tegra_pcm_dma_allocate(rtd, tegra_pcm_hardware.buffer_bytes_max);
+ struct device *dev = component->dev;
+
+ /*
+ * Fallback for backwards-compatibility with older device trees that
+ * have the iommus property in the virtual, top-level "sound" node.
+ */
+ if (!of_get_property(dev->of_node, "iommus", NULL))
+ dev = rtd->card->snd_card->dev;
+
+ return tegra_pcm_dma_allocate(dev, rtd, tegra_pcm_hardware.buffer_bytes_max);
}
EXPORT_SYMBOL_GPL(tegra_pcm_construct);
return ret;
}
- if (priv->hsdiv_rates[domain->parent_clk_id] != scki) {
+ if (domain->parent_clk_id == -1 || priv->hsdiv_rates[domain->parent_clk_id] != scki) {
dev_dbg(priv->dev,
"%s configuration for %u Hz: %s, %dxFS (SCKI: %u Hz)\n",
audio_domain == J721E_AUDIO_DOMAIN_CPB ? "CPB" : "IVI",
j721e_rule_rate, &priv->rate_range,
SNDRV_PCM_HW_PARAM_RATE, -1);
- mutex_unlock(&priv->mutex);
if (ret)
- return ret;
+ goto out;
/* Reset TDM slots to 32 */
ret = snd_soc_dai_set_tdm_slot(cpu_dai, 0x3, 0x3, 2, 32);
if (ret && ret != -ENOTSUPP)
- return ret;
+ goto out;
for_each_rtd_codec_dais(rtd, i, codec_dai) {
ret = snd_soc_dai_set_tdm_slot(codec_dai, 0x3, 0x3, 2, 32);
if (ret && ret != -ENOTSUPP)
- return ret;
+ goto out;
}
- return 0;
+ if (ret == -ENOTSUPP)
+ ret = 0;
+out:
+ if (ret)
+ domain->active--;
+ mutex_unlock(&priv->mutex);
+
+ return ret;
}
static int j721e_audio_hw_params(struct snd_pcm_substream *substream,
{
struct usb_mixer_elem_info *cval = mixer_elem_list_to_info(list);
static const char * const val_types[] = {
- "BOOLEAN", "INV_BOOLEAN", "S8", "U8", "S16", "U16", "S32", "U32",
+ [USB_MIXER_BOOLEAN] = "BOOLEAN",
+ [USB_MIXER_INV_BOOLEAN] = "INV_BOOLEAN",
+ [USB_MIXER_S8] = "S8",
+ [USB_MIXER_U8] = "U8",
+ [USB_MIXER_S16] = "S16",
+ [USB_MIXER_U16] = "U16",
+ [USB_MIXER_S32] = "S32",
+ [USB_MIXER_U32] = "U32",
+ [USB_MIXER_BESPOKEN] = "BESPOKEN",
};
snd_iprintf(buffer, " Info: id=%i, control=%i, cmask=0x%x, "
"channels=%i, type=\"%s\"\n", cval->head.id,
REG_QUIRK_ENTRY(0x0951, 0x16d8, 2), /* Kingston HyperX AMP */
REG_QUIRK_ENTRY(0x0951, 0x16ed, 2), /* Kingston HyperX Cloud Alpha S */
REG_QUIRK_ENTRY(0x0951, 0x16ea, 2), /* Kingston HyperX Cloud Flight S */
+ REG_QUIRK_ENTRY(0x0ecb, 0x1f46, 2), /* JBL Quantum 600 */
+ REG_QUIRK_ENTRY(0x0ecb, 0x2039, 2), /* JBL Quantum 400 */
+ REG_QUIRK_ENTRY(0x0ecb, 0x203e, 2), /* JBL Quantum 800 */
{ 0 } /* terminator */
};
**bpftool** [*OPTIONS*] **btf** *COMMAND*
- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] }
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | {**-d** | **--debug** } |
+ { **-B** | **--base-btf** } }
*COMMANDS* := { **dump** | **help** }
=======
.. include:: common_options.rst
+ -B, --base-btf *FILE*
+ Pass a base BTF object. Base BTF objects are typically used
+ with BTF objects for kernel modules. To avoid duplicating
+ all kernel symbols required by modules, BTF objects for
+ modules are "split", they are built incrementally on top of
+ the kernel (vmlinux) BTF object. So the base BTF reference
+ should usually point to the kernel BTF.
+
+ When the main BTF object to process (for example, the
+ module BTF to dump) is passed as a *FILE*, bpftool attempts
+ to autodetect the path for the base object, and passing
+ this option is optional. When the main BTF object is passed
+ through other handles, this option becomes necessary.
+
EXAMPLES
========
**# bpftool btf dump id 1226**
**# bpftool btf dump prog tag b88e0a09b1d9759d**
**# bpftool btf dump prog pinned /sys/fs/bpf/prog_name**
+
+|
+| **# bpftool btf dump file /sys/kernel/btf/i2c_smbus**
+| (or)
+| **# I2C_SMBUS_ID=$(bpftool btf show -p | jq '.[] | select(.name=="i2c_smbus").id')**
+| **# bpftool btf dump id ${I2C_SMBUS_ID} -B /sys/kernel/btf/vmlinux**
+
+::
+
+ [104848] STRUCT 'i2c_smbus_alert' size=40 vlen=2
+ 'alert' type_id=393 bits_offset=0
+ 'ara' type_id=56050 bits_offset=256
+ [104849] STRUCT 'alert_data' size=12 vlen=3
+ 'addr' type_id=16 bits_offset=0
+ 'type' type_id=56053 bits_offset=32
+ 'data' type_id=7 bits_offset=64
+ [104850] PTR '(anon)' type_id=104848
+ [104851] PTR '(anon)' type_id=104849
+ [104852] FUNC 'i2c_register_spd' type_id=84745 linkage=static
+ [104853] FUNC 'smbalert_driver_init' type_id=1213 linkage=static
+ [104854] FUNC_PROTO '(anon)' ret_type_id=18 vlen=1
+ 'ara' type_id=56050
+ [104855] FUNC 'i2c_handle_smbus_alert' type_id=104854 linkage=static
+ [104856] FUNC 'smbalert_remove' type_id=104854 linkage=static
+ [104857] FUNC_PROTO '(anon)' ret_type_id=18 vlen=2
+ 'ara' type_id=56050
+ 'id' type_id=56056
+ [104858] FUNC 'smbalert_probe' type_id=104857 linkage=static
+ [104859] FUNC 'smbalert_work' type_id=9695 linkage=static
+ [104860] FUNC 'smbus_alert' type_id=71367 linkage=static
+ [104861] FUNC 'smbus_do_alert' type_id=84827 linkage=static
**bpftool** [*OPTIONS*] **cgroup** *COMMAND*
- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
+ { **-f** | **--bpffs** } }
*COMMANDS* :=
{ **show** | **list** | **tree** | **attach** | **detach** | **help** }
**bpftool** [*OPTIONS*] **feature** *COMMAND*
- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] }
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
*COMMANDS* := { **probe** | **help** }
**bpftool** [*OPTIONS*] **gen** *COMMAND*
- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] }
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
+ { **-L** | **--use-loader** } }
*COMMAND* := { **object** | **skeleton** | **help** }
=======
.. include:: common_options.rst
+ -L, --use-loader
+ For skeletons, generate a "light" skeleton (also known as "loader"
+ skeleton). A light skeleton contains a loader eBPF program. It does
+ not use the majority of the libbpf infrastructure, and does not need
+ libelf.
+
EXAMPLES
========
**$ cat example1.bpf.c**
**bpftool** [*OPTIONS*] **iter** *COMMAND*
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
+
*COMMANDS* := { **pin** | **help** }
ITER COMMANDS
**bpftool** [*OPTIONS*] **link** *COMMAND*
- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
+ { **-f** | **--bpffs** } | { **-n** | **--nomount** } }
*COMMANDS* := { **show** | **list** | **pin** | **help** }
**bpftool** [*OPTIONS*] **map** *COMMAND*
- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
+ { **-f** | **--bpffs** } | { **-n** | **--nomount** } }
*COMMANDS* :=
{ **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext**
**bpftool** [*OPTIONS*] **net** *COMMAND*
- *OPTIONS* := { [{ **-j** | **--json** }] [{ **-p** | **--pretty** }] }
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
*COMMANDS* :=
{ **show** | **list** | **attach** | **detach** | **help** }
**bpftool** [*OPTIONS*] **perf** *COMMAND*
- *OPTIONS* := { [{ **-j** | **--json** }] [{ **-p** | **--pretty** }] }
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
*COMMANDS* :=
{ **show** | **list** | **help** }
**bpftool** [*OPTIONS*] **prog** *COMMAND*
- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
+ { **-f** | **--bpffs** } | { **-m** | **--mapcompat** } | { **-n** | **--nomount** } |
+ { **-L** | **--use-loader** } }
*COMMANDS* :=
{ **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load**
| **struct_ops** | **fentry** | **fexit** | **freplace** | **sk_lookup**
| }
| *ATTACH_TYPE* := {
-| **msg_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector**
+| **msg_verdict** | **skb_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector**
| }
| *METRICs* := {
-| **cycles** | **instructions** | **l1d_loads** | **llc_misses**
+| **cycles** | **instructions** | **l1d_loads** | **llc_misses** |
+| **itlb_misses** | **dtlb_misses**
| }
Do not automatically attempt to mount any virtual file system
(such as tracefs or BPF virtual file system) when necessary.
+ -L, --use-loader
+ Load program as a "loader" program. This is useful to debug
+ the generation of such programs. When this option is in
+ use, bpftool attempts to load the programs from the object
+ file into the kernel, but does not pin them (therefore, the
+ *PATH* must not be provided).
+
+ When combined with the **-d**\ \|\ **--debug** option,
+ additional debug messages are generated, and the execution
+ of the loader program will use the **bpf_trace_printk**\ ()
+ helper to log each step of loading BTF, creating the maps,
+ and loading the programs (see **bpftool prog tracelog** as
+ a way to dump those messages).
+
EXAMPLES
========
**# bpftool prog show**
40176203 cycles (83.05%)
42518139 instructions # 1.06 insns per cycle (83.39%)
123 llc_misses # 2.89 LLC misses per million insns (83.15%)
+
+|
+| Output below is for the trace logs.
+| Run in separate terminals:
+| **# bpftool prog tracelog**
+| **# bpftool prog load -L -d file.o**
+
+::
+
+ bpftool-620059 [004] d... 2634685.517903: bpf_trace_printk: btf_load size 665 r=5
+ bpftool-620059 [004] d... 2634685.517912: bpf_trace_printk: map_create sample_map idx 0 type 2 value_size 4 value_btf_id 0 r=6
+ bpftool-620059 [004] d... 2634685.517997: bpf_trace_printk: prog_load sample insn_cnt 13 r=7
+ bpftool-620059 [004] d... 2634685.517999: bpf_trace_printk: close(5) = 0
**bpftool** [*OPTIONS*] **struct_ops** *COMMAND*
- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] }
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
*COMMANDS* :=
{ **show** | **list** | **dump** | **register** | **unregister** | **help** }
*OBJECT* := { **map** | **program** | **cgroup** | **perf** | **net** | **feature** }
- *OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** }
- | { **-j** | **--json** } [{ **-p** | **--pretty** }] }
+ *OPTIONS* := { { **-V** | **--version** } |
+ { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
*MAP-COMMANDS* :=
- { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext**
- | **delete** | **pin** | **event_pipe** | **help** }
+ { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext** |
+ **delete** | **pin** | **event_pipe** | **help** }
- *PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin**
- | **load** | **attach** | **detach** | **help** }
+ *PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin** |
+ **load** | **attach** | **detach** | **help** }
*CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** }
# Deal with options
if [[ ${words[cword]} == -* ]]; then
- local c='--version --json --pretty --bpffs --mapcompat --debug'
+ local c='--version --json --pretty --bpffs --mapcompat --debug \
+ --use-loader --base-btf'
COMPREPLY=( $( compgen -W "$c" -- "$cur" ) )
return 0
fi
_sysfs_get_netdevs
return 0
;;
- file|pinned)
+ file|pinned|-B|--base-btf)
_filedir
return 0
;;
# Remove all options so completions don't have to deal with them.
local i
for (( i=1; i < ${#words[@]}; )); do
- if [[ ${words[i]::1} == - ]]; then
+ if [[ ${words[i]::1} == - ]] &&
+ [[ ${words[i]} != "-B" ]] && [[ ${words[i]} != "--base-btf" ]]; then
words=( "${words[@]:0:i}" "${words[@]:i+1}" )
[[ $i -le $cword ]] && cword=$(( cword - 1 ))
else
local PROG_TYPE='id pinned tag name'
local MAP_TYPE='id pinned name'
- local METRIC_TYPE='cycles instructions l1d_loads llc_misses'
+ local METRIC_TYPE='cycles instructions l1d_loads llc_misses \
+ itlb_misses dtlb_misses'
case $command in
show|list)
[[ $prev != "$command" ]] && return 0
return 0
;;
5)
- COMPREPLY=( $( compgen -W 'msg_verdict stream_verdict \
- stream_parser flow_dissector' -- "$cur" ) )
+ local BPFTOOL_PROG_ATTACH_TYPES='msg_verdict \
+ skb_verdict stream_verdict stream_parser \
+ flow_dissector'
+ COMPREPLY=( $( compgen -W "$BPFTOOL_PROG_ATTACH_TYPES" -- "$cur" ) )
return 0
;;
6)
case $prev in
type)
- COMPREPLY=( $( compgen -W "socket kprobe \
+ local BPFTOOL_PROG_LOAD_TYPES='socket kprobe \
kretprobe classifier flow_dissector \
action tracepoint raw_tracepoint \
xdp perf_event cgroup/skb cgroup/sock \
cgroup/post_bind4 cgroup/post_bind6 \
cgroup/sysctl cgroup/getsockopt \
cgroup/setsockopt cgroup/sock_release struct_ops \
- fentry fexit freplace sk_lookup" -- \
- "$cur" ) )
+ fentry fexit freplace sk_lookup'
+ COMPREPLY=( $( compgen -W "$BPFTOOL_PROG_LOAD_TYPES" -- "$cur" ) )
return 0
;;
id)
return 0
;;
type)
- COMPREPLY=( $( compgen -W 'hash array prog_array \
- perf_event_array percpu_hash percpu_array \
- stack_trace cgroup_array lru_hash \
+ local BPFTOOL_MAP_CREATE_TYPES='hash array \
+ prog_array perf_event_array percpu_hash \
+ percpu_array stack_trace cgroup_array lru_hash \
lru_percpu_hash lpm_trie array_of_maps \
hash_of_maps devmap devmap_hash sockmap cpumap \
xskmap sockhash cgroup_storage reuseport_sockarray \
percpu_cgroup_storage queue stack sk_storage \
- struct_ops inode_storage task_storage' -- \
- "$cur" ) )
+ struct_ops inode_storage task_storage ringbuf'
+ COMPREPLY=( $( compgen -W "$BPFTOOL_MAP_CREATE_TYPES" -- "$cur" ) )
return 0
;;
key|value|flags|entries)
return 0
;;
attach|detach)
- local ATTACH_TYPES='ingress egress sock_create sock_ops \
- device bind4 bind6 post_bind4 post_bind6 connect4 connect6 \
+ local BPFTOOL_CGROUP_ATTACH_TYPES='ingress egress \
+ sock_create sock_ops device \
+ bind4 bind6 post_bind4 post_bind6 connect4 connect6 \
getpeername4 getpeername6 getsockname4 getsockname6 \
sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl getsockopt \
setsockopt sock_release'
local ATTACH_FLAGS='multi override'
local PROG_TYPE='id pinned tag name'
- case $prev in
- $command)
- _filedir
- return 0
- ;;
- ingress|egress|sock_create|sock_ops|device|bind4|bind6|\
- post_bind4|post_bind6|connect4|connect6|getpeername4|\
- getpeername6|getsockname4|getsockname6|sendmsg4|sendmsg6|\
- recvmsg4|recvmsg6|sysctl|getsockopt|setsockopt|sock_release)
+ # Check for $prev = $command first
+ if [ $prev = $command ]; then
+ _filedir
+ return 0
+ # Then check for attach type. This is done outside of the
+ # "case $prev in" to avoid writing the whole list of attach
+ # types again as pattern to match (where we cannot reuse
+ # our variable).
+ elif [[ $BPFTOOL_CGROUP_ATTACH_TYPES =~ $prev ]]; then
COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \
"$cur" ) )
return 0
- ;;
+ fi
+ # case/esac for the other cases
+ case $prev in
id)
_bpftool_get_prog_ids
return 0
;;
*)
- if ! _bpftool_search_list "$ATTACH_TYPES"; then
- COMPREPLY=( $( compgen -W "$ATTACH_TYPES" -- \
- "$cur" ) )
+ if ! _bpftool_search_list "$BPFTOOL_CGROUP_ATTACH_TYPES"; then
+ COMPREPLY=( $( compgen -W \
+ "$BPFTOOL_CGROUP_ATTACH_TYPES" -- "$cur" ) )
elif [[ "$command" == "attach" ]]; then
# We have an attach type on the command line,
# but it is not the previous word, or
}
if (!btf) {
- err = btf__get_from_id(btf_id, &btf);
+ btf = btf__load_from_kernel_by_id_split(btf_id, base_btf);
+ err = libbpf_get_error(btf);
if (err) {
p_err("get btf by id (%u): %s", btf_id, strerror(err));
goto done;
}
- if (!btf) {
- err = -ENOENT;
- p_err("can't find btf with ID (%u)", btf_id);
- goto done;
- }
}
if (dump_c) {
" FORMAT := { raw | c }\n"
" " HELP_SPEC_MAP "\n"
" " HELP_SPEC_PROGRAM "\n"
- " " HELP_SPEC_OPTIONS "\n"
+ " " HELP_SPEC_OPTIONS " |\n"
+ " {-B|--base-btf} }\n"
"",
bin_name, "btf");
}
info = &prog_info->info;
- if (!info->btf_id || !info->nr_func_info ||
- btf__get_from_id(info->btf_id, &prog_btf))
+ if (!info->btf_id || !info->nr_func_info)
+ goto print;
+ prog_btf = btf__load_from_kernel_by_id(info->btf_id);
+ if (libbpf_get_error(prog_btf))
goto print;
finfo = u64_to_ptr(info->func_info);
func_type = btf__type_by_id(prog_btf, finfo->type_id);
HELP_SPEC_ATTACH_TYPES "\n"
" " HELP_SPEC_ATTACH_FLAGS "\n"
" " HELP_SPEC_PROGRAM "\n"
- " " HELP_SPEC_OPTIONS "\n"
+ " " HELP_SPEC_OPTIONS " |\n"
+ " {-f|--bpffs} }\n"
"",
bin_name, argv[-2]);
[BPF_MODIFY_RETURN] = "mod_ret",
[BPF_LSM_MAC] = "lsm_mac",
[BPF_SK_LOOKUP] = "sk_lookup",
+ [BPF_TRACE_ITER] = "trace_iter",
+ [BPF_XDP_DEVMAP] = "xdp_devmap",
+ [BPF_XDP_CPUMAP] = "xdp_cpumap",
+ [BPF_XDP] = "xdp",
+ [BPF_SK_REUSEPORT_SELECT] = "sk_skb_reuseport_select",
+ [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE] = "sk_skb_reuseport_select_or_migrate",
};
void p_err(const char *fmt, ...)
" %1$s %2$s help\n"
"\n"
" COMPONENT := { kernel | dev NAME }\n"
+ " " HELP_SPEC_OPTIONS " }\n"
"",
bin_name, argv[-2]);
" %1$s %2$s skeleton FILE [name OBJECT_NAME]\n"
" %1$s %2$s help\n"
"\n"
- " " HELP_SPEC_OPTIONS "\n"
+ " " HELP_SPEC_OPTIONS " |\n"
+ " {-L|--use-loader} }\n"
"",
bin_name, "gen");
fprintf(stderr,
"Usage: %1$s %2$s pin OBJ PATH [map MAP]\n"
" %1$s %2$s help\n"
+ "\n"
" " HELP_SPEC_MAP "\n"
+ " " HELP_SPEC_OPTIONS " }\n"
"",
bin_name, "iter");
" %1$s %2$s help\n"
"\n"
" " HELP_SPEC_LINK "\n"
- " " HELP_SPEC_OPTIONS "\n"
+ " " HELP_SPEC_OPTIONS " |\n"
+ " {-f|--bpffs} | {-n|--nomount} }\n"
"",
bin_name, argv[-2]);
" %s version\n"
"\n"
" OBJECT := { prog | map | link | cgroup | perf | net | feature | btf | gen | struct_ops | iter }\n"
- " " HELP_SPEC_OPTIONS "\n"
+ " " HELP_SPEC_OPTIONS " |\n"
+ " {-V|--version} }\n"
"",
bin_name, bin_name, bin_name);
#define HELP_SPEC_PROGRAM \
"PROG := { id PROG_ID | pinned FILE | tag PROG_TAG | name PROG_NAME }"
#define HELP_SPEC_OPTIONS \
- "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-f|--bpffs} |\n" \
- "\t {-m|--mapcompat} | {-n|--nomount} }"
+ "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug}"
#define HELP_SPEC_MAP \
"MAP := { id MAP_ID | pinned FILE | name MAP_NAME }"
#define HELP_SPEC_LINK \
} else if (info->btf_value_type_id) {
int err;
- err = btf__get_from_id(info->btf_id, &btf);
- if (err || !btf) {
+ btf = btf__load_from_kernel_by_id(info->btf_id);
+ err = libbpf_get_error(btf);
+ if (err) {
p_err("failed to get btf");
- btf = err ? ERR_PTR(err) : ERR_PTR(-ESRCH);
+ btf = ERR_PTR(err);
}
}
void *value)
{
json_writer_t *btf_wtr;
- struct btf *btf = NULL;
- int err;
+ struct btf *btf;
- err = btf__get_from_id(info->btf_id, &btf);
- if (err) {
+ btf = btf__load_from_kernel_by_id(info->btf_id);
+ if (libbpf_get_error(btf)) {
p_err("failed to get btf");
return;
}
" devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n"
" cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n"
" queue | stack | sk_storage | struct_ops | ringbuf | inode_storage |\n"
- " task_storage }\n"
- " " HELP_SPEC_OPTIONS "\n"
+ " task_storage }\n"
+ " " HELP_SPEC_OPTIONS " |\n"
+ " {-f|--bpffs} | {-n|--nomount} }\n"
"",
bin_name, argv[-2]);
"\n"
" " HELP_SPEC_PROGRAM "\n"
" ATTACH_TYPE := { xdp | xdpgeneric | xdpdrv | xdpoffload }\n"
+ " " HELP_SPEC_OPTIONS " }\n"
"\n"
"Note: Only xdp and tc attachments are supported now.\n"
" For progs attached to cgroups, use \"bpftool cgroup\"\n"
static int do_help(int argc, char **argv)
{
fprintf(stderr,
- "Usage: %1$s %2$s { show | list | help }\n"
+ "Usage: %1$s %2$s { show | list }\n"
+ " %1$s %2$s help }\n"
+ "\n"
+ " " HELP_SPEC_OPTIONS " }\n"
"",
bin_name, argv[-2]);
struct bpf_map_info map_info;
struct btf_var_secinfo *vsi;
bool printed_header = false;
- struct btf *btf = NULL;
unsigned int i, vlen;
void *value = NULL;
const char *name;
+ struct btf *btf;
int err;
if (!num_maps)
if (!value)
return;
- err = btf__get_from_id(map_info.btf_id, &btf);
- if (err || !btf)
+ btf = btf__load_from_kernel_by_id(map_info.btf_id);
+ if (libbpf_get_error(btf))
goto out_free;
t_datasec = btf__type_by_id(btf, map_info.btf_value_type_id);
member_len = info->xlated_prog_len;
}
- if (info->btf_id && btf__get_from_id(info->btf_id, &btf)) {
- p_err("failed to get btf");
- return -1;
+ if (info->btf_id) {
+ btf = btf__load_from_kernel_by_id(info->btf_id);
+ if (libbpf_get_error(btf)) {
+ p_err("failed to get btf");
+ return -1;
+ }
}
func_info = u64_to_ptr(info->func_info);
kernel_syms_destroy(&dd);
}
+ btf__free(btf);
+
return 0;
}
struct bpf_prog_info_linear *info_linear;
struct bpf_func_info *func_info;
const struct btf_type *t;
+ struct btf *btf = NULL;
char *name = NULL;
- struct btf *btf;
info_linear = bpf_program__get_prog_info_linear(
tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO);
return NULL;
}
- if (info_linear->info.btf_id == 0 ||
- btf__get_from_id(info_linear->info.btf_id, &btf)) {
+ if (info_linear->info.btf_id == 0) {
p_err("prog FD %d doesn't have valid btf", tgt_fd);
goto out;
}
+ btf = btf__load_from_kernel_by_id(info_linear->info.btf_id);
+ if (libbpf_get_error(btf)) {
+ p_err("failed to load btf for prog FD %d", tgt_fd);
+ goto out;
+ }
+
func_info = u64_to_ptr(info_linear->info.func_info);
t = btf__type_by_id(btf, func_info[0].type_id);
if (!t) {
}
name = strdup(btf__name_by_offset(btf, t->name_off));
out:
+ btf__free(btf);
free(info_linear);
return name;
}
" cgroup/sendmsg6 | cgroup/recvmsg4 | cgroup/recvmsg6 |\n"
" cgroup/getsockopt | cgroup/setsockopt | cgroup/sock_release |\n"
" struct_ops | fentry | fexit | freplace | sk_lookup }\n"
- " ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n"
- " flow_dissector }\n"
+ " ATTACH_TYPE := { msg_verdict | skb_verdict | stream_verdict |\n"
+ " stream_parser | flow_dissector }\n"
" METRIC := { cycles | instructions | l1d_loads | llc_misses | itlb_misses | dtlb_misses }\n"
- " " HELP_SPEC_OPTIONS "\n"
+ " " HELP_SPEC_OPTIONS " |\n"
+ " {-f|--bpffs} | {-m|--mapcompat} | {-n|--nomount} |\n"
+ " {-L|--use-loader} }\n"
"",
bin_name, argv[-2]);
" %1$s %2$s unregister STRUCT_OPS_MAP\n"
" %1$s %2$s help\n"
"\n"
- " OPTIONS := { {-j|--json} [{-p|--pretty}] }\n"
" STRUCT_OPS_MAP := [ id STRUCT_OPS_MAP_ID | name STRUCT_OPS_MAP_NAME ]\n"
+ " " HELP_SPEC_OPTIONS " }\n"
"",
bin_name, argv[-2]);
sh->sh_addralign = expected;
if (gelf_update_shdr(scn, sh) == 0) {
- printf("FAILED cannot update section header: %s\n",
+ pr_err("FAILED cannot update section header: %s\n",
elf_errmsg(-1));
return -1;
}
elf = elf_begin(fd, ELF_C_RDWR_MMAP, NULL);
if (!elf) {
+ close(fd);
pr_err("FAILED cannot create ELF descriptor: %s\n",
elf_errmsg(-1));
return -1;
err = libbpf_get_error(btf);
if (err) {
pr_err("FAILED: load BTF from %s: %s\n",
- obj->path, strerror(-err));
+ obj->btf ?: obj->path, strerror(-err));
return -1;
}
int i;
if (!id->id) {
- pr_err("FAILED unresolved symbol %s\n", id->name);
- return -EINVAL;
+ pr_err("WARN: resolve_btfids: unresolved symbol %s\n", id->name);
}
for (i = 0; i < id->addr_cnt; i++) {
err = 0;
out:
- if (obj.efile.elf)
+ if (obj.efile.elf) {
elf_end(obj.efile.elf);
- close(obj.efile.fd);
+ close(obj.efile.fd);
+ }
return err;
}
IFLA_BOND_AD_ACTOR_SYSTEM,
IFLA_BOND_TLB_DYNAMIC_LB,
IFLA_BOND_PEER_NOTIF_DELAY,
+ IFLA_BOND_AD_LACP_ACTIVE,
__IFLA_BOND_MAX,
};
libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \
- btf_dump.o ringbuf.o strset.o linker.o gen_loader.o
+ btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o
static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian);
-int btf__load(struct btf *btf)
+int btf__load_into_kernel(struct btf *btf)
{
__u32 log_buf_size = 0, raw_size;
char *log_buf = NULL;
free(log_buf);
return libbpf_err(err);
}
+int btf__load(struct btf *) __attribute__((alias("btf__load_into_kernel")));
int btf__fd(const struct btf *btf)
{
return btf;
}
-int btf__get_from_id(__u32 id, struct btf **btf)
+struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf)
{
- struct btf *res;
- int err, btf_fd;
+ struct btf *btf;
+ int btf_fd;
- *btf = NULL;
btf_fd = bpf_btf_get_fd_by_id(id);
if (btf_fd < 0)
- return libbpf_err(-errno);
-
- res = btf_get_from_fd(btf_fd, NULL);
- err = libbpf_get_error(res);
+ return libbpf_err_ptr(-errno);
+ btf = btf_get_from_fd(btf_fd, base_btf);
close(btf_fd);
+ return libbpf_ptr(btf);
+}
+
+struct btf *btf__load_from_kernel_by_id(__u32 id)
+{
+ return btf__load_from_kernel_by_id_split(id, NULL);
+}
+
+int btf__get_from_id(__u32 id, struct btf **btf)
+{
+ struct btf *res;
+ int err;
+
+ *btf = NULL;
+ res = btf__load_from_kernel_by_id(id);
+ err = libbpf_get_error(res);
+
if (err)
return libbpf_err(err);
*/
if (d->hypot_adjust_canon)
continue;
-
+
if (t_kind == BTF_KIND_FWD && c_kind != BTF_KIND_FWD)
d->map[t_id] = c_id;
* Probe few well-known locations for vmlinux kernel image and try to load BTF
* data out of it to use for target BTF.
*/
-struct btf *libbpf_find_kernel_btf(void)
+struct btf *btf__load_vmlinux_btf(void)
{
struct {
const char *path_fmt;
return libbpf_err_ptr(-ESRCH);
}
+struct btf *libbpf_find_kernel_btf(void) __attribute__((alias("btf__load_vmlinux_btf")));
+
+struct btf *btf__load_module_btf(const char *module_name, struct btf *vmlinux_btf)
+{
+ char path[80];
+
+ snprintf(path, sizeof(path), "/sys/kernel/btf/%s", module_name);
+ return btf__parse_split(path, vmlinux_btf);
+}
+
int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ctx)
{
int i, n, err;
LIBBPF_API struct btf *btf__parse_raw(const char *path);
LIBBPF_API struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf);
+LIBBPF_API struct btf *btf__load_vmlinux_btf(void);
+LIBBPF_API struct btf *btf__load_module_btf(const char *module_name, struct btf *vmlinux_btf);
+LIBBPF_API struct btf *libbpf_find_kernel_btf(void);
+
+LIBBPF_API struct btf *btf__load_from_kernel_by_id(__u32 id);
+LIBBPF_API struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf);
+LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf);
+
LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf);
LIBBPF_API int btf__load(struct btf *btf);
+LIBBPF_API int btf__load_into_kernel(struct btf *btf);
LIBBPF_API __s32 btf__find_by_name(const struct btf *btf,
const char *type_name);
LIBBPF_API __s32 btf__find_by_name_kind(const struct btf *btf,
LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size);
LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset);
LIBBPF_API const char *btf__str_by_offset(const struct btf *btf, __u32 offset);
-LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf);
LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
__u32 expected_key_size,
__u32 expected_value_size,
LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext);
LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext);
-LIBBPF_API struct btf *libbpf_find_kernel_btf(void);
-
LIBBPF_API int btf__find_str(struct btf *btf, const char *s);
LIBBPF_API int btf__add_str(struct btf *btf, const char *s);
LIBBPF_API int btf__add_type(struct btf *btf, const struct btf *src_btf,
btf_dump__emit_type_decl(struct btf_dump *d, __u32 id,
const struct btf_dump_emit_type_decl_opts *opts);
+
+struct btf_dump_type_data_opts {
+ /* size of this struct, for forward/backward compatibility */
+ size_t sz;
+ const char *indent_str;
+ int indent_level;
+ /* below match "show" flags for bpf_show_snprintf() */
+ bool compact; /* no newlines/indentation */
+ bool skip_names; /* skip member/type names */
+ bool emit_zeroes; /* show 0-valued fields */
+ size_t :0;
+};
+#define btf_dump_type_data_opts__last_field emit_zeroes
+
+LIBBPF_API int
+btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
+ const void *data, size_t data_sz,
+ const struct btf_dump_type_data_opts *opts);
+
/*
* A set of helpers for easier BTF types handling
*/
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
+#include <ctype.h>
+#include <endian.h>
#include <errno.h>
#include <linux/err.h>
#include <linux/btf.h>
__u8 referenced: 1;
};
+/* indent string length; one indent string is added for each indent level */
+#define BTF_DATA_INDENT_STR_LEN 32
+
+/*
+ * Common internal data for BTF type data dump operations.
+ */
+struct btf_dump_data {
+ const void *data_end; /* end of valid data to show */
+ bool compact;
+ bool skip_names;
+ bool emit_zeroes;
+ __u8 indent_lvl; /* base indent level */
+ char indent_str[BTF_DATA_INDENT_STR_LEN];
+ /* below are used during iteration */
+ int depth;
+ bool is_array_member;
+ bool is_array_terminated;
+ bool is_array_char;
+};
+
struct btf_dump {
const struct btf *btf;
const struct btf_ext *btf_ext;
struct btf_dump_opts opts;
int ptr_sz;
bool strip_mods;
+ bool skip_anon_defs;
int last_id;
/* per-type auxiliary state */
* name occurrences
*/
struct hashmap *ident_names;
+ /*
+ * data for typed display; allocated if needed.
+ */
+ struct btf_dump_data *typed_dump;
};
static size_t str_hash_fn(const void *key, void *ctx)
break;
case BTF_KIND_FUNC_PROTO: {
const struct btf_param *p = btf_params(t);
- __u16 vlen = btf_vlen(t);
+ __u16 n = btf_vlen(t);
int i;
btf_dump_emit_type(d, t->type, cont_id);
- for (i = 0; i < vlen; i++, p++)
+ for (i = 0; i < n; i++, p++)
btf_dump_emit_type(d, p->type, cont_id);
break;
static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id,
const struct btf_type *t)
{
- btf_dump_printf(d, "%s %s",
+ btf_dump_printf(d, "%s%s%s",
btf_is_struct(t) ? "struct" : "union",
+ t->name_off ? " " : "",
btf_dump_type_name(d, id));
}
case BTF_KIND_UNION:
btf_dump_emit_mods(d, decls);
/* inline anonymous struct/union */
- if (t->name_off == 0)
+ if (t->name_off == 0 && !d->skip_anon_defs)
btf_dump_emit_struct_def(d, id, t, lvl);
else
btf_dump_emit_struct_fwd(d, id, t);
case BTF_KIND_ENUM:
btf_dump_emit_mods(d, decls);
/* inline anonymous enum */
- if (t->name_off == 0)
+ if (t->name_off == 0 && !d->skip_anon_defs)
btf_dump_emit_enum_def(d, id, t, lvl);
else
btf_dump_emit_enum_fwd(d, id, t);
btf_dump_emit_name(d, fname, last_was_ptr);
}
+/* show type name as (type_name) */
+static void btf_dump_emit_type_cast(struct btf_dump *d, __u32 id,
+ bool top_level)
+{
+ const struct btf_type *t;
+
+ /* for array members, we don't bother emitting type name for each
+ * member to avoid the redundancy of
+ * .name = (char[4])[(char)'f',(char)'o',(char)'o',]
+ */
+ if (d->typed_dump->is_array_member)
+ return;
+
+ /* avoid type name specification for variable/section; it will be done
+ * for the associated variable value(s).
+ */
+ t = btf__type_by_id(d->btf, id);
+ if (btf_is_var(t) || btf_is_datasec(t))
+ return;
+
+ if (top_level)
+ btf_dump_printf(d, "(");
+
+ d->skip_anon_defs = true;
+ d->strip_mods = true;
+ btf_dump_emit_type_decl(d, id, "", 0);
+ d->strip_mods = false;
+ d->skip_anon_defs = false;
+
+ if (top_level)
+ btf_dump_printf(d, ")");
+}
+
/* return number of duplicates (occurrences) of a given name */
static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map,
const char *orig_name)
{
return btf_dump_resolve_name(d, id, d->ident_names);
}
+
+static int btf_dump_dump_type_data(struct btf_dump *d,
+ const char *fname,
+ const struct btf_type *t,
+ __u32 id,
+ const void *data,
+ __u8 bits_offset,
+ __u8 bit_sz);
+
+static const char *btf_dump_data_newline(struct btf_dump *d)
+{
+ return d->typed_dump->compact || d->typed_dump->depth == 0 ? "" : "\n";
+}
+
+static const char *btf_dump_data_delim(struct btf_dump *d)
+{
+ return d->typed_dump->depth == 0 ? "" : ",";
+}
+
+static void btf_dump_data_pfx(struct btf_dump *d)
+{
+ int i, lvl = d->typed_dump->indent_lvl + d->typed_dump->depth;
+
+ if (d->typed_dump->compact)
+ return;
+
+ for (i = 0; i < lvl; i++)
+ btf_dump_printf(d, "%s", d->typed_dump->indent_str);
+}
+
+/* A macro is used here as btf_type_value[s]() appends format specifiers
+ * to the format specifier passed in; these do the work of appending
+ * delimiters etc while the caller simply has to specify the type values
+ * in the format specifier + value(s).
+ */
+#define btf_dump_type_values(d, fmt, ...) \
+ btf_dump_printf(d, fmt "%s%s", \
+ ##__VA_ARGS__, \
+ btf_dump_data_delim(d), \
+ btf_dump_data_newline(d))
+
+static int btf_dump_unsupported_data(struct btf_dump *d,
+ const struct btf_type *t,
+ __u32 id)
+{
+ btf_dump_printf(d, "<unsupported kind:%u>", btf_kind(t));
+ return -ENOTSUP;
+}
+
+static int btf_dump_get_bitfield_value(struct btf_dump *d,
+ const struct btf_type *t,
+ const void *data,
+ __u8 bits_offset,
+ __u8 bit_sz,
+ __u64 *value)
+{
+ __u16 left_shift_bits, right_shift_bits;
+ __u8 nr_copy_bits, nr_copy_bytes;
+ const __u8 *bytes = data;
+ int sz = t->size;
+ __u64 num = 0;
+ int i;
+
+ /* Maximum supported bitfield size is 64 bits */
+ if (sz > 8) {
+ pr_warn("unexpected bitfield size %d\n", sz);
+ return -EINVAL;
+ }
+
+ /* Bitfield value retrieval is done in two steps; first relevant bytes are
+ * stored in num, then we left/right shift num to eliminate irrelevant bits.
+ */
+ nr_copy_bits = bit_sz + bits_offset;
+ nr_copy_bytes = t->size;
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ for (i = nr_copy_bytes - 1; i >= 0; i--)
+ num = num * 256 + bytes[i];
+#elif __BYTE_ORDER == __BIG_ENDIAN
+ for (i = 0; i < nr_copy_bytes; i++)
+ num = num * 256 + bytes[i];
+#else
+# error "Unrecognized __BYTE_ORDER__"
+#endif
+ left_shift_bits = 64 - nr_copy_bits;
+ right_shift_bits = 64 - bit_sz;
+
+ *value = (num << left_shift_bits) >> right_shift_bits;
+
+ return 0;
+}
+
+static int btf_dump_bitfield_check_zero(struct btf_dump *d,
+ const struct btf_type *t,
+ const void *data,
+ __u8 bits_offset,
+ __u8 bit_sz)
+{
+ __u64 check_num;
+ int err;
+
+ err = btf_dump_get_bitfield_value(d, t, data, bits_offset, bit_sz, &check_num);
+ if (err)
+ return err;
+ if (check_num == 0)
+ return -ENODATA;
+ return 0;
+}
+
+static int btf_dump_bitfield_data(struct btf_dump *d,
+ const struct btf_type *t,
+ const void *data,
+ __u8 bits_offset,
+ __u8 bit_sz)
+{
+ __u64 print_num;
+ int err;
+
+ err = btf_dump_get_bitfield_value(d, t, data, bits_offset, bit_sz, &print_num);
+ if (err)
+ return err;
+
+ btf_dump_type_values(d, "0x%llx", (unsigned long long)print_num);
+
+ return 0;
+}
+
+/* ints, floats and ptrs */
+static int btf_dump_base_type_check_zero(struct btf_dump *d,
+ const struct btf_type *t,
+ __u32 id,
+ const void *data)
+{
+ static __u8 bytecmp[16] = {};
+ int nr_bytes;
+
+ /* For pointer types, pointer size is not defined on a per-type basis.
+ * On dump creation however, we store the pointer size.
+ */
+ if (btf_kind(t) == BTF_KIND_PTR)
+ nr_bytes = d->ptr_sz;
+ else
+ nr_bytes = t->size;
+
+ if (nr_bytes < 1 || nr_bytes > 16) {
+ pr_warn("unexpected size %d for id [%u]\n", nr_bytes, id);
+ return -EINVAL;
+ }
+
+ if (memcmp(data, bytecmp, nr_bytes) == 0)
+ return -ENODATA;
+ return 0;
+}
+
+static bool ptr_is_aligned(const void *data, int data_sz)
+{
+ return ((uintptr_t)data) % data_sz == 0;
+}
+
+static int btf_dump_int_data(struct btf_dump *d,
+ const struct btf_type *t,
+ __u32 type_id,
+ const void *data,
+ __u8 bits_offset)
+{
+ __u8 encoding = btf_int_encoding(t);
+ bool sign = encoding & BTF_INT_SIGNED;
+ int sz = t->size;
+
+ if (sz == 0) {
+ pr_warn("unexpected size %d for id [%u]\n", sz, type_id);
+ return -EINVAL;
+ }
+
+ /* handle packed int data - accesses of integers not aligned on
+ * int boundaries can cause problems on some platforms.
+ */
+ if (!ptr_is_aligned(data, sz))
+ return btf_dump_bitfield_data(d, t, data, 0, 0);
+
+ switch (sz) {
+ case 16: {
+ const __u64 *ints = data;
+ __u64 lsi, msi;
+
+ /* avoid use of __int128 as some 32-bit platforms do not
+ * support it.
+ */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ lsi = ints[0];
+ msi = ints[1];
+#elif __BYTE_ORDER == __BIG_ENDIAN
+ lsi = ints[1];
+ msi = ints[0];
+#else
+# error "Unrecognized __BYTE_ORDER__"
+#endif
+ if (msi == 0)
+ btf_dump_type_values(d, "0x%llx", (unsigned long long)lsi);
+ else
+ btf_dump_type_values(d, "0x%llx%016llx", (unsigned long long)msi,
+ (unsigned long long)lsi);
+ break;
+ }
+ case 8:
+ if (sign)
+ btf_dump_type_values(d, "%lld", *(long long *)data);
+ else
+ btf_dump_type_values(d, "%llu", *(unsigned long long *)data);
+ break;
+ case 4:
+ if (sign)
+ btf_dump_type_values(d, "%d", *(__s32 *)data);
+ else
+ btf_dump_type_values(d, "%u", *(__u32 *)data);
+ break;
+ case 2:
+ if (sign)
+ btf_dump_type_values(d, "%d", *(__s16 *)data);
+ else
+ btf_dump_type_values(d, "%u", *(__u16 *)data);
+ break;
+ case 1:
+ if (d->typed_dump->is_array_char) {
+ /* check for null terminator */
+ if (d->typed_dump->is_array_terminated)
+ break;
+ if (*(char *)data == '\0') {
+ d->typed_dump->is_array_terminated = true;
+ break;
+ }
+ if (isprint(*(char *)data)) {
+ btf_dump_type_values(d, "'%c'", *(char *)data);
+ break;
+ }
+ }
+ if (sign)
+ btf_dump_type_values(d, "%d", *(__s8 *)data);
+ else
+ btf_dump_type_values(d, "%u", *(__u8 *)data);
+ break;
+ default:
+ pr_warn("unexpected sz %d for id [%u]\n", sz, type_id);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+union float_data {
+ long double ld;
+ double d;
+ float f;
+};
+
+static int btf_dump_float_data(struct btf_dump *d,
+ const struct btf_type *t,
+ __u32 type_id,
+ const void *data)
+{
+ const union float_data *flp = data;
+ union float_data fl;
+ int sz = t->size;
+
+ /* handle unaligned data; copy to local union */
+ if (!ptr_is_aligned(data, sz)) {
+ memcpy(&fl, data, sz);
+ flp = &fl;
+ }
+
+ switch (sz) {
+ case 16:
+ btf_dump_type_values(d, "%Lf", flp->ld);
+ break;
+ case 8:
+ btf_dump_type_values(d, "%lf", flp->d);
+ break;
+ case 4:
+ btf_dump_type_values(d, "%f", flp->f);
+ break;
+ default:
+ pr_warn("unexpected size %d for id [%u]\n", sz, type_id);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int btf_dump_var_data(struct btf_dump *d,
+ const struct btf_type *v,
+ __u32 id,
+ const void *data)
+{
+ enum btf_func_linkage linkage = btf_var(v)->linkage;
+ const struct btf_type *t;
+ const char *l;
+ __u32 type_id;
+
+ switch (linkage) {
+ case BTF_FUNC_STATIC:
+ l = "static ";
+ break;
+ case BTF_FUNC_EXTERN:
+ l = "extern ";
+ break;
+ case BTF_FUNC_GLOBAL:
+ default:
+ l = "";
+ break;
+ }
+
+ /* format of output here is [linkage] [type] [varname] = (type)value,
+ * for example "static int cpu_profile_flip = (int)1"
+ */
+ btf_dump_printf(d, "%s", l);
+ type_id = v->type;
+ t = btf__type_by_id(d->btf, type_id);
+ btf_dump_emit_type_cast(d, type_id, false);
+ btf_dump_printf(d, " %s = ", btf_name_of(d, v->name_off));
+ return btf_dump_dump_type_data(d, NULL, t, type_id, data, 0, 0);
+}
+
+static int btf_dump_array_data(struct btf_dump *d,
+ const struct btf_type *t,
+ __u32 id,
+ const void *data)
+{
+ const struct btf_array *array = btf_array(t);
+ const struct btf_type *elem_type;
+ __u32 i, elem_size = 0, elem_type_id;
+ bool is_array_member;
+
+ elem_type_id = array->type;
+ elem_type = skip_mods_and_typedefs(d->btf, elem_type_id, NULL);
+ elem_size = btf__resolve_size(d->btf, elem_type_id);
+ if (elem_size <= 0) {
+ pr_warn("unexpected elem size %d for array type [%u]\n", elem_size, id);
+ return -EINVAL;
+ }
+
+ if (btf_is_int(elem_type)) {
+ /*
+ * BTF_INT_CHAR encoding never seems to be set for
+ * char arrays, so if size is 1 and element is
+ * printable as a char, we'll do that.
+ */
+ if (elem_size == 1)
+ d->typed_dump->is_array_char = true;
+ }
+
+ /* note that we increment depth before calling btf_dump_print() below;
+ * this is intentional. btf_dump_data_newline() will not print a
+ * newline for depth 0 (since this leaves us with trailing newlines
+ * at the end of typed display), so depth is incremented first.
+ * For similar reasons, we decrement depth before showing the closing
+ * parenthesis.
+ */
+ d->typed_dump->depth++;
+ btf_dump_printf(d, "[%s", btf_dump_data_newline(d));
+
+ /* may be a multidimensional array, so store current "is array member"
+ * status so we can restore it correctly later.
+ */
+ is_array_member = d->typed_dump->is_array_member;
+ d->typed_dump->is_array_member = true;
+ for (i = 0; i < array->nelems; i++, data += elem_size) {
+ if (d->typed_dump->is_array_terminated)
+ break;
+ btf_dump_dump_type_data(d, NULL, elem_type, elem_type_id, data, 0, 0);
+ }
+ d->typed_dump->is_array_member = is_array_member;
+ d->typed_dump->depth--;
+ btf_dump_data_pfx(d);
+ btf_dump_type_values(d, "]");
+
+ return 0;
+}
+
+static int btf_dump_struct_data(struct btf_dump *d,
+ const struct btf_type *t,
+ __u32 id,
+ const void *data)
+{
+ const struct btf_member *m = btf_members(t);
+ __u16 n = btf_vlen(t);
+ int i, err;
+
+ /* note that we increment depth before calling btf_dump_print() below;
+ * this is intentional. btf_dump_data_newline() will not print a
+ * newline for depth 0 (since this leaves us with trailing newlines
+ * at the end of typed display), so depth is incremented first.
+ * For similar reasons, we decrement depth before showing the closing
+ * parenthesis.
+ */
+ d->typed_dump->depth++;
+ btf_dump_printf(d, "{%s", btf_dump_data_newline(d));
+
+ for (i = 0; i < n; i++, m++) {
+ const struct btf_type *mtype;
+ const char *mname;
+ __u32 moffset;
+ __u8 bit_sz;
+
+ mtype = btf__type_by_id(d->btf, m->type);
+ mname = btf_name_of(d, m->name_off);
+ moffset = btf_member_bit_offset(t, i);
+
+ bit_sz = btf_member_bitfield_size(t, i);
+ err = btf_dump_dump_type_data(d, mname, mtype, m->type, data + moffset / 8,
+ moffset % 8, bit_sz);
+ if (err < 0)
+ return err;
+ }
+ d->typed_dump->depth--;
+ btf_dump_data_pfx(d);
+ btf_dump_type_values(d, "}");
+ return err;
+}
+
+union ptr_data {
+ unsigned int p;
+ unsigned long long lp;
+};
+
+static int btf_dump_ptr_data(struct btf_dump *d,
+ const struct btf_type *t,
+ __u32 id,
+ const void *data)
+{
+ if (ptr_is_aligned(data, d->ptr_sz) && d->ptr_sz == sizeof(void *)) {
+ btf_dump_type_values(d, "%p", *(void **)data);
+ } else {
+ union ptr_data pt;
+
+ memcpy(&pt, data, d->ptr_sz);
+ if (d->ptr_sz == 4)
+ btf_dump_type_values(d, "0x%x", pt.p);
+ else
+ btf_dump_type_values(d, "0x%llx", pt.lp);
+ }
+ return 0;
+}
+
+static int btf_dump_get_enum_value(struct btf_dump *d,
+ const struct btf_type *t,
+ const void *data,
+ __u32 id,
+ __s64 *value)
+{
+ int sz = t->size;
+
+ /* handle unaligned enum value */
+ if (!ptr_is_aligned(data, sz)) {
+ __u64 val;
+ int err;
+
+ err = btf_dump_get_bitfield_value(d, t, data, 0, 0, &val);
+ if (err)
+ return err;
+ *value = (__s64)val;
+ return 0;
+ }
+
+ switch (t->size) {
+ case 8:
+ *value = *(__s64 *)data;
+ return 0;
+ case 4:
+ *value = *(__s32 *)data;
+ return 0;
+ case 2:
+ *value = *(__s16 *)data;
+ return 0;
+ case 1:
+ *value = *(__s8 *)data;
+ return 0;
+ default:
+ pr_warn("unexpected size %d for enum, id:[%u]\n", t->size, id);
+ return -EINVAL;
+ }
+}
+
+static int btf_dump_enum_data(struct btf_dump *d,
+ const struct btf_type *t,
+ __u32 id,
+ const void *data)
+{
+ const struct btf_enum *e;
+ __s64 value;
+ int i, err;
+
+ err = btf_dump_get_enum_value(d, t, data, id, &value);
+ if (err)
+ return err;
+
+ for (i = 0, e = btf_enum(t); i < btf_vlen(t); i++, e++) {
+ if (value != e->val)
+ continue;
+ btf_dump_type_values(d, "%s", btf_name_of(d, e->name_off));
+ return 0;
+ }
+
+ btf_dump_type_values(d, "%d", value);
+ return 0;
+}
+
+static int btf_dump_datasec_data(struct btf_dump *d,
+ const struct btf_type *t,
+ __u32 id,
+ const void *data)
+{
+ const struct btf_var_secinfo *vsi;
+ const struct btf_type *var;
+ __u32 i;
+ int err;
+
+ btf_dump_type_values(d, "SEC(\"%s\") ", btf_name_of(d, t->name_off));
+
+ for (i = 0, vsi = btf_var_secinfos(t); i < btf_vlen(t); i++, vsi++) {
+ var = btf__type_by_id(d->btf, vsi->type);
+ err = btf_dump_dump_type_data(d, NULL, var, vsi->type, data + vsi->offset, 0, 0);
+ if (err < 0)
+ return err;
+ btf_dump_printf(d, ";");
+ }
+ return 0;
+}
+
+/* return size of type, or if base type overflows, return -E2BIG. */
+static int btf_dump_type_data_check_overflow(struct btf_dump *d,
+ const struct btf_type *t,
+ __u32 id,
+ const void *data,
+ __u8 bits_offset)
+{
+ __s64 size = btf__resolve_size(d->btf, id);
+
+ if (size < 0 || size >= INT_MAX) {
+ pr_warn("unexpected size [%zu] for id [%u]\n",
+ (size_t)size, id);
+ return -EINVAL;
+ }
+
+ /* Only do overflow checking for base types; we do not want to
+ * avoid showing part of a struct, union or array, even if we
+ * do not have enough data to show the full object. By
+ * restricting overflow checking to base types we can ensure
+ * that partial display succeeds, while avoiding overflowing
+ * and using bogus data for display.
+ */
+ t = skip_mods_and_typedefs(d->btf, id, NULL);
+ if (!t) {
+ pr_warn("unexpected error skipping mods/typedefs for id [%u]\n",
+ id);
+ return -EINVAL;
+ }
+
+ switch (btf_kind(t)) {
+ case BTF_KIND_INT:
+ case BTF_KIND_FLOAT:
+ case BTF_KIND_PTR:
+ case BTF_KIND_ENUM:
+ if (data + bits_offset / 8 + size > d->typed_dump->data_end)
+ return -E2BIG;
+ break;
+ default:
+ break;
+ }
+ return (int)size;
+}
+
+static int btf_dump_type_data_check_zero(struct btf_dump *d,
+ const struct btf_type *t,
+ __u32 id,
+ const void *data,
+ __u8 bits_offset,
+ __u8 bit_sz)
+{
+ __s64 value;
+ int i, err;
+
+ /* toplevel exceptions; we show zero values if
+ * - we ask for them (emit_zeros)
+ * - if we are at top-level so we see "struct empty { }"
+ * - or if we are an array member and the array is non-empty and
+ * not a char array; we don't want to be in a situation where we
+ * have an integer array 0, 1, 0, 1 and only show non-zero values.
+ * If the array contains zeroes only, or is a char array starting
+ * with a '\0', the array-level check_zero() will prevent showing it;
+ * we are concerned with determining zero value at the array member
+ * level here.
+ */
+ if (d->typed_dump->emit_zeroes || d->typed_dump->depth == 0 ||
+ (d->typed_dump->is_array_member &&
+ !d->typed_dump->is_array_char))
+ return 0;
+
+ t = skip_mods_and_typedefs(d->btf, id, NULL);
+
+ switch (btf_kind(t)) {
+ case BTF_KIND_INT:
+ if (bit_sz)
+ return btf_dump_bitfield_check_zero(d, t, data, bits_offset, bit_sz);
+ return btf_dump_base_type_check_zero(d, t, id, data);
+ case BTF_KIND_FLOAT:
+ case BTF_KIND_PTR:
+ return btf_dump_base_type_check_zero(d, t, id, data);
+ case BTF_KIND_ARRAY: {
+ const struct btf_array *array = btf_array(t);
+ const struct btf_type *elem_type;
+ __u32 elem_type_id, elem_size;
+ bool ischar;
+
+ elem_type_id = array->type;
+ elem_size = btf__resolve_size(d->btf, elem_type_id);
+ elem_type = skip_mods_and_typedefs(d->btf, elem_type_id, NULL);
+
+ ischar = btf_is_int(elem_type) && elem_size == 1;
+
+ /* check all elements; if _any_ element is nonzero, all
+ * of array is displayed. We make an exception however
+ * for char arrays where the first element is 0; these
+ * are considered zeroed also, even if later elements are
+ * non-zero because the string is terminated.
+ */
+ for (i = 0; i < array->nelems; i++) {
+ if (i == 0 && ischar && *(char *)data == 0)
+ return -ENODATA;
+ err = btf_dump_type_data_check_zero(d, elem_type,
+ elem_type_id,
+ data +
+ (i * elem_size),
+ bits_offset, 0);
+ if (err != -ENODATA)
+ return err;
+ }
+ return -ENODATA;
+ }
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION: {
+ const struct btf_member *m = btf_members(t);
+ __u16 n = btf_vlen(t);
+
+ /* if any struct/union member is non-zero, the struct/union
+ * is considered non-zero and dumped.
+ */
+ for (i = 0; i < n; i++, m++) {
+ const struct btf_type *mtype;
+ __u32 moffset;
+
+ mtype = btf__type_by_id(d->btf, m->type);
+ moffset = btf_member_bit_offset(t, i);
+
+ /* btf_int_bits() does not store member bitfield size;
+ * bitfield size needs to be stored here so int display
+ * of member can retrieve it.
+ */
+ bit_sz = btf_member_bitfield_size(t, i);
+ err = btf_dump_type_data_check_zero(d, mtype, m->type, data + moffset / 8,
+ moffset % 8, bit_sz);
+ if (err != ENODATA)
+ return err;
+ }
+ return -ENODATA;
+ }
+ case BTF_KIND_ENUM:
+ err = btf_dump_get_enum_value(d, t, data, id, &value);
+ if (err)
+ return err;
+ if (value == 0)
+ return -ENODATA;
+ return 0;
+ default:
+ return 0;
+ }
+}
+
+/* returns size of data dumped, or error. */
+static int btf_dump_dump_type_data(struct btf_dump *d,
+ const char *fname,
+ const struct btf_type *t,
+ __u32 id,
+ const void *data,
+ __u8 bits_offset,
+ __u8 bit_sz)
+{
+ int size, err;
+
+ size = btf_dump_type_data_check_overflow(d, t, id, data, bits_offset);
+ if (size < 0)
+ return size;
+ err = btf_dump_type_data_check_zero(d, t, id, data, bits_offset, bit_sz);
+ if (err) {
+ /* zeroed data is expected and not an error, so simply skip
+ * dumping such data. Record other errors however.
+ */
+ if (err == -ENODATA)
+ return size;
+ return err;
+ }
+ btf_dump_data_pfx(d);
+
+ if (!d->typed_dump->skip_names) {
+ if (fname && strlen(fname) > 0)
+ btf_dump_printf(d, ".%s = ", fname);
+ btf_dump_emit_type_cast(d, id, true);
+ }
+
+ t = skip_mods_and_typedefs(d->btf, id, NULL);
+
+ switch (btf_kind(t)) {
+ case BTF_KIND_UNKN:
+ case BTF_KIND_FWD:
+ case BTF_KIND_FUNC:
+ case BTF_KIND_FUNC_PROTO:
+ err = btf_dump_unsupported_data(d, t, id);
+ break;
+ case BTF_KIND_INT:
+ if (bit_sz)
+ err = btf_dump_bitfield_data(d, t, data, bits_offset, bit_sz);
+ else
+ err = btf_dump_int_data(d, t, id, data, bits_offset);
+ break;
+ case BTF_KIND_FLOAT:
+ err = btf_dump_float_data(d, t, id, data);
+ break;
+ case BTF_KIND_PTR:
+ err = btf_dump_ptr_data(d, t, id, data);
+ break;
+ case BTF_KIND_ARRAY:
+ err = btf_dump_array_data(d, t, id, data);
+ break;
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ err = btf_dump_struct_data(d, t, id, data);
+ break;
+ case BTF_KIND_ENUM:
+ /* handle bitfield and int enum values */
+ if (bit_sz) {
+ __u64 print_num;
+ __s64 enum_val;
+
+ err = btf_dump_get_bitfield_value(d, t, data, bits_offset, bit_sz,
+ &print_num);
+ if (err)
+ break;
+ enum_val = (__s64)print_num;
+ err = btf_dump_enum_data(d, t, id, &enum_val);
+ } else
+ err = btf_dump_enum_data(d, t, id, data);
+ break;
+ case BTF_KIND_VAR:
+ err = btf_dump_var_data(d, t, id, data);
+ break;
+ case BTF_KIND_DATASEC:
+ err = btf_dump_datasec_data(d, t, id, data);
+ break;
+ default:
+ pr_warn("unexpected kind [%u] for id [%u]\n",
+ BTF_INFO_KIND(t->info), id);
+ return -EINVAL;
+ }
+ if (err < 0)
+ return err;
+ return size;
+}
+
+int btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
+ const void *data, size_t data_sz,
+ const struct btf_dump_type_data_opts *opts)
+{
+ struct btf_dump_data typed_dump = {};
+ const struct btf_type *t;
+ int ret;
+
+ if (!OPTS_VALID(opts, btf_dump_type_data_opts))
+ return libbpf_err(-EINVAL);
+
+ t = btf__type_by_id(d->btf, id);
+ if (!t)
+ return libbpf_err(-ENOENT);
+
+ d->typed_dump = &typed_dump;
+ d->typed_dump->data_end = data + data_sz;
+ d->typed_dump->indent_lvl = OPTS_GET(opts, indent_level, 0);
+
+ /* default indent string is a tab */
+ if (!opts->indent_str)
+ d->typed_dump->indent_str[0] = '\t';
+ else
+ strncat(d->typed_dump->indent_str, opts->indent_str,
+ sizeof(d->typed_dump->indent_str) - 1);
+
+ d->typed_dump->compact = OPTS_GET(opts, compact, false);
+ d->typed_dump->skip_names = OPTS_GET(opts, skip_names, false);
+ d->typed_dump->emit_zeroes = OPTS_GET(opts, emit_zeroes, false);
+
+ ret = btf_dump_dump_type_data(d, NULL, t, id, data, 0, 0);
+
+ d->typed_dump = NULL;
+
+ return libbpf_err(ret);
+}
* it at load time.
*/
struct btf *btf_vmlinux;
+ /* Path to the custom BTF to be used for BPF CO-RE relocations as an
+ * override for vmlinux BTF.
+ */
+ char *btf_custom_path;
/* vmlinux BTF override for CO-RE relocations */
struct btf *btf_vmlinux_override;
/* Lazily initialized kernel module BTFs */
insn->off == 0;
}
-static bool is_ldimm64_insn(struct bpf_insn *insn)
-{
- return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
-}
-
static bool is_call_insn(const struct bpf_insn *insn)
{
return insn->code == (BPF_JMP | BPF_CALL);
struct bpf_program *prog;
int i;
- /* CO-RE relocations need kernel BTF */
- if (obj->btf_ext && obj->btf_ext->core_relo_info.len)
+ /* CO-RE relocations need kernel BTF, only when btf_custom_path
+ * is not specified
+ */
+ if (obj->btf_ext && obj->btf_ext->core_relo_info.len && !obj->btf_custom_path)
return true;
/* Support for typed ksyms needs kernel BTF */
if (!force && !obj_needs_vmlinux_btf(obj))
return 0;
- obj->btf_vmlinux = libbpf_find_kernel_btf();
+ obj->btf_vmlinux = btf__load_vmlinux_btf();
err = libbpf_get_error(obj->btf_vmlinux);
if (err) {
pr_warn("Error loading vmlinux BTF: %d\n", err);
*/
btf__set_fd(kern_btf, 0);
} else {
- err = btf__load(kern_btf);
+ err = btf__load_into_kernel(kern_btf);
}
if (sanitize) {
if (!err) {
{
struct bpf_create_map_attr create_attr;
struct bpf_map_def *def = &map->def;
+ int err = 0;
memset(&create_attr, 0, sizeof(create_attr));
if (bpf_map_type__is_map_in_map(def->type)) {
if (map->inner_map) {
- int err;
-
err = bpf_object__create_map(obj, map->inner_map, true);
if (err) {
pr_warn("map '%s': failed to create inner map: %d\n",
if (map->fd < 0 && (create_attr.btf_key_type_id ||
create_attr.btf_value_type_id)) {
char *cp, errmsg[STRERR_BUFSIZE];
- int err = -errno;
+ err = -errno;
cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
map->name, cp, err);
map->fd = bpf_create_map_xattr(&create_attr);
}
- if (map->fd < 0)
- return -errno;
+ err = map->fd < 0 ? -errno : 0;
if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
if (obj->gen_loader)
zfree(&map->inner_map);
}
- return 0;
+ return err;
}
static int init_map_slots(struct bpf_object *obj, struct bpf_map *map)
char *cp, errmsg[STRERR_BUFSIZE];
unsigned int i, j;
int err;
+ bool retried;
for (i = 0; i < obj->nr_maps; i++) {
map = &obj->maps[i];
+ retried = false;
+retry:
if (map->pin_path) {
err = bpf_object__reuse_map(map);
if (err) {
map->name);
goto err_out;
}
+ if (retried && map->fd < 0) {
+ pr_warn("map '%s': cannot find pinned map\n",
+ map->name);
+ err = -ENOENT;
+ goto err_out;
+ }
}
if (map->fd >= 0) {
if (map->pin_path && !map->pinned) {
err = bpf_map__pin(map, NULL);
if (err) {
+ zclose(map->fd);
+ if (!retried && err == -EEXIST) {
+ retried = true;
+ goto retry;
+ }
pr_warn("map '%s': failed to auto-pin at '%s': %d\n",
map->name, map->pin_path, err);
- zclose(map->fd);
goto err_out;
}
}
return err;
}
-#define BPF_CORE_SPEC_MAX_LEN 64
-
-/* represents BPF CO-RE field or array element accessor */
-struct bpf_core_accessor {
- __u32 type_id; /* struct/union type or array element type */
- __u32 idx; /* field index or array index */
- const char *name; /* field name or NULL for array accessor */
-};
-
-struct bpf_core_spec {
- const struct btf *btf;
- /* high-level spec: named fields and array indices only */
- struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
- /* original unresolved (no skip_mods_or_typedefs) root type ID */
- __u32 root_type_id;
- /* CO-RE relocation kind */
- enum bpf_core_relo_kind relo_kind;
- /* high-level spec length */
- int len;
- /* raw, low-level spec: 1-to-1 with accessor spec string */
- int raw_spec[BPF_CORE_SPEC_MAX_LEN];
- /* raw spec length */
- int raw_len;
- /* field bit offset represented by spec */
- __u32 bit_offset;
-};
-
-static bool str_is_empty(const char *s)
-{
- return !s || !s[0];
-}
-
-static bool is_flex_arr(const struct btf *btf,
- const struct bpf_core_accessor *acc,
- const struct btf_array *arr)
-{
- const struct btf_type *t;
-
- /* not a flexible array, if not inside a struct or has non-zero size */
- if (!acc->name || arr->nelems > 0)
- return false;
-
- /* has to be the last member of enclosing struct */
- t = btf__type_by_id(btf, acc->type_id);
- return acc->idx == btf_vlen(t) - 1;
-}
-
-static const char *core_relo_kind_str(enum bpf_core_relo_kind kind)
-{
- switch (kind) {
- case BPF_FIELD_BYTE_OFFSET: return "byte_off";
- case BPF_FIELD_BYTE_SIZE: return "byte_sz";
- case BPF_FIELD_EXISTS: return "field_exists";
- case BPF_FIELD_SIGNED: return "signed";
- case BPF_FIELD_LSHIFT_U64: return "lshift_u64";
- case BPF_FIELD_RSHIFT_U64: return "rshift_u64";
- case BPF_TYPE_ID_LOCAL: return "local_type_id";
- case BPF_TYPE_ID_TARGET: return "target_type_id";
- case BPF_TYPE_EXISTS: return "type_exists";
- case BPF_TYPE_SIZE: return "type_size";
- case BPF_ENUMVAL_EXISTS: return "enumval_exists";
- case BPF_ENUMVAL_VALUE: return "enumval_value";
- default: return "unknown";
- }
-}
-
-static bool core_relo_is_field_based(enum bpf_core_relo_kind kind)
-{
- switch (kind) {
- case BPF_FIELD_BYTE_OFFSET:
- case BPF_FIELD_BYTE_SIZE:
- case BPF_FIELD_EXISTS:
- case BPF_FIELD_SIGNED:
- case BPF_FIELD_LSHIFT_U64:
- case BPF_FIELD_RSHIFT_U64:
- return true;
- default:
- return false;
- }
-}
-
-static bool core_relo_is_type_based(enum bpf_core_relo_kind kind)
-{
- switch (kind) {
- case BPF_TYPE_ID_LOCAL:
- case BPF_TYPE_ID_TARGET:
- case BPF_TYPE_EXISTS:
- case BPF_TYPE_SIZE:
- return true;
- default:
- return false;
- }
-}
-
-static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
-{
- switch (kind) {
- case BPF_ENUMVAL_EXISTS:
- case BPF_ENUMVAL_VALUE:
- return true;
- default:
- return false;
- }
-}
-
-/*
- * Turn bpf_core_relo into a low- and high-level spec representation,
- * validating correctness along the way, as well as calculating resulting
- * field bit offset, specified by accessor string. Low-level spec captures
- * every single level of nestedness, including traversing anonymous
- * struct/union members. High-level one only captures semantically meaningful
- * "turning points": named fields and array indicies.
- * E.g., for this case:
- *
- * struct sample {
- * int __unimportant;
- * struct {
- * int __1;
- * int __2;
- * int a[7];
- * };
- * };
- *
- * struct sample *s = ...;
- *
- * int x = &s->a[3]; // access string = '0:1:2:3'
- *
- * Low-level spec has 1:1 mapping with each element of access string (it's
- * just a parsed access string representation): [0, 1, 2, 3].
- *
- * High-level spec will capture only 3 points:
- * - intial zero-index access by pointer (&s->... is the same as &s[0]...);
- * - field 'a' access (corresponds to '2' in low-level spec);
- * - array element #3 access (corresponds to '3' in low-level spec).
- *
- * Type-based relocations (TYPE_EXISTS/TYPE_SIZE,
- * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their
- * spec and raw_spec are kept empty.
- *
- * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access
- * string to specify enumerator's value index that need to be relocated.
- */
-static int bpf_core_parse_spec(const struct btf *btf,
- __u32 type_id,
- const char *spec_str,
- enum bpf_core_relo_kind relo_kind,
- struct bpf_core_spec *spec)
-{
- int access_idx, parsed_len, i;
- struct bpf_core_accessor *acc;
- const struct btf_type *t;
- const char *name;
- __u32 id;
- __s64 sz;
-
- if (str_is_empty(spec_str) || *spec_str == ':')
- return -EINVAL;
-
- memset(spec, 0, sizeof(*spec));
- spec->btf = btf;
- spec->root_type_id = type_id;
- spec->relo_kind = relo_kind;
-
- /* type-based relocations don't have a field access string */
- if (core_relo_is_type_based(relo_kind)) {
- if (strcmp(spec_str, "0"))
- return -EINVAL;
- return 0;
- }
-
- /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */
- while (*spec_str) {
- if (*spec_str == ':')
- ++spec_str;
- if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1)
- return -EINVAL;
- if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
- return -E2BIG;
- spec_str += parsed_len;
- spec->raw_spec[spec->raw_len++] = access_idx;
- }
-
- if (spec->raw_len == 0)
- return -EINVAL;
-
- t = skip_mods_and_typedefs(btf, type_id, &id);
- if (!t)
- return -EINVAL;
-
- access_idx = spec->raw_spec[0];
- acc = &spec->spec[0];
- acc->type_id = id;
- acc->idx = access_idx;
- spec->len++;
-
- if (core_relo_is_enumval_based(relo_kind)) {
- if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t))
- return -EINVAL;
-
- /* record enumerator name in a first accessor */
- acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off);
- return 0;
- }
-
- if (!core_relo_is_field_based(relo_kind))
- return -EINVAL;
-
- sz = btf__resolve_size(btf, id);
- if (sz < 0)
- return sz;
- spec->bit_offset = access_idx * sz * 8;
-
- for (i = 1; i < spec->raw_len; i++) {
- t = skip_mods_and_typedefs(btf, id, &id);
- if (!t)
- return -EINVAL;
-
- access_idx = spec->raw_spec[i];
- acc = &spec->spec[spec->len];
-
- if (btf_is_composite(t)) {
- const struct btf_member *m;
- __u32 bit_offset;
-
- if (access_idx >= btf_vlen(t))
- return -EINVAL;
-
- bit_offset = btf_member_bit_offset(t, access_idx);
- spec->bit_offset += bit_offset;
-
- m = btf_members(t) + access_idx;
- if (m->name_off) {
- name = btf__name_by_offset(btf, m->name_off);
- if (str_is_empty(name))
- return -EINVAL;
-
- acc->type_id = id;
- acc->idx = access_idx;
- acc->name = name;
- spec->len++;
- }
-
- id = m->type;
- } else if (btf_is_array(t)) {
- const struct btf_array *a = btf_array(t);
- bool flex;
-
- t = skip_mods_and_typedefs(btf, a->type, &id);
- if (!t)
- return -EINVAL;
-
- flex = is_flex_arr(btf, acc - 1, a);
- if (!flex && access_idx >= a->nelems)
- return -EINVAL;
-
- spec->spec[spec->len].type_id = id;
- spec->spec[spec->len].idx = access_idx;
- spec->len++;
-
- sz = btf__resolve_size(btf, id);
- if (sz < 0)
- return sz;
- spec->bit_offset += access_idx * sz * 8;
- } else {
- pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n",
- type_id, spec_str, i, id, btf_kind_str(t));
- return -EINVAL;
- }
- }
-
- return 0;
-}
-
static bool bpf_core_is_flavor_sep(const char *s)
{
/* check X___Y name pattern, where X and Y are not underscores */
* before last triple underscore. Struct name part after last triple
* underscore is ignored by BPF CO-RE relocation during relocation matching.
*/
-static size_t bpf_core_essential_name_len(const char *name)
+size_t bpf_core_essential_name_len(const char *name)
{
size_t n = strlen(name);
int i;
return n;
}
-struct core_cand
-{
- const struct btf *btf;
- const struct btf_type *t;
- const char *name;
- __u32 id;
-};
-
-/* dynamically sized list of type IDs and its associated struct btf */
-struct core_cand_list {
- struct core_cand *cands;
- int len;
-};
-
-static void bpf_core_free_cands(struct core_cand_list *cands)
+static void bpf_core_free_cands(struct bpf_core_cand_list *cands)
{
free(cands->cands);
free(cands);
}
-static int bpf_core_add_cands(struct core_cand *local_cand,
+static int bpf_core_add_cands(struct bpf_core_cand *local_cand,
size_t local_essent_len,
const struct btf *targ_btf,
const char *targ_btf_name,
int targ_start_id,
- struct core_cand_list *cands)
+ struct bpf_core_cand_list *cands)
{
- struct core_cand *new_cands, *cand;
+ struct bpf_core_cand *new_cands, *cand;
const struct btf_type *t;
const char *targ_name;
size_t targ_essent_len;
return 0;
}
-static struct core_cand_list *
+static struct bpf_core_cand_list *
bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id)
{
- struct core_cand local_cand = {};
- struct core_cand_list *cands;
+ struct bpf_core_cand local_cand = {};
+ struct bpf_core_cand_list *cands;
const struct btf *main_btf;
size_t local_essent_len;
int err, i;
return ERR_PTR(err);
}
-/* Check two types for compatibility for the purpose of field access
- * relocation. const/volatile/restrict and typedefs are skipped to ensure we
- * are relocating semantically compatible entities:
- * - any two STRUCTs/UNIONs are compatible and can be mixed;
- * - any two FWDs are compatible, if their names match (modulo flavor suffix);
- * - any two PTRs are always compatible;
- * - for ENUMs, names should be the same (ignoring flavor suffix) or at
- * least one of enums should be anonymous;
- * - for ENUMs, check sizes, names are ignored;
- * - for INT, size and signedness are ignored;
- * - any two FLOATs are always compatible;
- * - for ARRAY, dimensionality is ignored, element types are checked for
- * compatibility recursively;
- * - everything else shouldn't be ever a target of relocation.
- * These rules are not set in stone and probably will be adjusted as we get
- * more experience with using BPF CO-RE relocations.
- */
-static int bpf_core_fields_are_compat(const struct btf *local_btf,
- __u32 local_id,
- const struct btf *targ_btf,
- __u32 targ_id)
-{
- const struct btf_type *local_type, *targ_type;
-
-recur:
- local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
- targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
- if (!local_type || !targ_type)
- return -EINVAL;
-
- if (btf_is_composite(local_type) && btf_is_composite(targ_type))
- return 1;
- if (btf_kind(local_type) != btf_kind(targ_type))
- return 0;
-
- switch (btf_kind(local_type)) {
- case BTF_KIND_PTR:
- case BTF_KIND_FLOAT:
- return 1;
- case BTF_KIND_FWD:
- case BTF_KIND_ENUM: {
- const char *local_name, *targ_name;
- size_t local_len, targ_len;
-
- local_name = btf__name_by_offset(local_btf,
- local_type->name_off);
- targ_name = btf__name_by_offset(targ_btf, targ_type->name_off);
- local_len = bpf_core_essential_name_len(local_name);
- targ_len = bpf_core_essential_name_len(targ_name);
- /* one of them is anonymous or both w/ same flavor-less names */
- return local_len == 0 || targ_len == 0 ||
- (local_len == targ_len &&
- strncmp(local_name, targ_name, local_len) == 0);
- }
- case BTF_KIND_INT:
- /* just reject deprecated bitfield-like integers; all other
- * integers are by default compatible between each other
- */
- return btf_int_offset(local_type) == 0 &&
- btf_int_offset(targ_type) == 0;
- case BTF_KIND_ARRAY:
- local_id = btf_array(local_type)->type;
- targ_id = btf_array(targ_type)->type;
- goto recur;
- default:
- pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n",
- btf_kind(local_type), local_id, targ_id);
- return 0;
- }
-}
-
-/*
- * Given single high-level named field accessor in local type, find
- * corresponding high-level accessor for a target type. Along the way,
- * maintain low-level spec for target as well. Also keep updating target
- * bit offset.
- *
- * Searching is performed through recursive exhaustive enumeration of all
- * fields of a struct/union. If there are any anonymous (embedded)
- * structs/unions, they are recursively searched as well. If field with
- * desired name is found, check compatibility between local and target types,
- * before returning result.
- *
- * 1 is returned, if field is found.
- * 0 is returned if no compatible field is found.
- * <0 is returned on error.
- */
-static int bpf_core_match_member(const struct btf *local_btf,
- const struct bpf_core_accessor *local_acc,
- const struct btf *targ_btf,
- __u32 targ_id,
- struct bpf_core_spec *spec,
- __u32 *next_targ_id)
-{
- const struct btf_type *local_type, *targ_type;
- const struct btf_member *local_member, *m;
- const char *local_name, *targ_name;
- __u32 local_id;
- int i, n, found;
-
- targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
- if (!targ_type)
- return -EINVAL;
- if (!btf_is_composite(targ_type))
- return 0;
-
- local_id = local_acc->type_id;
- local_type = btf__type_by_id(local_btf, local_id);
- local_member = btf_members(local_type) + local_acc->idx;
- local_name = btf__name_by_offset(local_btf, local_member->name_off);
-
- n = btf_vlen(targ_type);
- m = btf_members(targ_type);
- for (i = 0; i < n; i++, m++) {
- __u32 bit_offset;
-
- bit_offset = btf_member_bit_offset(targ_type, i);
-
- /* too deep struct/union/array nesting */
- if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
- return -E2BIG;
-
- /* speculate this member will be the good one */
- spec->bit_offset += bit_offset;
- spec->raw_spec[spec->raw_len++] = i;
-
- targ_name = btf__name_by_offset(targ_btf, m->name_off);
- if (str_is_empty(targ_name)) {
- /* embedded struct/union, we need to go deeper */
- found = bpf_core_match_member(local_btf, local_acc,
- targ_btf, m->type,
- spec, next_targ_id);
- if (found) /* either found or error */
- return found;
- } else if (strcmp(local_name, targ_name) == 0) {
- /* matching named field */
- struct bpf_core_accessor *targ_acc;
-
- targ_acc = &spec->spec[spec->len++];
- targ_acc->type_id = targ_id;
- targ_acc->idx = i;
- targ_acc->name = targ_name;
-
- *next_targ_id = m->type;
- found = bpf_core_fields_are_compat(local_btf,
- local_member->type,
- targ_btf, m->type);
- if (!found)
- spec->len--; /* pop accessor */
- return found;
- }
- /* member turned out not to be what we looked for */
- spec->bit_offset -= bit_offset;
- spec->raw_len--;
- }
-
- return 0;
-}
-
/* Check local and target types for compatibility. This check is used for
* type-based CO-RE relocations and follow slightly different rules than
* field-based relocations. This function assumes that root types were already
* These rules are not set in stone and probably will be adjusted as we get
* more experience with using BPF CO-RE relocations.
*/
-static int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
- const struct btf *targ_btf, __u32 targ_id)
+int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
+ const struct btf *targ_btf, __u32 targ_id)
{
const struct btf_type *local_type, *targ_type;
int depth = 32; /* max recursion depth */
}
}
-/*
- * Try to match local spec to a target type and, if successful, produce full
- * target spec (high-level, low-level + bit offset).
- */
-static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
- const struct btf *targ_btf, __u32 targ_id,
- struct bpf_core_spec *targ_spec)
-{
- const struct btf_type *targ_type;
- const struct bpf_core_accessor *local_acc;
- struct bpf_core_accessor *targ_acc;
- int i, sz, matched;
-
- memset(targ_spec, 0, sizeof(*targ_spec));
- targ_spec->btf = targ_btf;
- targ_spec->root_type_id = targ_id;
- targ_spec->relo_kind = local_spec->relo_kind;
-
- if (core_relo_is_type_based(local_spec->relo_kind)) {
- return bpf_core_types_are_compat(local_spec->btf,
- local_spec->root_type_id,
- targ_btf, targ_id);
- }
-
- local_acc = &local_spec->spec[0];
- targ_acc = &targ_spec->spec[0];
-
- if (core_relo_is_enumval_based(local_spec->relo_kind)) {
- size_t local_essent_len, targ_essent_len;
- const struct btf_enum *e;
- const char *targ_name;
-
- /* has to resolve to an enum */
- targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id);
- if (!btf_is_enum(targ_type))
- return 0;
-
- local_essent_len = bpf_core_essential_name_len(local_acc->name);
-
- for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) {
- targ_name = btf__name_by_offset(targ_spec->btf, e->name_off);
- targ_essent_len = bpf_core_essential_name_len(targ_name);
- if (targ_essent_len != local_essent_len)
- continue;
- if (strncmp(local_acc->name, targ_name, local_essent_len) == 0) {
- targ_acc->type_id = targ_id;
- targ_acc->idx = i;
- targ_acc->name = targ_name;
- targ_spec->len++;
- targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
- targ_spec->raw_len++;
- return 1;
- }
- }
- return 0;
- }
-
- if (!core_relo_is_field_based(local_spec->relo_kind))
- return -EINVAL;
-
- for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) {
- targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id,
- &targ_id);
- if (!targ_type)
- return -EINVAL;
-
- if (local_acc->name) {
- matched = bpf_core_match_member(local_spec->btf,
- local_acc,
- targ_btf, targ_id,
- targ_spec, &targ_id);
- if (matched <= 0)
- return matched;
- } else {
- /* for i=0, targ_id is already treated as array element
- * type (because it's the original struct), for others
- * we should find array element type first
- */
- if (i > 0) {
- const struct btf_array *a;
- bool flex;
-
- if (!btf_is_array(targ_type))
- return 0;
-
- a = btf_array(targ_type);
- flex = is_flex_arr(targ_btf, targ_acc - 1, a);
- if (!flex && local_acc->idx >= a->nelems)
- return 0;
- if (!skip_mods_and_typedefs(targ_btf, a->type,
- &targ_id))
- return -EINVAL;
- }
-
- /* too deep struct/union/array nesting */
- if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
- return -E2BIG;
-
- targ_acc->type_id = targ_id;
- targ_acc->idx = local_acc->idx;
- targ_acc->name = NULL;
- targ_spec->len++;
- targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
- targ_spec->raw_len++;
-
- sz = btf__resolve_size(targ_btf, targ_id);
- if (sz < 0)
- return sz;
- targ_spec->bit_offset += local_acc->idx * sz * 8;
- }
- }
-
- return 1;
-}
-
-static int bpf_core_calc_field_relo(const struct bpf_program *prog,
- const struct bpf_core_relo *relo,
- const struct bpf_core_spec *spec,
- __u32 *val, __u32 *field_sz, __u32 *type_id,
- bool *validate)
-{
- const struct bpf_core_accessor *acc;
- const struct btf_type *t;
- __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id;
- const struct btf_member *m;
- const struct btf_type *mt;
- bool bitfield;
- __s64 sz;
-
- *field_sz = 0;
-
- if (relo->kind == BPF_FIELD_EXISTS) {
- *val = spec ? 1 : 0;
- return 0;
- }
-
- if (!spec)
- return -EUCLEAN; /* request instruction poisoning */
-
- acc = &spec->spec[spec->len - 1];
- t = btf__type_by_id(spec->btf, acc->type_id);
-
- /* a[n] accessor needs special handling */
- if (!acc->name) {
- if (relo->kind == BPF_FIELD_BYTE_OFFSET) {
- *val = spec->bit_offset / 8;
- /* remember field size for load/store mem size */
- sz = btf__resolve_size(spec->btf, acc->type_id);
- if (sz < 0)
- return -EINVAL;
- *field_sz = sz;
- *type_id = acc->type_id;
- } else if (relo->kind == BPF_FIELD_BYTE_SIZE) {
- sz = btf__resolve_size(spec->btf, acc->type_id);
- if (sz < 0)
- return -EINVAL;
- *val = sz;
- } else {
- pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n",
- prog->name, relo->kind, relo->insn_off / 8);
- return -EINVAL;
- }
- if (validate)
- *validate = true;
- return 0;
- }
-
- m = btf_members(t) + acc->idx;
- mt = skip_mods_and_typedefs(spec->btf, m->type, &field_type_id);
- bit_off = spec->bit_offset;
- bit_sz = btf_member_bitfield_size(t, acc->idx);
-
- bitfield = bit_sz > 0;
- if (bitfield) {
- byte_sz = mt->size;
- byte_off = bit_off / 8 / byte_sz * byte_sz;
- /* figure out smallest int size necessary for bitfield load */
- while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) {
- if (byte_sz >= 8) {
- /* bitfield can't be read with 64-bit read */
- pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n",
- prog->name, relo->kind, relo->insn_off / 8);
- return -E2BIG;
- }
- byte_sz *= 2;
- byte_off = bit_off / 8 / byte_sz * byte_sz;
- }
- } else {
- sz = btf__resolve_size(spec->btf, field_type_id);
- if (sz < 0)
- return -EINVAL;
- byte_sz = sz;
- byte_off = spec->bit_offset / 8;
- bit_sz = byte_sz * 8;
- }
-
- /* for bitfields, all the relocatable aspects are ambiguous and we
- * might disagree with compiler, so turn off validation of expected
- * value, except for signedness
- */
- if (validate)
- *validate = !bitfield;
-
- switch (relo->kind) {
- case BPF_FIELD_BYTE_OFFSET:
- *val = byte_off;
- if (!bitfield) {
- *field_sz = byte_sz;
- *type_id = field_type_id;
- }
- break;
- case BPF_FIELD_BYTE_SIZE:
- *val = byte_sz;
- break;
- case BPF_FIELD_SIGNED:
- /* enums will be assumed unsigned */
- *val = btf_is_enum(mt) ||
- (btf_int_encoding(mt) & BTF_INT_SIGNED);
- if (validate)
- *validate = true; /* signedness is never ambiguous */
- break;
- case BPF_FIELD_LSHIFT_U64:
-#if __BYTE_ORDER == __LITTLE_ENDIAN
- *val = 64 - (bit_off + bit_sz - byte_off * 8);
-#else
- *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8);
-#endif
- break;
- case BPF_FIELD_RSHIFT_U64:
- *val = 64 - bit_sz;
- if (validate)
- *validate = true; /* right shift is never ambiguous */
- break;
- case BPF_FIELD_EXISTS:
- default:
- return -EOPNOTSUPP;
- }
-
- return 0;
-}
-
-static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,
- const struct bpf_core_spec *spec,
- __u32 *val)
-{
- __s64 sz;
-
- /* type-based relos return zero when target type is not found */
- if (!spec) {
- *val = 0;
- return 0;
- }
-
- switch (relo->kind) {
- case BPF_TYPE_ID_TARGET:
- *val = spec->root_type_id;
- break;
- case BPF_TYPE_EXISTS:
- *val = 1;
- break;
- case BPF_TYPE_SIZE:
- sz = btf__resolve_size(spec->btf, spec->root_type_id);
- if (sz < 0)
- return -EINVAL;
- *val = sz;
- break;
- case BPF_TYPE_ID_LOCAL:
- /* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */
- default:
- return -EOPNOTSUPP;
- }
-
- return 0;
-}
-
-static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo,
- const struct bpf_core_spec *spec,
- __u32 *val)
-{
- const struct btf_type *t;
- const struct btf_enum *e;
-
- switch (relo->kind) {
- case BPF_ENUMVAL_EXISTS:
- *val = spec ? 1 : 0;
- break;
- case BPF_ENUMVAL_VALUE:
- if (!spec)
- return -EUCLEAN; /* request instruction poisoning */
- t = btf__type_by_id(spec->btf, spec->spec[0].type_id);
- e = btf_enum(t) + spec->spec[0].idx;
- *val = e->val;
- break;
- default:
- return -EOPNOTSUPP;
- }
-
- return 0;
-}
-
-struct bpf_core_relo_res
-{
- /* expected value in the instruction, unless validate == false */
- __u32 orig_val;
- /* new value that needs to be patched up to */
- __u32 new_val;
- /* relocation unsuccessful, poison instruction, but don't fail load */
- bool poison;
- /* some relocations can't be validated against orig_val */
- bool validate;
- /* for field byte offset relocations or the forms:
- * *(T *)(rX + <off>) = rY
- * rX = *(T *)(rY + <off>),
- * we remember original and resolved field size to adjust direct
- * memory loads of pointers and integers; this is necessary for 32-bit
- * host kernel architectures, but also allows to automatically
- * relocate fields that were resized from, e.g., u32 to u64, etc.
- */
- bool fail_memsz_adjust;
- __u32 orig_sz;
- __u32 orig_type_id;
- __u32 new_sz;
- __u32 new_type_id;
-};
-
-/* Calculate original and target relocation values, given local and target
- * specs and relocation kind. These values are calculated for each candidate.
- * If there are multiple candidates, resulting values should all be consistent
- * with each other. Otherwise, libbpf will refuse to proceed due to ambiguity.
- * If instruction has to be poisoned, *poison will be set to true.
- */
-static int bpf_core_calc_relo(const struct bpf_program *prog,
- const struct bpf_core_relo *relo,
- int relo_idx,
- const struct bpf_core_spec *local_spec,
- const struct bpf_core_spec *targ_spec,
- struct bpf_core_relo_res *res)
-{
- int err = -EOPNOTSUPP;
-
- res->orig_val = 0;
- res->new_val = 0;
- res->poison = false;
- res->validate = true;
- res->fail_memsz_adjust = false;
- res->orig_sz = res->new_sz = 0;
- res->orig_type_id = res->new_type_id = 0;
-
- if (core_relo_is_field_based(relo->kind)) {
- err = bpf_core_calc_field_relo(prog, relo, local_spec,
- &res->orig_val, &res->orig_sz,
- &res->orig_type_id, &res->validate);
- err = err ?: bpf_core_calc_field_relo(prog, relo, targ_spec,
- &res->new_val, &res->new_sz,
- &res->new_type_id, NULL);
- if (err)
- goto done;
- /* Validate if it's safe to adjust load/store memory size.
- * Adjustments are performed only if original and new memory
- * sizes differ.
- */
- res->fail_memsz_adjust = false;
- if (res->orig_sz != res->new_sz) {
- const struct btf_type *orig_t, *new_t;
-
- orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id);
- new_t = btf__type_by_id(targ_spec->btf, res->new_type_id);
-
- /* There are two use cases in which it's safe to
- * adjust load/store's mem size:
- * - reading a 32-bit kernel pointer, while on BPF
- * size pointers are always 64-bit; in this case
- * it's safe to "downsize" instruction size due to
- * pointer being treated as unsigned integer with
- * zero-extended upper 32-bits;
- * - reading unsigned integers, again due to
- * zero-extension is preserving the value correctly.
- *
- * In all other cases it's incorrect to attempt to
- * load/store field because read value will be
- * incorrect, so we poison relocated instruction.
- */
- if (btf_is_ptr(orig_t) && btf_is_ptr(new_t))
- goto done;
- if (btf_is_int(orig_t) && btf_is_int(new_t) &&
- btf_int_encoding(orig_t) != BTF_INT_SIGNED &&
- btf_int_encoding(new_t) != BTF_INT_SIGNED)
- goto done;
-
- /* mark as invalid mem size adjustment, but this will
- * only be checked for LDX/STX/ST insns
- */
- res->fail_memsz_adjust = true;
- }
- } else if (core_relo_is_type_based(relo->kind)) {
- err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val);
- err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val);
- } else if (core_relo_is_enumval_based(relo->kind)) {
- err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val);
- err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val);
- }
-
-done:
- if (err == -EUCLEAN) {
- /* EUCLEAN is used to signal instruction poisoning request */
- res->poison = true;
- err = 0;
- } else if (err == -EOPNOTSUPP) {
- /* EOPNOTSUPP means unknown/unsupported relocation */
- pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n",
- prog->name, relo_idx, core_relo_kind_str(relo->kind),
- relo->kind, relo->insn_off / 8);
- }
-
- return err;
-}
-
-/*
- * Turn instruction for which CO_RE relocation failed into invalid one with
- * distinct signature.
- */
-static void bpf_core_poison_insn(struct bpf_program *prog, int relo_idx,
- int insn_idx, struct bpf_insn *insn)
-{
- pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n",
- prog->name, relo_idx, insn_idx);
- insn->code = BPF_JMP | BPF_CALL;
- insn->dst_reg = 0;
- insn->src_reg = 0;
- insn->off = 0;
- /* if this instruction is reachable (not a dead code),
- * verifier will complain with the following message:
- * invalid func unknown#195896080
- */
- insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */
-}
-
-static int insn_bpf_size_to_bytes(struct bpf_insn *insn)
-{
- switch (BPF_SIZE(insn->code)) {
- case BPF_DW: return 8;
- case BPF_W: return 4;
- case BPF_H: return 2;
- case BPF_B: return 1;
- default: return -1;
- }
-}
-
-static int insn_bytes_to_bpf_size(__u32 sz)
-{
- switch (sz) {
- case 8: return BPF_DW;
- case 4: return BPF_W;
- case 2: return BPF_H;
- case 1: return BPF_B;
- default: return -1;
- }
-}
-
-/*
- * Patch relocatable BPF instruction.
- *
- * Patched value is determined by relocation kind and target specification.
- * For existence relocations target spec will be NULL if field/type is not found.
- * Expected insn->imm value is determined using relocation kind and local
- * spec, and is checked before patching instruction. If actual insn->imm value
- * is wrong, bail out with error.
- *
- * Currently supported classes of BPF instruction are:
- * 1. rX = <imm> (assignment with immediate operand);
- * 2. rX += <imm> (arithmetic operations with immediate operand);
- * 3. rX = <imm64> (load with 64-bit immediate value);
- * 4. rX = *(T *)(rY + <off>), where T is one of {u8, u16, u32, u64};
- * 5. *(T *)(rX + <off>) = rY, where T is one of {u8, u16, u32, u64};
- * 6. *(T *)(rX + <off>) = <imm>, where T is one of {u8, u16, u32, u64}.
- */
-static int bpf_core_patch_insn(struct bpf_program *prog,
- const struct bpf_core_relo *relo,
- int relo_idx,
- const struct bpf_core_relo_res *res)
-{
- __u32 orig_val, new_val;
- struct bpf_insn *insn;
- int insn_idx;
- __u8 class;
-
- if (relo->insn_off % BPF_INSN_SZ)
- return -EINVAL;
- insn_idx = relo->insn_off / BPF_INSN_SZ;
- /* adjust insn_idx from section frame of reference to the local
- * program's frame of reference; (sub-)program code is not yet
- * relocated, so it's enough to just subtract in-section offset
- */
- insn_idx = insn_idx - prog->sec_insn_off;
- insn = &prog->insns[insn_idx];
- class = BPF_CLASS(insn->code);
-
- if (res->poison) {
-poison:
- /* poison second part of ldimm64 to avoid confusing error from
- * verifier about "unknown opcode 00"
- */
- if (is_ldimm64_insn(insn))
- bpf_core_poison_insn(prog, relo_idx, insn_idx + 1, insn + 1);
- bpf_core_poison_insn(prog, relo_idx, insn_idx, insn);
- return 0;
- }
-
- orig_val = res->orig_val;
- new_val = res->new_val;
-
- switch (class) {
- case BPF_ALU:
- case BPF_ALU64:
- if (BPF_SRC(insn->code) != BPF_K)
- return -EINVAL;
- if (res->validate && insn->imm != orig_val) {
- pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n",
- prog->name, relo_idx,
- insn_idx, insn->imm, orig_val, new_val);
- return -EINVAL;
- }
- orig_val = insn->imm;
- insn->imm = new_val;
- pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n",
- prog->name, relo_idx, insn_idx,
- orig_val, new_val);
- break;
- case BPF_LDX:
- case BPF_ST:
- case BPF_STX:
- if (res->validate && insn->off != orig_val) {
- pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n",
- prog->name, relo_idx, insn_idx, insn->off, orig_val, new_val);
- return -EINVAL;
- }
- if (new_val > SHRT_MAX) {
- pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n",
- prog->name, relo_idx, insn_idx, new_val);
- return -ERANGE;
- }
- if (res->fail_memsz_adjust) {
- pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) accesses field incorrectly. "
- "Make sure you are accessing pointers, unsigned integers, or fields of matching type and size.\n",
- prog->name, relo_idx, insn_idx);
- goto poison;
- }
-
- orig_val = insn->off;
- insn->off = new_val;
- pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n",
- prog->name, relo_idx, insn_idx, orig_val, new_val);
-
- if (res->new_sz != res->orig_sz) {
- int insn_bytes_sz, insn_bpf_sz;
-
- insn_bytes_sz = insn_bpf_size_to_bytes(insn);
- if (insn_bytes_sz != res->orig_sz) {
- pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) unexpected mem size: got %d, exp %u\n",
- prog->name, relo_idx, insn_idx, insn_bytes_sz, res->orig_sz);
- return -EINVAL;
- }
-
- insn_bpf_sz = insn_bytes_to_bpf_size(res->new_sz);
- if (insn_bpf_sz < 0) {
- pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) invalid new mem size: %u\n",
- prog->name, relo_idx, insn_idx, res->new_sz);
- return -EINVAL;
- }
-
- insn->code = BPF_MODE(insn->code) | insn_bpf_sz | BPF_CLASS(insn->code);
- pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) mem_sz %u -> %u\n",
- prog->name, relo_idx, insn_idx, res->orig_sz, res->new_sz);
- }
- break;
- case BPF_LD: {
- __u64 imm;
-
- if (!is_ldimm64_insn(insn) ||
- insn[0].src_reg != 0 || insn[0].off != 0 ||
- insn_idx + 1 >= prog->insns_cnt ||
- insn[1].code != 0 || insn[1].dst_reg != 0 ||
- insn[1].src_reg != 0 || insn[1].off != 0) {
- pr_warn("prog '%s': relo #%d: insn #%d (LDIMM64) has unexpected form\n",
- prog->name, relo_idx, insn_idx);
- return -EINVAL;
- }
-
- imm = insn[0].imm + ((__u64)insn[1].imm << 32);
- if (res->validate && imm != orig_val) {
- pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n",
- prog->name, relo_idx,
- insn_idx, (unsigned long long)imm,
- orig_val, new_val);
- return -EINVAL;
- }
-
- insn[0].imm = new_val;
- insn[1].imm = 0; /* currently only 32-bit values are supported */
- pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n",
- prog->name, relo_idx, insn_idx,
- (unsigned long long)imm, new_val);
- break;
- }
- default:
- pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:0x%x, src:0x%x, dst:0x%x, off:0x%x, imm:0x%x\n",
- prog->name, relo_idx, insn_idx, insn->code,
- insn->src_reg, insn->dst_reg, insn->off, insn->imm);
- return -EINVAL;
- }
-
- return 0;
-}
-
-/* Output spec definition in the format:
- * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>,
- * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b
- */
-static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec)
-{
- const struct btf_type *t;
- const struct btf_enum *e;
- const char *s;
- __u32 type_id;
- int i;
-
- type_id = spec->root_type_id;
- t = btf__type_by_id(spec->btf, type_id);
- s = btf__name_by_offset(spec->btf, t->name_off);
-
- libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s);
-
- if (core_relo_is_type_based(spec->relo_kind))
- return;
-
- if (core_relo_is_enumval_based(spec->relo_kind)) {
- t = skip_mods_and_typedefs(spec->btf, type_id, NULL);
- e = btf_enum(t) + spec->raw_spec[0];
- s = btf__name_by_offset(spec->btf, e->name_off);
-
- libbpf_print(level, "::%s = %u", s, e->val);
- return;
- }
-
- if (core_relo_is_field_based(spec->relo_kind)) {
- for (i = 0; i < spec->len; i++) {
- if (spec->spec[i].name)
- libbpf_print(level, ".%s", spec->spec[i].name);
- else if (i > 0 || spec->spec[i].idx > 0)
- libbpf_print(level, "[%u]", spec->spec[i].idx);
- }
-
- libbpf_print(level, " (");
- for (i = 0; i < spec->raw_len; i++)
- libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]);
-
- if (spec->bit_offset % 8)
- libbpf_print(level, " @ offset %u.%u)",
- spec->bit_offset / 8, spec->bit_offset % 8);
- else
- libbpf_print(level, " @ offset %u)", spec->bit_offset / 8);
- return;
- }
-}
-
static size_t bpf_core_hash_fn(const void *key, void *ctx)
{
return (size_t)key;
return (void *)(uintptr_t)x;
}
-/*
- * CO-RE relocate single instruction.
- *
- * The outline and important points of the algorithm:
- * 1. For given local type, find corresponding candidate target types.
- * Candidate type is a type with the same "essential" name, ignoring
- * everything after last triple underscore (___). E.g., `sample`,
- * `sample___flavor_one`, `sample___flavor_another_one`, are all candidates
- * for each other. Names with triple underscore are referred to as
- * "flavors" and are useful, among other things, to allow to
- * specify/support incompatible variations of the same kernel struct, which
- * might differ between different kernel versions and/or build
- * configurations.
- *
- * N.B. Struct "flavors" could be generated by bpftool's BTF-to-C
- * converter, when deduplicated BTF of a kernel still contains more than
- * one different types with the same name. In that case, ___2, ___3, etc
- * are appended starting from second name conflict. But start flavors are
- * also useful to be defined "locally", in BPF program, to extract same
- * data from incompatible changes between different kernel
- * versions/configurations. For instance, to handle field renames between
- * kernel versions, one can use two flavors of the struct name with the
- * same common name and use conditional relocations to extract that field,
- * depending on target kernel version.
- * 2. For each candidate type, try to match local specification to this
- * candidate target type. Matching involves finding corresponding
- * high-level spec accessors, meaning that all named fields should match,
- * as well as all array accesses should be within the actual bounds. Also,
- * types should be compatible (see bpf_core_fields_are_compat for details).
- * 3. It is supported and expected that there might be multiple flavors
- * matching the spec. As long as all the specs resolve to the same set of
- * offsets across all candidates, there is no error. If there is any
- * ambiguity, CO-RE relocation will fail. This is necessary to accomodate
- * imprefection of BTF deduplication, which can cause slight duplication of
- * the same BTF type, if some directly or indirectly referenced (by
- * pointer) type gets resolved to different actual types in different
- * object files. If such situation occurs, deduplicated BTF will end up
- * with two (or more) structurally identical types, which differ only in
- * types they refer to through pointer. This should be OK in most cases and
- * is not an error.
- * 4. Candidate types search is performed by linearly scanning through all
- * types in target BTF. It is anticipated that this is overall more
- * efficient memory-wise and not significantly worse (if not better)
- * CPU-wise compared to prebuilding a map from all local type names to
- * a list of candidate type names. It's also sped up by caching resolved
- * list of matching candidates per each local "root" type ID, that has at
- * least one bpf_core_relo associated with it. This list is shared
- * between multiple relocations for the same type ID and is updated as some
- * of the candidates are pruned due to structural incompatibility.
- */
static int bpf_core_apply_relo(struct bpf_program *prog,
const struct bpf_core_relo *relo,
int relo_idx,
const struct btf *local_btf,
struct hashmap *cand_cache)
{
- struct bpf_core_spec local_spec, cand_spec, targ_spec = {};
const void *type_key = u32_as_hash_key(relo->type_id);
- struct bpf_core_relo_res cand_res, targ_res;
+ struct bpf_core_cand_list *cands = NULL;
+ const char *prog_name = prog->name;
const struct btf_type *local_type;
const char *local_name;
- struct core_cand_list *cands = NULL;
- __u32 local_id;
- const char *spec_str;
- int i, j, err;
+ __u32 local_id = relo->type_id;
+ struct bpf_insn *insn;
+ int insn_idx, err;
+
+ if (relo->insn_off % BPF_INSN_SZ)
+ return -EINVAL;
+ insn_idx = relo->insn_off / BPF_INSN_SZ;
+ /* adjust insn_idx from section frame of reference to the local
+ * program's frame of reference; (sub-)program code is not yet
+ * relocated, so it's enough to just subtract in-section offset
+ */
+ insn_idx = insn_idx - prog->sec_insn_off;
+ if (insn_idx > prog->insns_cnt)
+ return -EINVAL;
+ insn = &prog->insns[insn_idx];
- local_id = relo->type_id;
local_type = btf__type_by_id(local_btf, local_id);
if (!local_type)
return -EINVAL;
if (!local_name)
return -EINVAL;
- spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
- if (str_is_empty(spec_str))
- return -EINVAL;
-
if (prog->obj->gen_loader) {
- pr_warn("// TODO core_relo: prog %td insn[%d] %s %s kind %d\n",
+ pr_warn("// TODO core_relo: prog %td insn[%d] %s kind %d\n",
prog - prog->obj->programs, relo->insn_off / 8,
- local_name, spec_str, relo->kind);
+ local_name, relo->kind);
return -ENOTSUP;
}
- err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec);
- if (err) {
- pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n",
- prog->name, relo_idx, local_id, btf_kind_str(local_type),
- str_is_empty(local_name) ? "<anon>" : local_name,
- spec_str, err);
- return -EINVAL;
- }
-
- pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog->name,
- relo_idx, core_relo_kind_str(relo->kind), relo->kind);
- bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec);
- libbpf_print(LIBBPF_DEBUG, "\n");
-
- /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */
- if (relo->kind == BPF_TYPE_ID_LOCAL) {
- targ_res.validate = true;
- targ_res.poison = false;
- targ_res.orig_val = local_spec.root_type_id;
- targ_res.new_val = local_spec.root_type_id;
- goto patch_insn;
- }
- /* libbpf doesn't support candidate search for anonymous types */
- if (str_is_empty(spec_str)) {
- pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n",
- prog->name, relo_idx, core_relo_kind_str(relo->kind), relo->kind);
- return -EOPNOTSUPP;
- }
-
- if (!hashmap__find(cand_cache, type_key, (void **)&cands)) {
+ if (relo->kind != BPF_TYPE_ID_LOCAL &&
+ !hashmap__find(cand_cache, type_key, (void **)&cands)) {
cands = bpf_core_find_cands(prog->obj, local_btf, local_id);
if (IS_ERR(cands)) {
pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n",
- prog->name, relo_idx, local_id, btf_kind_str(local_type),
+ prog_name, relo_idx, local_id, btf_kind_str(local_type),
local_name, PTR_ERR(cands));
return PTR_ERR(cands);
}
}
}
- for (i = 0, j = 0; i < cands->len; i++) {
- err = bpf_core_spec_match(&local_spec, cands->cands[i].btf,
- cands->cands[i].id, &cand_spec);
- if (err < 0) {
- pr_warn("prog '%s': relo #%d: error matching candidate #%d ",
- prog->name, relo_idx, i);
- bpf_core_dump_spec(LIBBPF_WARN, &cand_spec);
- libbpf_print(LIBBPF_WARN, ": %d\n", err);
- return err;
- }
-
- pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog->name,
- relo_idx, err == 0 ? "non-matching" : "matching", i);
- bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec);
- libbpf_print(LIBBPF_DEBUG, "\n");
-
- if (err == 0)
- continue;
-
- err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, &cand_spec, &cand_res);
- if (err)
- return err;
-
- if (j == 0) {
- targ_res = cand_res;
- targ_spec = cand_spec;
- } else if (cand_spec.bit_offset != targ_spec.bit_offset) {
- /* if there are many field relo candidates, they
- * should all resolve to the same bit offset
- */
- pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n",
- prog->name, relo_idx, cand_spec.bit_offset,
- targ_spec.bit_offset);
- return -EINVAL;
- } else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) {
- /* all candidates should result in the same relocation
- * decision and value, otherwise it's dangerous to
- * proceed due to ambiguity
- */
- pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n",
- prog->name, relo_idx,
- cand_res.poison ? "failure" : "success", cand_res.new_val,
- targ_res.poison ? "failure" : "success", targ_res.new_val);
- return -EINVAL;
- }
-
- cands->cands[j++] = cands->cands[i];
- }
-
- /*
- * For BPF_FIELD_EXISTS relo or when used BPF program has field
- * existence checks or kernel version/config checks, it's expected
- * that we might not find any candidates. In this case, if field
- * wasn't found in any candidate, the list of candidates shouldn't
- * change at all, we'll just handle relocating appropriately,
- * depending on relo's kind.
- */
- if (j > 0)
- cands->len = j;
-
- /*
- * If no candidates were found, it might be both a programmer error,
- * as well as expected case, depending whether instruction w/
- * relocation is guarded in some way that makes it unreachable (dead
- * code) if relocation can't be resolved. This is handled in
- * bpf_core_patch_insn() uniformly by replacing that instruction with
- * BPF helper call insn (using invalid helper ID). If that instruction
- * is indeed unreachable, then it will be ignored and eliminated by
- * verifier. If it was an error, then verifier will complain and point
- * to a specific instruction number in its log.
- */
- if (j == 0) {
- pr_debug("prog '%s': relo #%d: no matching targets found\n",
- prog->name, relo_idx);
-
- /* calculate single target relo result explicitly */
- err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, NULL, &targ_res);
- if (err)
- return err;
- }
-
-patch_insn:
- /* bpf_core_patch_insn() should know how to handle missing targ_spec */
- err = bpf_core_patch_insn(prog, relo, relo_idx, &targ_res);
- if (err) {
- pr_warn("prog '%s': relo #%d: failed to patch insn #%zu: %d\n",
- prog->name, relo_idx, relo->insn_off / BPF_INSN_SZ, err);
- return -EINVAL;
- }
-
- return 0;
+ return bpf_core_apply_relo_insn(prog_name, insn, insn_idx, relo, relo_idx, local_btf, cands);
}
static int
for (i = 0; i < obj->nr_programs; i++) {
struct bpf_program *p = &obj->programs[i];
-
+
if (!p->nr_reloc)
continue;
__bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
const struct bpf_object_open_opts *opts)
{
- const char *obj_name, *kconfig;
+ const char *obj_name, *kconfig, *btf_tmp_path;
struct bpf_program *prog;
struct bpf_object *obj;
char tmp_name[64];
if (IS_ERR(obj))
return obj;
+ btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL);
+ if (btf_tmp_path) {
+ if (strlen(btf_tmp_path) >= PATH_MAX) {
+ err = -ENAMETOOLONG;
+ goto out;
+ }
+ obj->btf_custom_path = strdup(btf_tmp_path);
+ if (!obj->btf_custom_path) {
+ err = -ENOMEM;
+ goto out;
+ }
+ }
+
kconfig = OPTS_GET(opts, kconfig, NULL);
if (kconfig) {
obj->kconfig = strdup(kconfig);
- if (!obj->kconfig)
- return ERR_PTR(-ENOMEM);
+ if (!obj->kconfig) {
+ err = -ENOMEM;
+ goto out;
+ }
}
err = bpf_object__elf_init(obj);
err = err ? : bpf_object__sanitize_maps(obj);
err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
err = err ? : bpf_object__create_maps(obj);
- err = err ? : bpf_object__relocate(obj, attr->target_btf_path);
+ err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : attr->target_btf_path);
err = err ? : bpf_object__load_progs(obj, attr->log_level);
if (obj->gen_loader) {
return map->pin_path;
}
+const char *bpf_map__pin_path(const struct bpf_map *map)
+{
+ return map->pin_path;
+}
+
bool bpf_map__is_pinned(const struct bpf_map *map)
{
return map->pinned;
for (i = 0; i < obj->nr_maps; i++)
bpf_map__destroy(&obj->maps[i]);
+ zfree(&obj->btf_custom_path);
zfree(&obj->kconfig);
zfree(&obj->externs);
obj->nr_extern = 0;
ret = snprintf(btf_type_name, sizeof(btf_type_name),
"%s%s", prefix, name);
/* snprintf returns the number of characters written excluding the
- * the terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it
+ * terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it
* indicates truncation.
*/
if (ret < 0 || ret >= sizeof(btf_type_name))
struct btf *btf;
int err;
- btf = libbpf_find_kernel_btf();
+ btf = btf__load_vmlinux_btf();
err = libbpf_get_error(btf);
if (err) {
pr_warn("vmlinux BTF is not found\n");
{
struct bpf_prog_info_linear *info_linear;
struct bpf_prog_info *info;
- struct btf *btf = NULL;
- int err = -EINVAL;
+ struct btf *btf;
+ int err;
info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0);
err = libbpf_get_error(info_linear);
attach_prog_fd);
return err;
}
+
+ err = -EINVAL;
info = &info_linear->info;
if (!info->btf_id) {
pr_warn("The target program doesn't have BTF\n");
goto out;
}
- if (btf__get_from_id(info->btf_id, &btf)) {
+ btf = btf__load_from_kernel_by_id(info->btf_id);
+ if (libbpf_get_error(btf)) {
pr_warn("Failed to get BTF of the program\n");
goto out;
}
int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
{
int ret;
-
+
ret = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);
return libbpf_err_errno(ret);
}
return pfd;
}
-struct bpf_program_attach_kprobe_opts {
- bool retprobe;
- unsigned long offset;
-};
-
-static struct bpf_link*
+struct bpf_link *
bpf_program__attach_kprobe_opts(struct bpf_program *prog,
const char *func_name,
- struct bpf_program_attach_kprobe_opts *opts)
+ struct bpf_kprobe_opts *opts)
{
char errmsg[STRERR_BUFSIZE];
struct bpf_link *link;
+ unsigned long offset;
+ bool retprobe;
int pfd, err;
- pfd = perf_event_open_probe(false /* uprobe */, opts->retprobe, func_name,
- opts->offset, -1 /* pid */);
+ if (!OPTS_VALID(opts, bpf_kprobe_opts))
+ return libbpf_err_ptr(-EINVAL);
+
+ retprobe = OPTS_GET(opts, retprobe, false);
+ offset = OPTS_GET(opts, offset, 0);
+
+ pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name,
+ offset, -1 /* pid */);
if (pfd < 0) {
pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n",
- prog->name, opts->retprobe ? "kretprobe" : "kprobe", func_name,
+ prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
return libbpf_err_ptr(pfd);
}
if (err) {
close(pfd);
pr_warn("prog '%s': failed to attach to %s '%s': %s\n",
- prog->name, opts->retprobe ? "kretprobe" : "kprobe", func_name,
+ prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
return libbpf_err_ptr(err);
}
bool retprobe,
const char *func_name)
{
- struct bpf_program_attach_kprobe_opts opts = {
+ DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts,
.retprobe = retprobe,
- };
+ );
return bpf_program__attach_kprobe_opts(prog, func_name, &opts);
}
static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
struct bpf_program *prog)
{
- struct bpf_program_attach_kprobe_opts opts;
+ DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts);
unsigned long offset = 0;
struct bpf_link *link;
const char *func_name;
func_name = prog->sec_name + sec->len;
opts.retprobe = strcmp(sec->sec, "kretprobe/") == 0;
- n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%lx", &func, &offset);
+ n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset);
if (n < 1) {
err = -EINVAL;
pr_warn("kprobe name is invalid: %s\n", func_name);
return libbpf_err_ptr(err);
}
if (opts.retprobe && offset != 0) {
+ free(func);
err = -EINVAL;
pr_warn("kretprobes do not support offset specification\n");
return libbpf_err_ptr(err);
* system Kconfig for CONFIG_xxx externs.
*/
const char *kconfig;
+ /* Path to the custom BTF to be used for BPF CO-RE relocations.
+ * This custom BTF completely replaces the use of vmlinux BTF
+ * for the purpose of CO-RE relocations.
+ * NOTE: any other BPF feature (e.g., fentry/fexit programs,
+ * struct_ops, etc) will need actual kernel BTF at /sys/kernel/btf/vmlinux.
+ */
+ const char *btf_custom_path;
};
-#define bpf_object_open_opts__last_field kconfig
+#define bpf_object_open_opts__last_field btf_custom_path
+
+struct bpf_kprobe_opts {
+ /* size of this struct, for forward/backward compatiblity */
+ size_t sz;
+ /* function's offset to install kprobe to */
+ unsigned long offset;
+ /* kprobe is return probe */
+ bool retprobe;
+ size_t :0;
+};
+#define bpf_kprobe_opts__last_field retprobe
LIBBPF_API struct bpf_object *bpf_object__open(const char *path);
LIBBPF_API struct bpf_object *
bpf_program__attach_kprobe(struct bpf_program *prog, bool retprobe,
const char *func_name);
LIBBPF_API struct bpf_link *
+bpf_program__attach_kprobe_opts(struct bpf_program *prog,
+ const char *func_name,
+ struct bpf_kprobe_opts *opts);
+LIBBPF_API struct bpf_link *
bpf_program__attach_uprobe(struct bpf_program *prog, bool retprobe,
pid_t pid, const char *binary_path,
size_t func_offset);
LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map);
LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path);
LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map);
+LIBBPF_API const char *bpf_map__pin_path(const struct bpf_map *map);
LIBBPF_API bool bpf_map__is_pinned(const struct bpf_map *map);
LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path);
LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path);
LIBBPF_0.5.0 {
global:
bpf_map__initial_value;
+ bpf_map__pin_path;
bpf_map_lookup_and_delete_elem_flags;
+ bpf_program__attach_kprobe_opts;
bpf_object__gen_loader;
+ btf__load_from_kernel_by_id;
+ btf__load_from_kernel_by_id_split;
+ btf__load_into_kernel;
+ btf__load_module_btf;
+ btf__load_vmlinux_btf;
+ btf_dump__dump_type_data;
libbpf_set_strict_mode;
} LIBBPF_0.4.0;
#include <errno.h>
#include <linux/err.h>
#include "libbpf_legacy.h"
+#include "relo_core.h"
/* make sure libbpf doesn't use kernel-only integer typedefs */
#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
__u32 line_col;
};
-/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value
- * has to be adjusted by relocations.
- */
-enum bpf_core_relo_kind {
- BPF_FIELD_BYTE_OFFSET = 0, /* field byte offset */
- BPF_FIELD_BYTE_SIZE = 1, /* field size in bytes */
- BPF_FIELD_EXISTS = 2, /* field existence in target kernel */
- BPF_FIELD_SIGNED = 3, /* field signedness (0 - unsigned, 1 - signed) */
- BPF_FIELD_LSHIFT_U64 = 4, /* bitfield-specific left bitshift */
- BPF_FIELD_RSHIFT_U64 = 5, /* bitfield-specific right bitshift */
- BPF_TYPE_ID_LOCAL = 6, /* type ID in local BPF object */
- BPF_TYPE_ID_TARGET = 7, /* type ID in target kernel */
- BPF_TYPE_EXISTS = 8, /* type existence in target kernel */
- BPF_TYPE_SIZE = 9, /* type size in bytes */
- BPF_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */
- BPF_ENUMVAL_VALUE = 11, /* enum value integer value */
-};
-
-/* The minimum bpf_core_relo checked by the loader
- *
- * CO-RE relocation captures the following data:
- * - insn_off - instruction offset (in bytes) within a BPF program that needs
- * its insn->imm field to be relocated with actual field info;
- * - type_id - BTF type ID of the "root" (containing) entity of a relocatable
- * type or field;
- * - access_str_off - offset into corresponding .BTF string section. String
- * interpretation depends on specific relocation kind:
- * - for field-based relocations, string encodes an accessed field using
- * a sequence of field and array indices, separated by colon (:). It's
- * conceptually very close to LLVM's getelementptr ([0]) instruction's
- * arguments for identifying offset to a field.
- * - for type-based relocations, strings is expected to be just "0";
- * - for enum value-based relocations, string contains an index of enum
- * value within its enum type;
- *
- * Example to provide a better feel.
- *
- * struct sample {
- * int a;
- * struct {
- * int b[10];
- * };
- * };
- *
- * struct sample *s = ...;
- * int x = &s->a; // encoded as "0:0" (a is field #0)
- * int y = &s->b[5]; // encoded as "0:1:0:5" (anon struct is field #1,
- * // b is field #0 inside anon struct, accessing elem #5)
- * int z = &s[10]->b; // encoded as "10:1" (ptr is used as an array)
- *
- * type_id for all relocs in this example will capture BTF type id of
- * `struct sample`.
- *
- * Such relocation is emitted when using __builtin_preserve_access_index()
- * Clang built-in, passing expression that captures field address, e.g.:
- *
- * bpf_probe_read(&dst, sizeof(dst),
- * __builtin_preserve_access_index(&src->a.b.c));
- *
- * In this case Clang will emit field relocation recording necessary data to
- * be able to find offset of embedded `a.b.c` field within `src` struct.
- *
- * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction
- */
-struct bpf_core_relo {
- __u32 insn_off;
- __u32 type_id;
- __u32 access_str_off;
- enum bpf_core_relo_kind kind;
-};
typedef int (*type_id_visit_fn)(__u32 *type_id, void *ctx);
typedef int (*str_off_visit_fn)(__u32 *str_off, void *ctx);
return ret;
}
+static inline bool str_is_empty(const char *s)
+{
+ return !s || !s[0];
+}
+
+static inline bool is_ldimm64_insn(struct bpf_insn *insn)
+{
+ return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
+}
+
#endif /* __LIBBPF_LIBBPF_INTERNAL_H */
--- /dev/null
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2019 Facebook */
+
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <ctype.h>
+#include <linux/err.h>
+
+#include "libbpf.h"
+#include "bpf.h"
+#include "btf.h"
+#include "str_error.h"
+#include "libbpf_internal.h"
+
+#define BPF_CORE_SPEC_MAX_LEN 64
+
+/* represents BPF CO-RE field or array element accessor */
+struct bpf_core_accessor {
+ __u32 type_id; /* struct/union type or array element type */
+ __u32 idx; /* field index or array index */
+ const char *name; /* field name or NULL for array accessor */
+};
+
+struct bpf_core_spec {
+ const struct btf *btf;
+ /* high-level spec: named fields and array indices only */
+ struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
+ /* original unresolved (no skip_mods_or_typedefs) root type ID */
+ __u32 root_type_id;
+ /* CO-RE relocation kind */
+ enum bpf_core_relo_kind relo_kind;
+ /* high-level spec length */
+ int len;
+ /* raw, low-level spec: 1-to-1 with accessor spec string */
+ int raw_spec[BPF_CORE_SPEC_MAX_LEN];
+ /* raw spec length */
+ int raw_len;
+ /* field bit offset represented by spec */
+ __u32 bit_offset;
+};
+
+static bool is_flex_arr(const struct btf *btf,
+ const struct bpf_core_accessor *acc,
+ const struct btf_array *arr)
+{
+ const struct btf_type *t;
+
+ /* not a flexible array, if not inside a struct or has non-zero size */
+ if (!acc->name || arr->nelems > 0)
+ return false;
+
+ /* has to be the last member of enclosing struct */
+ t = btf__type_by_id(btf, acc->type_id);
+ return acc->idx == btf_vlen(t) - 1;
+}
+
+static const char *core_relo_kind_str(enum bpf_core_relo_kind kind)
+{
+ switch (kind) {
+ case BPF_FIELD_BYTE_OFFSET: return "byte_off";
+ case BPF_FIELD_BYTE_SIZE: return "byte_sz";
+ case BPF_FIELD_EXISTS: return "field_exists";
+ case BPF_FIELD_SIGNED: return "signed";
+ case BPF_FIELD_LSHIFT_U64: return "lshift_u64";
+ case BPF_FIELD_RSHIFT_U64: return "rshift_u64";
+ case BPF_TYPE_ID_LOCAL: return "local_type_id";
+ case BPF_TYPE_ID_TARGET: return "target_type_id";
+ case BPF_TYPE_EXISTS: return "type_exists";
+ case BPF_TYPE_SIZE: return "type_size";
+ case BPF_ENUMVAL_EXISTS: return "enumval_exists";
+ case BPF_ENUMVAL_VALUE: return "enumval_value";
+ default: return "unknown";
+ }
+}
+
+static bool core_relo_is_field_based(enum bpf_core_relo_kind kind)
+{
+ switch (kind) {
+ case BPF_FIELD_BYTE_OFFSET:
+ case BPF_FIELD_BYTE_SIZE:
+ case BPF_FIELD_EXISTS:
+ case BPF_FIELD_SIGNED:
+ case BPF_FIELD_LSHIFT_U64:
+ case BPF_FIELD_RSHIFT_U64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool core_relo_is_type_based(enum bpf_core_relo_kind kind)
+{
+ switch (kind) {
+ case BPF_TYPE_ID_LOCAL:
+ case BPF_TYPE_ID_TARGET:
+ case BPF_TYPE_EXISTS:
+ case BPF_TYPE_SIZE:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
+{
+ switch (kind) {
+ case BPF_ENUMVAL_EXISTS:
+ case BPF_ENUMVAL_VALUE:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/*
+ * Turn bpf_core_relo into a low- and high-level spec representation,
+ * validating correctness along the way, as well as calculating resulting
+ * field bit offset, specified by accessor string. Low-level spec captures
+ * every single level of nestedness, including traversing anonymous
+ * struct/union members. High-level one only captures semantically meaningful
+ * "turning points": named fields and array indicies.
+ * E.g., for this case:
+ *
+ * struct sample {
+ * int __unimportant;
+ * struct {
+ * int __1;
+ * int __2;
+ * int a[7];
+ * };
+ * };
+ *
+ * struct sample *s = ...;
+ *
+ * int x = &s->a[3]; // access string = '0:1:2:3'
+ *
+ * Low-level spec has 1:1 mapping with each element of access string (it's
+ * just a parsed access string representation): [0, 1, 2, 3].
+ *
+ * High-level spec will capture only 3 points:
+ * - intial zero-index access by pointer (&s->... is the same as &s[0]...);
+ * - field 'a' access (corresponds to '2' in low-level spec);
+ * - array element #3 access (corresponds to '3' in low-level spec).
+ *
+ * Type-based relocations (TYPE_EXISTS/TYPE_SIZE,
+ * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their
+ * spec and raw_spec are kept empty.
+ *
+ * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access
+ * string to specify enumerator's value index that need to be relocated.
+ */
+static int bpf_core_parse_spec(const struct btf *btf,
+ __u32 type_id,
+ const char *spec_str,
+ enum bpf_core_relo_kind relo_kind,
+ struct bpf_core_spec *spec)
+{
+ int access_idx, parsed_len, i;
+ struct bpf_core_accessor *acc;
+ const struct btf_type *t;
+ const char *name;
+ __u32 id;
+ __s64 sz;
+
+ if (str_is_empty(spec_str) || *spec_str == ':')
+ return -EINVAL;
+
+ memset(spec, 0, sizeof(*spec));
+ spec->btf = btf;
+ spec->root_type_id = type_id;
+ spec->relo_kind = relo_kind;
+
+ /* type-based relocations don't have a field access string */
+ if (core_relo_is_type_based(relo_kind)) {
+ if (strcmp(spec_str, "0"))
+ return -EINVAL;
+ return 0;
+ }
+
+ /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */
+ while (*spec_str) {
+ if (*spec_str == ':')
+ ++spec_str;
+ if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1)
+ return -EINVAL;
+ if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
+ return -E2BIG;
+ spec_str += parsed_len;
+ spec->raw_spec[spec->raw_len++] = access_idx;
+ }
+
+ if (spec->raw_len == 0)
+ return -EINVAL;
+
+ t = skip_mods_and_typedefs(btf, type_id, &id);
+ if (!t)
+ return -EINVAL;
+
+ access_idx = spec->raw_spec[0];
+ acc = &spec->spec[0];
+ acc->type_id = id;
+ acc->idx = access_idx;
+ spec->len++;
+
+ if (core_relo_is_enumval_based(relo_kind)) {
+ if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t))
+ return -EINVAL;
+
+ /* record enumerator name in a first accessor */
+ acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off);
+ return 0;
+ }
+
+ if (!core_relo_is_field_based(relo_kind))
+ return -EINVAL;
+
+ sz = btf__resolve_size(btf, id);
+ if (sz < 0)
+ return sz;
+ spec->bit_offset = access_idx * sz * 8;
+
+ for (i = 1; i < spec->raw_len; i++) {
+ t = skip_mods_and_typedefs(btf, id, &id);
+ if (!t)
+ return -EINVAL;
+
+ access_idx = spec->raw_spec[i];
+ acc = &spec->spec[spec->len];
+
+ if (btf_is_composite(t)) {
+ const struct btf_member *m;
+ __u32 bit_offset;
+
+ if (access_idx >= btf_vlen(t))
+ return -EINVAL;
+
+ bit_offset = btf_member_bit_offset(t, access_idx);
+ spec->bit_offset += bit_offset;
+
+ m = btf_members(t) + access_idx;
+ if (m->name_off) {
+ name = btf__name_by_offset(btf, m->name_off);
+ if (str_is_empty(name))
+ return -EINVAL;
+
+ acc->type_id = id;
+ acc->idx = access_idx;
+ acc->name = name;
+ spec->len++;
+ }
+
+ id = m->type;
+ } else if (btf_is_array(t)) {
+ const struct btf_array *a = btf_array(t);
+ bool flex;
+
+ t = skip_mods_and_typedefs(btf, a->type, &id);
+ if (!t)
+ return -EINVAL;
+
+ flex = is_flex_arr(btf, acc - 1, a);
+ if (!flex && access_idx >= a->nelems)
+ return -EINVAL;
+
+ spec->spec[spec->len].type_id = id;
+ spec->spec[spec->len].idx = access_idx;
+ spec->len++;
+
+ sz = btf__resolve_size(btf, id);
+ if (sz < 0)
+ return sz;
+ spec->bit_offset += access_idx * sz * 8;
+ } else {
+ pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n",
+ type_id, spec_str, i, id, btf_kind_str(t));
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+/* Check two types for compatibility for the purpose of field access
+ * relocation. const/volatile/restrict and typedefs are skipped to ensure we
+ * are relocating semantically compatible entities:
+ * - any two STRUCTs/UNIONs are compatible and can be mixed;
+ * - any two FWDs are compatible, if their names match (modulo flavor suffix);
+ * - any two PTRs are always compatible;
+ * - for ENUMs, names should be the same (ignoring flavor suffix) or at
+ * least one of enums should be anonymous;
+ * - for ENUMs, check sizes, names are ignored;
+ * - for INT, size and signedness are ignored;
+ * - any two FLOATs are always compatible;
+ * - for ARRAY, dimensionality is ignored, element types are checked for
+ * compatibility recursively;
+ * - everything else shouldn't be ever a target of relocation.
+ * These rules are not set in stone and probably will be adjusted as we get
+ * more experience with using BPF CO-RE relocations.
+ */
+static int bpf_core_fields_are_compat(const struct btf *local_btf,
+ __u32 local_id,
+ const struct btf *targ_btf,
+ __u32 targ_id)
+{
+ const struct btf_type *local_type, *targ_type;
+
+recur:
+ local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
+ targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
+ if (!local_type || !targ_type)
+ return -EINVAL;
+
+ if (btf_is_composite(local_type) && btf_is_composite(targ_type))
+ return 1;
+ if (btf_kind(local_type) != btf_kind(targ_type))
+ return 0;
+
+ switch (btf_kind(local_type)) {
+ case BTF_KIND_PTR:
+ case BTF_KIND_FLOAT:
+ return 1;
+ case BTF_KIND_FWD:
+ case BTF_KIND_ENUM: {
+ const char *local_name, *targ_name;
+ size_t local_len, targ_len;
+
+ local_name = btf__name_by_offset(local_btf,
+ local_type->name_off);
+ targ_name = btf__name_by_offset(targ_btf, targ_type->name_off);
+ local_len = bpf_core_essential_name_len(local_name);
+ targ_len = bpf_core_essential_name_len(targ_name);
+ /* one of them is anonymous or both w/ same flavor-less names */
+ return local_len == 0 || targ_len == 0 ||
+ (local_len == targ_len &&
+ strncmp(local_name, targ_name, local_len) == 0);
+ }
+ case BTF_KIND_INT:
+ /* just reject deprecated bitfield-like integers; all other
+ * integers are by default compatible between each other
+ */
+ return btf_int_offset(local_type) == 0 &&
+ btf_int_offset(targ_type) == 0;
+ case BTF_KIND_ARRAY:
+ local_id = btf_array(local_type)->type;
+ targ_id = btf_array(targ_type)->type;
+ goto recur;
+ default:
+ pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n",
+ btf_kind(local_type), local_id, targ_id);
+ return 0;
+ }
+}
+
+/*
+ * Given single high-level named field accessor in local type, find
+ * corresponding high-level accessor for a target type. Along the way,
+ * maintain low-level spec for target as well. Also keep updating target
+ * bit offset.
+ *
+ * Searching is performed through recursive exhaustive enumeration of all
+ * fields of a struct/union. If there are any anonymous (embedded)
+ * structs/unions, they are recursively searched as well. If field with
+ * desired name is found, check compatibility between local and target types,
+ * before returning result.
+ *
+ * 1 is returned, if field is found.
+ * 0 is returned if no compatible field is found.
+ * <0 is returned on error.
+ */
+static int bpf_core_match_member(const struct btf *local_btf,
+ const struct bpf_core_accessor *local_acc,
+ const struct btf *targ_btf,
+ __u32 targ_id,
+ struct bpf_core_spec *spec,
+ __u32 *next_targ_id)
+{
+ const struct btf_type *local_type, *targ_type;
+ const struct btf_member *local_member, *m;
+ const char *local_name, *targ_name;
+ __u32 local_id;
+ int i, n, found;
+
+ targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
+ if (!targ_type)
+ return -EINVAL;
+ if (!btf_is_composite(targ_type))
+ return 0;
+
+ local_id = local_acc->type_id;
+ local_type = btf__type_by_id(local_btf, local_id);
+ local_member = btf_members(local_type) + local_acc->idx;
+ local_name = btf__name_by_offset(local_btf, local_member->name_off);
+
+ n = btf_vlen(targ_type);
+ m = btf_members(targ_type);
+ for (i = 0; i < n; i++, m++) {
+ __u32 bit_offset;
+
+ bit_offset = btf_member_bit_offset(targ_type, i);
+
+ /* too deep struct/union/array nesting */
+ if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
+ return -E2BIG;
+
+ /* speculate this member will be the good one */
+ spec->bit_offset += bit_offset;
+ spec->raw_spec[spec->raw_len++] = i;
+
+ targ_name = btf__name_by_offset(targ_btf, m->name_off);
+ if (str_is_empty(targ_name)) {
+ /* embedded struct/union, we need to go deeper */
+ found = bpf_core_match_member(local_btf, local_acc,
+ targ_btf, m->type,
+ spec, next_targ_id);
+ if (found) /* either found or error */
+ return found;
+ } else if (strcmp(local_name, targ_name) == 0) {
+ /* matching named field */
+ struct bpf_core_accessor *targ_acc;
+
+ targ_acc = &spec->spec[spec->len++];
+ targ_acc->type_id = targ_id;
+ targ_acc->idx = i;
+ targ_acc->name = targ_name;
+
+ *next_targ_id = m->type;
+ found = bpf_core_fields_are_compat(local_btf,
+ local_member->type,
+ targ_btf, m->type);
+ if (!found)
+ spec->len--; /* pop accessor */
+ return found;
+ }
+ /* member turned out not to be what we looked for */
+ spec->bit_offset -= bit_offset;
+ spec->raw_len--;
+ }
+
+ return 0;
+}
+
+/*
+ * Try to match local spec to a target type and, if successful, produce full
+ * target spec (high-level, low-level + bit offset).
+ */
+static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
+ const struct btf *targ_btf, __u32 targ_id,
+ struct bpf_core_spec *targ_spec)
+{
+ const struct btf_type *targ_type;
+ const struct bpf_core_accessor *local_acc;
+ struct bpf_core_accessor *targ_acc;
+ int i, sz, matched;
+
+ memset(targ_spec, 0, sizeof(*targ_spec));
+ targ_spec->btf = targ_btf;
+ targ_spec->root_type_id = targ_id;
+ targ_spec->relo_kind = local_spec->relo_kind;
+
+ if (core_relo_is_type_based(local_spec->relo_kind)) {
+ return bpf_core_types_are_compat(local_spec->btf,
+ local_spec->root_type_id,
+ targ_btf, targ_id);
+ }
+
+ local_acc = &local_spec->spec[0];
+ targ_acc = &targ_spec->spec[0];
+
+ if (core_relo_is_enumval_based(local_spec->relo_kind)) {
+ size_t local_essent_len, targ_essent_len;
+ const struct btf_enum *e;
+ const char *targ_name;
+
+ /* has to resolve to an enum */
+ targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id);
+ if (!btf_is_enum(targ_type))
+ return 0;
+
+ local_essent_len = bpf_core_essential_name_len(local_acc->name);
+
+ for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) {
+ targ_name = btf__name_by_offset(targ_spec->btf, e->name_off);
+ targ_essent_len = bpf_core_essential_name_len(targ_name);
+ if (targ_essent_len != local_essent_len)
+ continue;
+ if (strncmp(local_acc->name, targ_name, local_essent_len) == 0) {
+ targ_acc->type_id = targ_id;
+ targ_acc->idx = i;
+ targ_acc->name = targ_name;
+ targ_spec->len++;
+ targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
+ targ_spec->raw_len++;
+ return 1;
+ }
+ }
+ return 0;
+ }
+
+ if (!core_relo_is_field_based(local_spec->relo_kind))
+ return -EINVAL;
+
+ for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) {
+ targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id,
+ &targ_id);
+ if (!targ_type)
+ return -EINVAL;
+
+ if (local_acc->name) {
+ matched = bpf_core_match_member(local_spec->btf,
+ local_acc,
+ targ_btf, targ_id,
+ targ_spec, &targ_id);
+ if (matched <= 0)
+ return matched;
+ } else {
+ /* for i=0, targ_id is already treated as array element
+ * type (because it's the original struct), for others
+ * we should find array element type first
+ */
+ if (i > 0) {
+ const struct btf_array *a;
+ bool flex;
+
+ if (!btf_is_array(targ_type))
+ return 0;
+
+ a = btf_array(targ_type);
+ flex = is_flex_arr(targ_btf, targ_acc - 1, a);
+ if (!flex && local_acc->idx >= a->nelems)
+ return 0;
+ if (!skip_mods_and_typedefs(targ_btf, a->type,
+ &targ_id))
+ return -EINVAL;
+ }
+
+ /* too deep struct/union/array nesting */
+ if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
+ return -E2BIG;
+
+ targ_acc->type_id = targ_id;
+ targ_acc->idx = local_acc->idx;
+ targ_acc->name = NULL;
+ targ_spec->len++;
+ targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
+ targ_spec->raw_len++;
+
+ sz = btf__resolve_size(targ_btf, targ_id);
+ if (sz < 0)
+ return sz;
+ targ_spec->bit_offset += local_acc->idx * sz * 8;
+ }
+ }
+
+ return 1;
+}
+
+static int bpf_core_calc_field_relo(const char *prog_name,
+ const struct bpf_core_relo *relo,
+ const struct bpf_core_spec *spec,
+ __u32 *val, __u32 *field_sz, __u32 *type_id,
+ bool *validate)
+{
+ const struct bpf_core_accessor *acc;
+ const struct btf_type *t;
+ __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id;
+ const struct btf_member *m;
+ const struct btf_type *mt;
+ bool bitfield;
+ __s64 sz;
+
+ *field_sz = 0;
+
+ if (relo->kind == BPF_FIELD_EXISTS) {
+ *val = spec ? 1 : 0;
+ return 0;
+ }
+
+ if (!spec)
+ return -EUCLEAN; /* request instruction poisoning */
+
+ acc = &spec->spec[spec->len - 1];
+ t = btf__type_by_id(spec->btf, acc->type_id);
+
+ /* a[n] accessor needs special handling */
+ if (!acc->name) {
+ if (relo->kind == BPF_FIELD_BYTE_OFFSET) {
+ *val = spec->bit_offset / 8;
+ /* remember field size for load/store mem size */
+ sz = btf__resolve_size(spec->btf, acc->type_id);
+ if (sz < 0)
+ return -EINVAL;
+ *field_sz = sz;
+ *type_id = acc->type_id;
+ } else if (relo->kind == BPF_FIELD_BYTE_SIZE) {
+ sz = btf__resolve_size(spec->btf, acc->type_id);
+ if (sz < 0)
+ return -EINVAL;
+ *val = sz;
+ } else {
+ pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n",
+ prog_name, relo->kind, relo->insn_off / 8);
+ return -EINVAL;
+ }
+ if (validate)
+ *validate = true;
+ return 0;
+ }
+
+ m = btf_members(t) + acc->idx;
+ mt = skip_mods_and_typedefs(spec->btf, m->type, &field_type_id);
+ bit_off = spec->bit_offset;
+ bit_sz = btf_member_bitfield_size(t, acc->idx);
+
+ bitfield = bit_sz > 0;
+ if (bitfield) {
+ byte_sz = mt->size;
+ byte_off = bit_off / 8 / byte_sz * byte_sz;
+ /* figure out smallest int size necessary for bitfield load */
+ while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) {
+ if (byte_sz >= 8) {
+ /* bitfield can't be read with 64-bit read */
+ pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n",
+ prog_name, relo->kind, relo->insn_off / 8);
+ return -E2BIG;
+ }
+ byte_sz *= 2;
+ byte_off = bit_off / 8 / byte_sz * byte_sz;
+ }
+ } else {
+ sz = btf__resolve_size(spec->btf, field_type_id);
+ if (sz < 0)
+ return -EINVAL;
+ byte_sz = sz;
+ byte_off = spec->bit_offset / 8;
+ bit_sz = byte_sz * 8;
+ }
+
+ /* for bitfields, all the relocatable aspects are ambiguous and we
+ * might disagree with compiler, so turn off validation of expected
+ * value, except for signedness
+ */
+ if (validate)
+ *validate = !bitfield;
+
+ switch (relo->kind) {
+ case BPF_FIELD_BYTE_OFFSET:
+ *val = byte_off;
+ if (!bitfield) {
+ *field_sz = byte_sz;
+ *type_id = field_type_id;
+ }
+ break;
+ case BPF_FIELD_BYTE_SIZE:
+ *val = byte_sz;
+ break;
+ case BPF_FIELD_SIGNED:
+ /* enums will be assumed unsigned */
+ *val = btf_is_enum(mt) ||
+ (btf_int_encoding(mt) & BTF_INT_SIGNED);
+ if (validate)
+ *validate = true; /* signedness is never ambiguous */
+ break;
+ case BPF_FIELD_LSHIFT_U64:
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ *val = 64 - (bit_off + bit_sz - byte_off * 8);
+#else
+ *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8);
+#endif
+ break;
+ case BPF_FIELD_RSHIFT_U64:
+ *val = 64 - bit_sz;
+ if (validate)
+ *validate = true; /* right shift is never ambiguous */
+ break;
+ case BPF_FIELD_EXISTS:
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,
+ const struct bpf_core_spec *spec,
+ __u32 *val)
+{
+ __s64 sz;
+
+ /* type-based relos return zero when target type is not found */
+ if (!spec) {
+ *val = 0;
+ return 0;
+ }
+
+ switch (relo->kind) {
+ case BPF_TYPE_ID_TARGET:
+ *val = spec->root_type_id;
+ break;
+ case BPF_TYPE_EXISTS:
+ *val = 1;
+ break;
+ case BPF_TYPE_SIZE:
+ sz = btf__resolve_size(spec->btf, spec->root_type_id);
+ if (sz < 0)
+ return -EINVAL;
+ *val = sz;
+ break;
+ case BPF_TYPE_ID_LOCAL:
+ /* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo,
+ const struct bpf_core_spec *spec,
+ __u32 *val)
+{
+ const struct btf_type *t;
+ const struct btf_enum *e;
+
+ switch (relo->kind) {
+ case BPF_ENUMVAL_EXISTS:
+ *val = spec ? 1 : 0;
+ break;
+ case BPF_ENUMVAL_VALUE:
+ if (!spec)
+ return -EUCLEAN; /* request instruction poisoning */
+ t = btf__type_by_id(spec->btf, spec->spec[0].type_id);
+ e = btf_enum(t) + spec->spec[0].idx;
+ *val = e->val;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+struct bpf_core_relo_res
+{
+ /* expected value in the instruction, unless validate == false */
+ __u32 orig_val;
+ /* new value that needs to be patched up to */
+ __u32 new_val;
+ /* relocation unsuccessful, poison instruction, but don't fail load */
+ bool poison;
+ /* some relocations can't be validated against orig_val */
+ bool validate;
+ /* for field byte offset relocations or the forms:
+ * *(T *)(rX + <off>) = rY
+ * rX = *(T *)(rY + <off>),
+ * we remember original and resolved field size to adjust direct
+ * memory loads of pointers and integers; this is necessary for 32-bit
+ * host kernel architectures, but also allows to automatically
+ * relocate fields that were resized from, e.g., u32 to u64, etc.
+ */
+ bool fail_memsz_adjust;
+ __u32 orig_sz;
+ __u32 orig_type_id;
+ __u32 new_sz;
+ __u32 new_type_id;
+};
+
+/* Calculate original and target relocation values, given local and target
+ * specs and relocation kind. These values are calculated for each candidate.
+ * If there are multiple candidates, resulting values should all be consistent
+ * with each other. Otherwise, libbpf will refuse to proceed due to ambiguity.
+ * If instruction has to be poisoned, *poison will be set to true.
+ */
+static int bpf_core_calc_relo(const char *prog_name,
+ const struct bpf_core_relo *relo,
+ int relo_idx,
+ const struct bpf_core_spec *local_spec,
+ const struct bpf_core_spec *targ_spec,
+ struct bpf_core_relo_res *res)
+{
+ int err = -EOPNOTSUPP;
+
+ res->orig_val = 0;
+ res->new_val = 0;
+ res->poison = false;
+ res->validate = true;
+ res->fail_memsz_adjust = false;
+ res->orig_sz = res->new_sz = 0;
+ res->orig_type_id = res->new_type_id = 0;
+
+ if (core_relo_is_field_based(relo->kind)) {
+ err = bpf_core_calc_field_relo(prog_name, relo, local_spec,
+ &res->orig_val, &res->orig_sz,
+ &res->orig_type_id, &res->validate);
+ err = err ?: bpf_core_calc_field_relo(prog_name, relo, targ_spec,
+ &res->new_val, &res->new_sz,
+ &res->new_type_id, NULL);
+ if (err)
+ goto done;
+ /* Validate if it's safe to adjust load/store memory size.
+ * Adjustments are performed only if original and new memory
+ * sizes differ.
+ */
+ res->fail_memsz_adjust = false;
+ if (res->orig_sz != res->new_sz) {
+ const struct btf_type *orig_t, *new_t;
+
+ orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id);
+ new_t = btf__type_by_id(targ_spec->btf, res->new_type_id);
+
+ /* There are two use cases in which it's safe to
+ * adjust load/store's mem size:
+ * - reading a 32-bit kernel pointer, while on BPF
+ * size pointers are always 64-bit; in this case
+ * it's safe to "downsize" instruction size due to
+ * pointer being treated as unsigned integer with
+ * zero-extended upper 32-bits;
+ * - reading unsigned integers, again due to
+ * zero-extension is preserving the value correctly.
+ *
+ * In all other cases it's incorrect to attempt to
+ * load/store field because read value will be
+ * incorrect, so we poison relocated instruction.
+ */
+ if (btf_is_ptr(orig_t) && btf_is_ptr(new_t))
+ goto done;
+ if (btf_is_int(orig_t) && btf_is_int(new_t) &&
+ btf_int_encoding(orig_t) != BTF_INT_SIGNED &&
+ btf_int_encoding(new_t) != BTF_INT_SIGNED)
+ goto done;
+
+ /* mark as invalid mem size adjustment, but this will
+ * only be checked for LDX/STX/ST insns
+ */
+ res->fail_memsz_adjust = true;
+ }
+ } else if (core_relo_is_type_based(relo->kind)) {
+ err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val);
+ err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val);
+ } else if (core_relo_is_enumval_based(relo->kind)) {
+ err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val);
+ err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val);
+ }
+
+done:
+ if (err == -EUCLEAN) {
+ /* EUCLEAN is used to signal instruction poisoning request */
+ res->poison = true;
+ err = 0;
+ } else if (err == -EOPNOTSUPP) {
+ /* EOPNOTSUPP means unknown/unsupported relocation */
+ pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n",
+ prog_name, relo_idx, core_relo_kind_str(relo->kind),
+ relo->kind, relo->insn_off / 8);
+ }
+
+ return err;
+}
+
+/*
+ * Turn instruction for which CO_RE relocation failed into invalid one with
+ * distinct signature.
+ */
+static void bpf_core_poison_insn(const char *prog_name, int relo_idx,
+ int insn_idx, struct bpf_insn *insn)
+{
+ pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n",
+ prog_name, relo_idx, insn_idx);
+ insn->code = BPF_JMP | BPF_CALL;
+ insn->dst_reg = 0;
+ insn->src_reg = 0;
+ insn->off = 0;
+ /* if this instruction is reachable (not a dead code),
+ * verifier will complain with the following message:
+ * invalid func unknown#195896080
+ */
+ insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */
+}
+
+static int insn_bpf_size_to_bytes(struct bpf_insn *insn)
+{
+ switch (BPF_SIZE(insn->code)) {
+ case BPF_DW: return 8;
+ case BPF_W: return 4;
+ case BPF_H: return 2;
+ case BPF_B: return 1;
+ default: return -1;
+ }
+}
+
+static int insn_bytes_to_bpf_size(__u32 sz)
+{
+ switch (sz) {
+ case 8: return BPF_DW;
+ case 4: return BPF_W;
+ case 2: return BPF_H;
+ case 1: return BPF_B;
+ default: return -1;
+ }
+}
+
+/*
+ * Patch relocatable BPF instruction.
+ *
+ * Patched value is determined by relocation kind and target specification.
+ * For existence relocations target spec will be NULL if field/type is not found.
+ * Expected insn->imm value is determined using relocation kind and local
+ * spec, and is checked before patching instruction. If actual insn->imm value
+ * is wrong, bail out with error.
+ *
+ * Currently supported classes of BPF instruction are:
+ * 1. rX = <imm> (assignment with immediate operand);
+ * 2. rX += <imm> (arithmetic operations with immediate operand);
+ * 3. rX = <imm64> (load with 64-bit immediate value);
+ * 4. rX = *(T *)(rY + <off>), where T is one of {u8, u16, u32, u64};
+ * 5. *(T *)(rX + <off>) = rY, where T is one of {u8, u16, u32, u64};
+ * 6. *(T *)(rX + <off>) = <imm>, where T is one of {u8, u16, u32, u64}.
+ */
+static int bpf_core_patch_insn(const char *prog_name, struct bpf_insn *insn,
+ int insn_idx, const struct bpf_core_relo *relo,
+ int relo_idx, const struct bpf_core_relo_res *res)
+{
+ __u32 orig_val, new_val;
+ __u8 class;
+
+ class = BPF_CLASS(insn->code);
+
+ if (res->poison) {
+poison:
+ /* poison second part of ldimm64 to avoid confusing error from
+ * verifier about "unknown opcode 00"
+ */
+ if (is_ldimm64_insn(insn))
+ bpf_core_poison_insn(prog_name, relo_idx, insn_idx + 1, insn + 1);
+ bpf_core_poison_insn(prog_name, relo_idx, insn_idx, insn);
+ return 0;
+ }
+
+ orig_val = res->orig_val;
+ new_val = res->new_val;
+
+ switch (class) {
+ case BPF_ALU:
+ case BPF_ALU64:
+ if (BPF_SRC(insn->code) != BPF_K)
+ return -EINVAL;
+ if (res->validate && insn->imm != orig_val) {
+ pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n",
+ prog_name, relo_idx,
+ insn_idx, insn->imm, orig_val, new_val);
+ return -EINVAL;
+ }
+ orig_val = insn->imm;
+ insn->imm = new_val;
+ pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n",
+ prog_name, relo_idx, insn_idx,
+ orig_val, new_val);
+ break;
+ case BPF_LDX:
+ case BPF_ST:
+ case BPF_STX:
+ if (res->validate && insn->off != orig_val) {
+ pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n",
+ prog_name, relo_idx, insn_idx, insn->off, orig_val, new_val);
+ return -EINVAL;
+ }
+ if (new_val > SHRT_MAX) {
+ pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n",
+ prog_name, relo_idx, insn_idx, new_val);
+ return -ERANGE;
+ }
+ if (res->fail_memsz_adjust) {
+ pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) accesses field incorrectly. "
+ "Make sure you are accessing pointers, unsigned integers, or fields of matching type and size.\n",
+ prog_name, relo_idx, insn_idx);
+ goto poison;
+ }
+
+ orig_val = insn->off;
+ insn->off = new_val;
+ pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n",
+ prog_name, relo_idx, insn_idx, orig_val, new_val);
+
+ if (res->new_sz != res->orig_sz) {
+ int insn_bytes_sz, insn_bpf_sz;
+
+ insn_bytes_sz = insn_bpf_size_to_bytes(insn);
+ if (insn_bytes_sz != res->orig_sz) {
+ pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) unexpected mem size: got %d, exp %u\n",
+ prog_name, relo_idx, insn_idx, insn_bytes_sz, res->orig_sz);
+ return -EINVAL;
+ }
+
+ insn_bpf_sz = insn_bytes_to_bpf_size(res->new_sz);
+ if (insn_bpf_sz < 0) {
+ pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) invalid new mem size: %u\n",
+ prog_name, relo_idx, insn_idx, res->new_sz);
+ return -EINVAL;
+ }
+
+ insn->code = BPF_MODE(insn->code) | insn_bpf_sz | BPF_CLASS(insn->code);
+ pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) mem_sz %u -> %u\n",
+ prog_name, relo_idx, insn_idx, res->orig_sz, res->new_sz);
+ }
+ break;
+ case BPF_LD: {
+ __u64 imm;
+
+ if (!is_ldimm64_insn(insn) ||
+ insn[0].src_reg != 0 || insn[0].off != 0 ||
+ insn[1].code != 0 || insn[1].dst_reg != 0 ||
+ insn[1].src_reg != 0 || insn[1].off != 0) {
+ pr_warn("prog '%s': relo #%d: insn #%d (LDIMM64) has unexpected form\n",
+ prog_name, relo_idx, insn_idx);
+ return -EINVAL;
+ }
+
+ imm = insn[0].imm + ((__u64)insn[1].imm << 32);
+ if (res->validate && imm != orig_val) {
+ pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n",
+ prog_name, relo_idx,
+ insn_idx, (unsigned long long)imm,
+ orig_val, new_val);
+ return -EINVAL;
+ }
+
+ insn[0].imm = new_val;
+ insn[1].imm = 0; /* currently only 32-bit values are supported */
+ pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n",
+ prog_name, relo_idx, insn_idx,
+ (unsigned long long)imm, new_val);
+ break;
+ }
+ default:
+ pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:0x%x, src:0x%x, dst:0x%x, off:0x%x, imm:0x%x\n",
+ prog_name, relo_idx, insn_idx, insn->code,
+ insn->src_reg, insn->dst_reg, insn->off, insn->imm);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* Output spec definition in the format:
+ * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>,
+ * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b
+ */
+static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec)
+{
+ const struct btf_type *t;
+ const struct btf_enum *e;
+ const char *s;
+ __u32 type_id;
+ int i;
+
+ type_id = spec->root_type_id;
+ t = btf__type_by_id(spec->btf, type_id);
+ s = btf__name_by_offset(spec->btf, t->name_off);
+
+ libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s);
+
+ if (core_relo_is_type_based(spec->relo_kind))
+ return;
+
+ if (core_relo_is_enumval_based(spec->relo_kind)) {
+ t = skip_mods_and_typedefs(spec->btf, type_id, NULL);
+ e = btf_enum(t) + spec->raw_spec[0];
+ s = btf__name_by_offset(spec->btf, e->name_off);
+
+ libbpf_print(level, "::%s = %u", s, e->val);
+ return;
+ }
+
+ if (core_relo_is_field_based(spec->relo_kind)) {
+ for (i = 0; i < spec->len; i++) {
+ if (spec->spec[i].name)
+ libbpf_print(level, ".%s", spec->spec[i].name);
+ else if (i > 0 || spec->spec[i].idx > 0)
+ libbpf_print(level, "[%u]", spec->spec[i].idx);
+ }
+
+ libbpf_print(level, " (");
+ for (i = 0; i < spec->raw_len; i++)
+ libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]);
+
+ if (spec->bit_offset % 8)
+ libbpf_print(level, " @ offset %u.%u)",
+ spec->bit_offset / 8, spec->bit_offset % 8);
+ else
+ libbpf_print(level, " @ offset %u)", spec->bit_offset / 8);
+ return;
+ }
+}
+
+/*
+ * CO-RE relocate single instruction.
+ *
+ * The outline and important points of the algorithm:
+ * 1. For given local type, find corresponding candidate target types.
+ * Candidate type is a type with the same "essential" name, ignoring
+ * everything after last triple underscore (___). E.g., `sample`,
+ * `sample___flavor_one`, `sample___flavor_another_one`, are all candidates
+ * for each other. Names with triple underscore are referred to as
+ * "flavors" and are useful, among other things, to allow to
+ * specify/support incompatible variations of the same kernel struct, which
+ * might differ between different kernel versions and/or build
+ * configurations.
+ *
+ * N.B. Struct "flavors" could be generated by bpftool's BTF-to-C
+ * converter, when deduplicated BTF of a kernel still contains more than
+ * one different types with the same name. In that case, ___2, ___3, etc
+ * are appended starting from second name conflict. But start flavors are
+ * also useful to be defined "locally", in BPF program, to extract same
+ * data from incompatible changes between different kernel
+ * versions/configurations. For instance, to handle field renames between
+ * kernel versions, one can use two flavors of the struct name with the
+ * same common name and use conditional relocations to extract that field,
+ * depending on target kernel version.
+ * 2. For each candidate type, try to match local specification to this
+ * candidate target type. Matching involves finding corresponding
+ * high-level spec accessors, meaning that all named fields should match,
+ * as well as all array accesses should be within the actual bounds. Also,
+ * types should be compatible (see bpf_core_fields_are_compat for details).
+ * 3. It is supported and expected that there might be multiple flavors
+ * matching the spec. As long as all the specs resolve to the same set of
+ * offsets across all candidates, there is no error. If there is any
+ * ambiguity, CO-RE relocation will fail. This is necessary to accomodate
+ * imprefection of BTF deduplication, which can cause slight duplication of
+ * the same BTF type, if some directly or indirectly referenced (by
+ * pointer) type gets resolved to different actual types in different
+ * object files. If such situation occurs, deduplicated BTF will end up
+ * with two (or more) structurally identical types, which differ only in
+ * types they refer to through pointer. This should be OK in most cases and
+ * is not an error.
+ * 4. Candidate types search is performed by linearly scanning through all
+ * types in target BTF. It is anticipated that this is overall more
+ * efficient memory-wise and not significantly worse (if not better)
+ * CPU-wise compared to prebuilding a map from all local type names to
+ * a list of candidate type names. It's also sped up by caching resolved
+ * list of matching candidates per each local "root" type ID, that has at
+ * least one bpf_core_relo associated with it. This list is shared
+ * between multiple relocations for the same type ID and is updated as some
+ * of the candidates are pruned due to structural incompatibility.
+ */
+int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn,
+ int insn_idx,
+ const struct bpf_core_relo *relo,
+ int relo_idx,
+ const struct btf *local_btf,
+ struct bpf_core_cand_list *cands)
+{
+ struct bpf_core_spec local_spec, cand_spec, targ_spec = {};
+ struct bpf_core_relo_res cand_res, targ_res;
+ const struct btf_type *local_type;
+ const char *local_name;
+ __u32 local_id;
+ const char *spec_str;
+ int i, j, err;
+
+ local_id = relo->type_id;
+ local_type = btf__type_by_id(local_btf, local_id);
+ if (!local_type)
+ return -EINVAL;
+
+ local_name = btf__name_by_offset(local_btf, local_type->name_off);
+ if (!local_name)
+ return -EINVAL;
+
+ spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
+ if (str_is_empty(spec_str))
+ return -EINVAL;
+
+ err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec);
+ if (err) {
+ pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n",
+ prog_name, relo_idx, local_id, btf_kind_str(local_type),
+ str_is_empty(local_name) ? "<anon>" : local_name,
+ spec_str, err);
+ return -EINVAL;
+ }
+
+ pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog_name,
+ relo_idx, core_relo_kind_str(relo->kind), relo->kind);
+ bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec);
+ libbpf_print(LIBBPF_DEBUG, "\n");
+
+ /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */
+ if (relo->kind == BPF_TYPE_ID_LOCAL) {
+ targ_res.validate = true;
+ targ_res.poison = false;
+ targ_res.orig_val = local_spec.root_type_id;
+ targ_res.new_val = local_spec.root_type_id;
+ goto patch_insn;
+ }
+
+ /* libbpf doesn't support candidate search for anonymous types */
+ if (str_is_empty(spec_str)) {
+ pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n",
+ prog_name, relo_idx, core_relo_kind_str(relo->kind), relo->kind);
+ return -EOPNOTSUPP;
+ }
+
+
+ for (i = 0, j = 0; i < cands->len; i++) {
+ err = bpf_core_spec_match(&local_spec, cands->cands[i].btf,
+ cands->cands[i].id, &cand_spec);
+ if (err < 0) {
+ pr_warn("prog '%s': relo #%d: error matching candidate #%d ",
+ prog_name, relo_idx, i);
+ bpf_core_dump_spec(LIBBPF_WARN, &cand_spec);
+ libbpf_print(LIBBPF_WARN, ": %d\n", err);
+ return err;
+ }
+
+ pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog_name,
+ relo_idx, err == 0 ? "non-matching" : "matching", i);
+ bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec);
+ libbpf_print(LIBBPF_DEBUG, "\n");
+
+ if (err == 0)
+ continue;
+
+ err = bpf_core_calc_relo(prog_name, relo, relo_idx, &local_spec, &cand_spec, &cand_res);
+ if (err)
+ return err;
+
+ if (j == 0) {
+ targ_res = cand_res;
+ targ_spec = cand_spec;
+ } else if (cand_spec.bit_offset != targ_spec.bit_offset) {
+ /* if there are many field relo candidates, they
+ * should all resolve to the same bit offset
+ */
+ pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n",
+ prog_name, relo_idx, cand_spec.bit_offset,
+ targ_spec.bit_offset);
+ return -EINVAL;
+ } else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) {
+ /* all candidates should result in the same relocation
+ * decision and value, otherwise it's dangerous to
+ * proceed due to ambiguity
+ */
+ pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n",
+ prog_name, relo_idx,
+ cand_res.poison ? "failure" : "success", cand_res.new_val,
+ targ_res.poison ? "failure" : "success", targ_res.new_val);
+ return -EINVAL;
+ }
+
+ cands->cands[j++] = cands->cands[i];
+ }
+
+ /*
+ * For BPF_FIELD_EXISTS relo or when used BPF program has field
+ * existence checks or kernel version/config checks, it's expected
+ * that we might not find any candidates. In this case, if field
+ * wasn't found in any candidate, the list of candidates shouldn't
+ * change at all, we'll just handle relocating appropriately,
+ * depending on relo's kind.
+ */
+ if (j > 0)
+ cands->len = j;
+
+ /*
+ * If no candidates were found, it might be both a programmer error,
+ * as well as expected case, depending whether instruction w/
+ * relocation is guarded in some way that makes it unreachable (dead
+ * code) if relocation can't be resolved. This is handled in
+ * bpf_core_patch_insn() uniformly by replacing that instruction with
+ * BPF helper call insn (using invalid helper ID). If that instruction
+ * is indeed unreachable, then it will be ignored and eliminated by
+ * verifier. If it was an error, then verifier will complain and point
+ * to a specific instruction number in its log.
+ */
+ if (j == 0) {
+ pr_debug("prog '%s': relo #%d: no matching targets found\n",
+ prog_name, relo_idx);
+
+ /* calculate single target relo result explicitly */
+ err = bpf_core_calc_relo(prog_name, relo, relo_idx, &local_spec, NULL, &targ_res);
+ if (err)
+ return err;
+ }
+
+patch_insn:
+ /* bpf_core_patch_insn() should know how to handle missing targ_spec */
+ err = bpf_core_patch_insn(prog_name, insn, insn_idx, relo, relo_idx, &targ_res);
+ if (err) {
+ pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %d\n",
+ prog_name, relo_idx, relo->insn_off / 8, err);
+ return -EINVAL;
+ }
+
+ return 0;
+}
--- /dev/null
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/* Copyright (c) 2019 Facebook */
+
+#ifndef __RELO_CORE_H
+#define __RELO_CORE_H
+
+/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value
+ * has to be adjusted by relocations.
+ */
+enum bpf_core_relo_kind {
+ BPF_FIELD_BYTE_OFFSET = 0, /* field byte offset */
+ BPF_FIELD_BYTE_SIZE = 1, /* field size in bytes */
+ BPF_FIELD_EXISTS = 2, /* field existence in target kernel */
+ BPF_FIELD_SIGNED = 3, /* field signedness (0 - unsigned, 1 - signed) */
+ BPF_FIELD_LSHIFT_U64 = 4, /* bitfield-specific left bitshift */
+ BPF_FIELD_RSHIFT_U64 = 5, /* bitfield-specific right bitshift */
+ BPF_TYPE_ID_LOCAL = 6, /* type ID in local BPF object */
+ BPF_TYPE_ID_TARGET = 7, /* type ID in target kernel */
+ BPF_TYPE_EXISTS = 8, /* type existence in target kernel */
+ BPF_TYPE_SIZE = 9, /* type size in bytes */
+ BPF_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */
+ BPF_ENUMVAL_VALUE = 11, /* enum value integer value */
+};
+
+/* The minimum bpf_core_relo checked by the loader
+ *
+ * CO-RE relocation captures the following data:
+ * - insn_off - instruction offset (in bytes) within a BPF program that needs
+ * its insn->imm field to be relocated with actual field info;
+ * - type_id - BTF type ID of the "root" (containing) entity of a relocatable
+ * type or field;
+ * - access_str_off - offset into corresponding .BTF string section. String
+ * interpretation depends on specific relocation kind:
+ * - for field-based relocations, string encodes an accessed field using
+ * a sequence of field and array indices, separated by colon (:). It's
+ * conceptually very close to LLVM's getelementptr ([0]) instruction's
+ * arguments for identifying offset to a field.
+ * - for type-based relocations, strings is expected to be just "0";
+ * - for enum value-based relocations, string contains an index of enum
+ * value within its enum type;
+ *
+ * Example to provide a better feel.
+ *
+ * struct sample {
+ * int a;
+ * struct {
+ * int b[10];
+ * };
+ * };
+ *
+ * struct sample *s = ...;
+ * int x = &s->a; // encoded as "0:0" (a is field #0)
+ * int y = &s->b[5]; // encoded as "0:1:0:5" (anon struct is field #1,
+ * // b is field #0 inside anon struct, accessing elem #5)
+ * int z = &s[10]->b; // encoded as "10:1" (ptr is used as an array)
+ *
+ * type_id for all relocs in this example will capture BTF type id of
+ * `struct sample`.
+ *
+ * Such relocation is emitted when using __builtin_preserve_access_index()
+ * Clang built-in, passing expression that captures field address, e.g.:
+ *
+ * bpf_probe_read(&dst, sizeof(dst),
+ * __builtin_preserve_access_index(&src->a.b.c));
+ *
+ * In this case Clang will emit field relocation recording necessary data to
+ * be able to find offset of embedded `a.b.c` field within `src` struct.
+ *
+ * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction
+ */
+struct bpf_core_relo {
+ __u32 insn_off;
+ __u32 type_id;
+ __u32 access_str_off;
+ enum bpf_core_relo_kind kind;
+};
+
+struct bpf_core_cand {
+ const struct btf *btf;
+ const struct btf_type *t;
+ const char *name;
+ __u32 id;
+};
+
+/* dynamically sized list of type IDs and its associated struct btf */
+struct bpf_core_cand_list {
+ struct bpf_core_cand *cands;
+ int len;
+};
+
+int bpf_core_apply_relo_insn(const char *prog_name,
+ struct bpf_insn *insn, int insn_idx,
+ const struct bpf_core_relo *relo, int relo_idx,
+ const struct btf *local_btf,
+ struct bpf_core_cand_list *cands);
+int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
+ const struct btf *targ_btf, __u32 targ_id);
+
+size_t bpf_core_essential_name_len(const char *name);
+#endif
free(info_linear);
return -1;
}
- if (btf__get_from_id(info->btf_id, &btf)) {
+ btf = btf__load_from_kernel_by_id(info->btf_id);
+ if (libbpf_get_error(btf)) {
pr_debug("%s: failed to get BTF of id %u, aborting\n", __func__, info->btf_id);
err = -1;
- btf = NULL;
goto out;
}
perf_env__fetch_btf(env, info->btf_id, btf);
out:
free(info_linear);
- free(btf);
+ btf__free(btf);
return err ? -1 : 0;
}
if (btf_id == 0)
goto out;
- if (btf__get_from_id(btf_id, &btf)) {
+ btf = btf__load_from_kernel_by_id(btf_id);
+ if (libbpf_get_error(btf)) {
pr_debug("%s: failed to get BTF of id %u, aborting\n",
__func__, btf_id);
goto out;
perf_env__fetch_btf(env, btf_id, btf);
out:
- free(btf);
+ btf__free(btf);
close(fd);
}
struct bpf_prog_info_linear *info_linear;
struct bpf_func_info *func_info;
const struct btf_type *t;
+ struct btf *btf = NULL;
char *name = NULL;
- struct btf *btf;
info_linear = bpf_program__get_prog_info_linear(
tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO);
return NULL;
}
- if (info_linear->info.btf_id == 0 ||
- btf__get_from_id(info_linear->info.btf_id, &btf)) {
+ if (info_linear->info.btf_id == 0) {
pr_debug("prog FD %d doesn't have valid btf\n", tgt_fd);
goto out;
}
+ btf = btf__load_from_kernel_by_id(info_linear->info.btf_id);
+ if (libbpf_get_error(btf)) {
+ pr_debug("failed to load btf for prog FD %d\n", tgt_fd);
+ goto out;
+ }
+
func_info = u64_to_ptr(info_linear->info.func_info);
t = btf__type_by_id(btf, func_info[0].type_id);
if (!t) {
}
name = strdup(btf__name_by_offset(btf, t->name_off));
out:
+ btf__free(btf);
free(info_linear);
return name;
}
return 0;
}
+static void dump_queued_data(struct cs_etm_auxtrace *etm,
+ struct perf_record_auxtrace *event)
+{
+ struct auxtrace_buffer *buf;
+ unsigned int i;
+ /*
+ * Find all buffers with same reference in the queues and dump them.
+ * This is because the queues can contain multiple entries of the same
+ * buffer that were split on aux records.
+ */
+ for (i = 0; i < etm->queues.nr_queues; ++i)
+ list_for_each_entry(buf, &etm->queues.queue_array[i].head, list)
+ if (buf->reference == event->reference)
+ cs_etm__dump_event(etm, buf);
+}
+
static int cs_etm__process_auxtrace_event(struct perf_session *session,
union perf_event *event,
struct perf_tool *tool __maybe_unused)
cs_etm__dump_event(etm, buffer);
auxtrace_buffer__put_data(buffer);
}
- }
+ } else if (dump_trace)
+ dump_queued_data(etm, &event->auxtrace);
return 0;
}
if (dump_trace) {
cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu);
- return 0;
}
err = cs_etm__synth_events(etm, session);
if (!(prot & PROT_EXEC))
dso__set_loaded(dso);
}
-
- nsinfo__put(dso->nsinfo);
dso->nsinfo = nsi;
if (build_id__is_defined(bid))
return perf_pmu__find_map(NULL);
}
-static bool perf_pmu__valid_suffix(char *pmu_name, char *tok)
+/*
+ * Suffix must be in form tok_{digits}, or tok{digits}, or same as pmu_name
+ * to be valid.
+ */
+static bool perf_pmu__valid_suffix(const char *pmu_name, char *tok)
{
- char *p;
+ const char *p;
if (strncmp(pmu_name, tok, strlen(tok)))
return false;
if (*p == 0)
return true;
- if (*p != '_')
- return false;
+ if (*p == '_')
+ ++p;
- ++p;
- if (*p == 0 || !isdigit(*p))
- return false;
+ /* Ensure we end in a number */
+ while (1) {
+ if (!isdigit(*p))
+ return false;
+ if (*(++p) == 0)
+ break;
+ }
return true;
}
* match "socket" in "socketX_pmunameY" and then "pmuname" in
* "pmunameY".
*/
- for (; tok; name += strlen(tok), tok = strtok_r(NULL, ",", &tmp)) {
+ while (1) {
+ char *next_tok = strtok_r(NULL, ",", &tmp);
+
name = strstr(name, tok);
- if (!name || !perf_pmu__valid_suffix((char *)name, tok)) {
+ if (!name ||
+ (!next_tok && !perf_pmu__valid_suffix(name, tok))) {
res = false;
goto out;
}
+ if (!next_tok)
+ break;
+ tok = next_tok;
+ name += strlen(tok);
}
res = true;
TARGETS += mqueue
TARGETS += nci
TARGETS += net
+TARGETS += net/af_unix
TARGETS += net/forwarding
TARGETS += net/mptcp
TARGETS += netfilter
test_cgroup_storage
test_flow_dissector
flow_dissector_load
-test_netcnt
test_tcpnotify_user
test_libbpf
test_tcp_check_syncookie_user
test_verifier_log test_dev_cgroup \
test_sock test_sockmap get_cgroup_id_user \
test_cgroup_storage \
- test_netcnt test_tcpnotify_user test_sysctl \
+ test_tcpnotify_user test_sysctl \
test_progs-no_alu32
# Also test bpf-gcc, if present
$(OUTPUT)/test_tcpnotify_user: cgroup_helpers.c trace_helpers.c
$(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c
$(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
-$(OUTPUT)/test_netcnt: cgroup_helpers.c
$(OUTPUT)/test_sock_fields: cgroup_helpers.c
$(OUTPUT)/test_sysctl: cgroup_helpers.c
bpf selftests, runs them (by default ``tools/testing/selftests/bpf/test_progs``) and
saves the resulting output (by default in ``~/.bpf_selftests``).
+Script dependencies:
+- clang (preferably built from sources, https://github.com/llvm/llvm-project);
+- pahole (preferably built from sources, https://git.kernel.org/pub/scm/devel/pahole/pahole.git/);
+- qemu;
+- docutils (for ``rst2man``);
+- libcap-devel.
+
For more information on about using the script, run:
.. code-block:: console
#define MAX_PERCPU_PACKETS 32
-struct percpu_net_cnt {
- __u64 packets;
- __u64 bytes;
+/* sizeof(struct bpf_local_storage_elem):
+ *
+ * It really is about 128 bytes on x86_64, but allocate more to account for
+ * possible layout changes, different architectures, etc.
+ * The kernel will wrap up to PAGE_SIZE internally anyway.
+ */
+#define SIZEOF_BPF_LOCAL_STORAGE_ELEM 256
- __u64 prev_ts;
+/* Try to estimate kernel's BPF_LOCAL_STORAGE_MAX_VALUE_SIZE: */
+#define BPF_LOCAL_STORAGE_MAX_VALUE_SIZE (0xFFFF - \
+ SIZEOF_BPF_LOCAL_STORAGE_ELEM)
- __u64 prev_packets;
- __u64 prev_bytes;
+#define PCPU_MIN_UNIT_SIZE 32768
+
+union percpu_net_cnt {
+ struct {
+ __u64 packets;
+ __u64 bytes;
+
+ __u64 prev_ts;
+
+ __u64 prev_packets;
+ __u64 prev_bytes;
+ };
+ __u8 data[PCPU_MIN_UNIT_SIZE];
};
-struct net_cnt {
- __u64 packets;
- __u64 bytes;
+union net_cnt {
+ struct {
+ __u64 packets;
+ __u64 bytes;
+ };
+ __u8 data[BPF_LOCAL_STORAGE_MAX_VALUE_SIZE];
};
#endif
#define save_errno_close(fd) ({ int __save = errno; close(fd); errno = __save; })
-int start_server(int family, int type, const char *addr_str, __u16 port,
- int timeout_ms)
+static int __start_server(int type, const struct sockaddr *addr,
+ socklen_t addrlen, int timeout_ms, bool reuseport)
{
- struct sockaddr_storage addr = {};
- socklen_t len;
+ int on = 1;
int fd;
- if (make_sockaddr(family, addr_str, port, &addr, &len))
- return -1;
-
- fd = socket(family, type, 0);
+ fd = socket(addr->sa_family, type, 0);
if (fd < 0) {
log_err("Failed to create server socket");
return -1;
if (settimeo(fd, timeout_ms))
goto error_close;
- if (bind(fd, (const struct sockaddr *)&addr, len) < 0) {
+ if (reuseport &&
+ setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on))) {
+ log_err("Failed to set SO_REUSEPORT");
+ return -1;
+ }
+
+ if (bind(fd, addr, addrlen) < 0) {
log_err("Failed to bind socket");
goto error_close;
}
return -1;
}
+int start_server(int family, int type, const char *addr_str, __u16 port,
+ int timeout_ms)
+{
+ struct sockaddr_storage addr;
+ socklen_t addrlen;
+
+ if (make_sockaddr(family, addr_str, port, &addr, &addrlen))
+ return -1;
+
+ return __start_server(type, (struct sockaddr *)&addr,
+ addrlen, timeout_ms, false);
+}
+
+int *start_reuseport_server(int family, int type, const char *addr_str,
+ __u16 port, int timeout_ms, unsigned int nr_listens)
+{
+ struct sockaddr_storage addr;
+ unsigned int nr_fds = 0;
+ socklen_t addrlen;
+ int *fds;
+
+ if (!nr_listens)
+ return NULL;
+
+ if (make_sockaddr(family, addr_str, port, &addr, &addrlen))
+ return NULL;
+
+ fds = malloc(sizeof(*fds) * nr_listens);
+ if (!fds)
+ return NULL;
+
+ fds[0] = __start_server(type, (struct sockaddr *)&addr, addrlen,
+ timeout_ms, true);
+ if (fds[0] == -1)
+ goto close_fds;
+ nr_fds = 1;
+
+ if (getsockname(fds[0], (struct sockaddr *)&addr, &addrlen))
+ goto close_fds;
+
+ for (; nr_fds < nr_listens; nr_fds++) {
+ fds[nr_fds] = __start_server(type, (struct sockaddr *)&addr,
+ addrlen, timeout_ms, true);
+ if (fds[nr_fds] == -1)
+ goto close_fds;
+ }
+
+ return fds;
+
+close_fds:
+ free_fds(fds, nr_fds);
+ return NULL;
+}
+
+void free_fds(int *fds, unsigned int nr_close_fds)
+{
+ if (fds) {
+ while (nr_close_fds)
+ close(fds[--nr_close_fds]);
+ free(fds);
+ }
+}
+
int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
int timeout_ms)
{
if (family == AF_INET) {
struct sockaddr_in *sin = (void *)addr;
+ memset(addr, 0, sizeof(*sin));
sin->sin_family = AF_INET;
sin->sin_port = htons(port);
if (addr_str &&
} else if (family == AF_INET6) {
struct sockaddr_in6 *sin6 = (void *)addr;
+ memset(addr, 0, sizeof(*sin6));
sin6->sin6_family = AF_INET6;
sin6->sin6_port = htons(port);
if (addr_str &&
}
return -1;
}
+
+char *ping_command(int family)
+{
+ if (family == AF_INET6) {
+ /* On some systems 'ping' doesn't support IPv6, so use ping6 if it is present. */
+ if (!system("which ping6 >/dev/null 2>&1"))
+ return "ping6";
+ else
+ return "ping -6";
+ }
+ return "ping";
+}
int settimeo(int fd, int timeout_ms);
int start_server(int family, int type, const char *addr, __u16 port,
int timeout_ms);
+int *start_reuseport_server(int family, int type, const char *addr_str,
+ __u16 port, int timeout_ms,
+ unsigned int nr_listens);
+void free_fds(int *fds, unsigned int nr_close_fds);
int connect_to_fd(int server_fd, int timeout_ms);
int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms);
int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
int timeout_ms);
int make_sockaddr(int family, const char *addr_str, __u16 port,
struct sockaddr_storage *addr, socklen_t *len);
+char *ping_command(int family);
#endif
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <test_progs.h>
+#include "network_helpers.h"
+#include "bpf_dctcp.skel.h"
+#include "bpf_cubic.skel.h"
+#include "bpf_iter_setsockopt.skel.h"
+
+static int create_netns(void)
+{
+ if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
+ return -1;
+
+ if (!ASSERT_OK(system("ip link set dev lo up"), "bring up lo"))
+ return -1;
+
+ return 0;
+}
+
+static unsigned int set_bpf_cubic(int *fds, unsigned int nr_fds)
+{
+ unsigned int i;
+
+ for (i = 0; i < nr_fds; i++) {
+ if (setsockopt(fds[i], SOL_TCP, TCP_CONGESTION, "bpf_cubic",
+ sizeof("bpf_cubic")))
+ return i;
+ }
+
+ return nr_fds;
+}
+
+static unsigned int check_bpf_dctcp(int *fds, unsigned int nr_fds)
+{
+ char tcp_cc[16];
+ socklen_t optlen = sizeof(tcp_cc);
+ unsigned int i;
+
+ for (i = 0; i < nr_fds; i++) {
+ if (getsockopt(fds[i], SOL_TCP, TCP_CONGESTION,
+ tcp_cc, &optlen) ||
+ strcmp(tcp_cc, "bpf_dctcp"))
+ return i;
+ }
+
+ return nr_fds;
+}
+
+static int *make_established(int listen_fd, unsigned int nr_est,
+ int **paccepted_fds)
+{
+ int *est_fds, *accepted_fds;
+ unsigned int i;
+
+ est_fds = malloc(sizeof(*est_fds) * nr_est);
+ if (!est_fds)
+ return NULL;
+
+ accepted_fds = malloc(sizeof(*accepted_fds) * nr_est);
+ if (!accepted_fds) {
+ free(est_fds);
+ return NULL;
+ }
+
+ for (i = 0; i < nr_est; i++) {
+ est_fds[i] = connect_to_fd(listen_fd, 0);
+ if (est_fds[i] == -1)
+ break;
+ if (set_bpf_cubic(&est_fds[i], 1) != 1) {
+ close(est_fds[i]);
+ break;
+ }
+
+ accepted_fds[i] = accept(listen_fd, NULL, 0);
+ if (accepted_fds[i] == -1) {
+ close(est_fds[i]);
+ break;
+ }
+ }
+
+ if (!ASSERT_EQ(i, nr_est, "create established fds")) {
+ free_fds(accepted_fds, i);
+ free_fds(est_fds, i);
+ return NULL;
+ }
+
+ *paccepted_fds = accepted_fds;
+ return est_fds;
+}
+
+static unsigned short get_local_port(int fd)
+{
+ struct sockaddr_in6 addr;
+ socklen_t addrlen = sizeof(addr);
+
+ if (!getsockname(fd, &addr, &addrlen))
+ return ntohs(addr.sin6_port);
+
+ return 0;
+}
+
+static void do_bpf_iter_setsockopt(struct bpf_iter_setsockopt *iter_skel,
+ bool random_retry)
+{
+ int *reuse_listen_fds = NULL, *accepted_fds = NULL, *est_fds = NULL;
+ unsigned int nr_reuse_listens = 256, nr_est = 256;
+ int err, iter_fd = -1, listen_fd = -1;
+ char buf;
+
+ /* Prepare non-reuseport listen_fd */
+ listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+ if (!ASSERT_GE(listen_fd, 0, "start_server"))
+ return;
+ if (!ASSERT_EQ(set_bpf_cubic(&listen_fd, 1), 1,
+ "set listen_fd to cubic"))
+ goto done;
+ iter_skel->bss->listen_hport = get_local_port(listen_fd);
+ if (!ASSERT_NEQ(iter_skel->bss->listen_hport, 0,
+ "get_local_port(listen_fd)"))
+ goto done;
+
+ /* Connect to non-reuseport listen_fd */
+ est_fds = make_established(listen_fd, nr_est, &accepted_fds);
+ if (!ASSERT_OK_PTR(est_fds, "create established"))
+ goto done;
+
+ /* Prepare reuseport listen fds */
+ reuse_listen_fds = start_reuseport_server(AF_INET6, SOCK_STREAM,
+ "::1", 0, 0,
+ nr_reuse_listens);
+ if (!ASSERT_OK_PTR(reuse_listen_fds, "start_reuseport_server"))
+ goto done;
+ if (!ASSERT_EQ(set_bpf_cubic(reuse_listen_fds, nr_reuse_listens),
+ nr_reuse_listens, "set reuse_listen_fds to cubic"))
+ goto done;
+ iter_skel->bss->reuse_listen_hport = get_local_port(reuse_listen_fds[0]);
+ if (!ASSERT_NEQ(iter_skel->bss->reuse_listen_hport, 0,
+ "get_local_port(reuse_listen_fds[0])"))
+ goto done;
+
+ /* Run bpf tcp iter to switch from bpf_cubic to bpf_dctcp */
+ iter_skel->bss->random_retry = random_retry;
+ iter_fd = bpf_iter_create(bpf_link__fd(iter_skel->links.change_tcp_cc));
+ if (!ASSERT_GE(iter_fd, 0, "create iter_fd"))
+ goto done;
+
+ while ((err = read(iter_fd, &buf, sizeof(buf))) == -1 &&
+ errno == EAGAIN)
+ ;
+ if (!ASSERT_OK(err, "read iter error"))
+ goto done;
+
+ /* Check reuseport listen fds for dctcp */
+ ASSERT_EQ(check_bpf_dctcp(reuse_listen_fds, nr_reuse_listens),
+ nr_reuse_listens,
+ "check reuse_listen_fds dctcp");
+
+ /* Check non reuseport listen fd for dctcp */
+ ASSERT_EQ(check_bpf_dctcp(&listen_fd, 1), 1,
+ "check listen_fd dctcp");
+
+ /* Check established fds for dctcp */
+ ASSERT_EQ(check_bpf_dctcp(est_fds, nr_est), nr_est,
+ "check est_fds dctcp");
+
+ /* Check accepted fds for dctcp */
+ ASSERT_EQ(check_bpf_dctcp(accepted_fds, nr_est), nr_est,
+ "check accepted_fds dctcp");
+
+done:
+ if (iter_fd != -1)
+ close(iter_fd);
+ if (listen_fd != -1)
+ close(listen_fd);
+ free_fds(reuse_listen_fds, nr_reuse_listens);
+ free_fds(accepted_fds, nr_est);
+ free_fds(est_fds, nr_est);
+}
+
+void test_bpf_iter_setsockopt(void)
+{
+ struct bpf_iter_setsockopt *iter_skel = NULL;
+ struct bpf_cubic *cubic_skel = NULL;
+ struct bpf_dctcp *dctcp_skel = NULL;
+ struct bpf_link *cubic_link = NULL;
+ struct bpf_link *dctcp_link = NULL;
+
+ if (create_netns())
+ return;
+
+ /* Load iter_skel */
+ iter_skel = bpf_iter_setsockopt__open_and_load();
+ if (!ASSERT_OK_PTR(iter_skel, "iter_skel"))
+ return;
+ iter_skel->links.change_tcp_cc = bpf_program__attach_iter(iter_skel->progs.change_tcp_cc, NULL);
+ if (!ASSERT_OK_PTR(iter_skel->links.change_tcp_cc, "attach iter"))
+ goto done;
+
+ /* Load bpf_cubic */
+ cubic_skel = bpf_cubic__open_and_load();
+ if (!ASSERT_OK_PTR(cubic_skel, "cubic_skel"))
+ goto done;
+ cubic_link = bpf_map__attach_struct_ops(cubic_skel->maps.cubic);
+ if (!ASSERT_OK_PTR(cubic_link, "cubic_link"))
+ goto done;
+
+ /* Load bpf_dctcp */
+ dctcp_skel = bpf_dctcp__open_and_load();
+ if (!ASSERT_OK_PTR(dctcp_skel, "dctcp_skel"))
+ goto done;
+ dctcp_link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp);
+ if (!ASSERT_OK_PTR(dctcp_link, "dctcp_link"))
+ goto done;
+
+ do_bpf_iter_setsockopt(iter_skel, true);
+ do_bpf_iter_setsockopt(iter_skel, false);
+
+done:
+ bpf_link__destroy(cubic_link);
+ bpf_link__destroy(dctcp_link);
+ bpf_cubic__destroy(cubic_skel);
+ bpf_dctcp__destroy(dctcp_skel);
+ bpf_iter_setsockopt__destroy(iter_skel);
+}
goto done;
}
- err = btf__get_from_id(info.btf_id, &btf);
+ btf = btf__load_from_kernel_by_id(info.btf_id);
+ err = libbpf_get_error(btf);
if (CHECK(err, "cannot get btf from kernel, err: %d", err))
goto done;
fprintf(stderr, "OK");
done:
+ btf__free(btf);
free(func_info);
bpf_object__close(obj);
}
btf__free(btf);
}
+#define STRSIZE 4096
+
+static void btf_dump_snprintf(void *ctx, const char *fmt, va_list args)
+{
+ char *s = ctx, new[STRSIZE];
+
+ vsnprintf(new, STRSIZE, fmt, args);
+ if (strlen(s) < STRSIZE)
+ strncat(s, new, STRSIZE - strlen(s) - 1);
+}
+
+static int btf_dump_data(struct btf *btf, struct btf_dump *d,
+ char *name, char *prefix, __u64 flags, void *ptr,
+ size_t ptr_sz, char *str, const char *expected_val)
+{
+ DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts);
+ size_t type_sz;
+ __s32 type_id;
+ int ret = 0;
+
+ if (flags & BTF_F_COMPACT)
+ opts.compact = true;
+ if (flags & BTF_F_NONAME)
+ opts.skip_names = true;
+ if (flags & BTF_F_ZERO)
+ opts.emit_zeroes = true;
+ if (prefix) {
+ ASSERT_STRNEQ(name, prefix, strlen(prefix),
+ "verify prefix match");
+ name += strlen(prefix) + 1;
+ }
+ type_id = btf__find_by_name(btf, name);
+ if (!ASSERT_GE(type_id, 0, "find type id"))
+ return -ENOENT;
+ type_sz = btf__resolve_size(btf, type_id);
+ str[0] = '\0';
+ ret = btf_dump__dump_type_data(d, type_id, ptr, ptr_sz, &opts);
+ if (type_sz <= ptr_sz) {
+ if (!ASSERT_EQ(ret, type_sz, "failed/unexpected type_sz"))
+ return -EINVAL;
+ } else {
+ if (!ASSERT_EQ(ret, -E2BIG, "failed to return -E2BIG"))
+ return -EINVAL;
+ }
+ if (!ASSERT_STREQ(str, expected_val, "ensure expected/actual match"))
+ return -EFAULT;
+ return 0;
+}
+
+#define TEST_BTF_DUMP_DATA(_b, _d, _prefix, _str, _type, _flags, \
+ _expected, ...) \
+ do { \
+ char __ptrtype[64] = #_type; \
+ char *_ptrtype = (char *)__ptrtype; \
+ _type _ptrdata = __VA_ARGS__; \
+ void *_ptr = &_ptrdata; \
+ \
+ (void) btf_dump_data(_b, _d, _ptrtype, _prefix, _flags, \
+ _ptr, sizeof(_type), _str, \
+ _expected); \
+ } while (0)
+
+/* Use where expected data string matches its stringified declaration */
+#define TEST_BTF_DUMP_DATA_C(_b, _d, _prefix, _str, _type, _flags, \
+ ...) \
+ TEST_BTF_DUMP_DATA(_b, _d, _prefix, _str, _type, _flags, \
+ "(" #_type ")" #__VA_ARGS__, __VA_ARGS__)
+
+/* overflow test; pass typesize < expected type size, ensure E2BIG returned */
+#define TEST_BTF_DUMP_DATA_OVER(_b, _d, _prefix, _str, _type, _type_sz, \
+ _expected, ...) \
+ do { \
+ char __ptrtype[64] = #_type; \
+ char *_ptrtype = (char *)__ptrtype; \
+ _type _ptrdata = __VA_ARGS__; \
+ void *_ptr = &_ptrdata; \
+ \
+ (void) btf_dump_data(_b, _d, _ptrtype, _prefix, 0, \
+ _ptr, _type_sz, _str, _expected); \
+ } while (0)
+
+#define TEST_BTF_DUMP_VAR(_b, _d, _prefix, _str, _var, _type, _flags, \
+ _expected, ...) \
+ do { \
+ _type _ptrdata = __VA_ARGS__; \
+ void *_ptr = &_ptrdata; \
+ \
+ (void) btf_dump_data(_b, _d, _var, _prefix, _flags, \
+ _ptr, sizeof(_type), _str, \
+ _expected); \
+ } while (0)
+
+static void test_btf_dump_int_data(struct btf *btf, struct btf_dump *d,
+ char *str)
+{
+#ifdef __SIZEOF_INT128__
+ __int128 i = 0xffffffffffffffff;
+
+ /* this dance is required because we cannot directly initialize
+ * a 128-bit value to anything larger than a 64-bit value.
+ */
+ i = (i << 64) | (i - 1);
+#endif
+ /* simple int */
+ TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, int, BTF_F_COMPACT, 1234);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_NONAME,
+ "1234", 1234);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, 0, "(int)1234", 1234);
+
+ /* zero value should be printed at toplevel */
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT, "(int)0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_NONAME,
+ "0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_ZERO,
+ "(int)0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int,
+ BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+ "0", 0);
+ TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, int, BTF_F_COMPACT, -4567);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_NONAME,
+ "-4567", -4567);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, 0, "(int)-4567", -4567);
+
+ TEST_BTF_DUMP_DATA_OVER(btf, d, NULL, str, int, sizeof(int)-1, "", 1);
+
+#ifdef __SIZEOF_INT128__
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, __int128, BTF_F_COMPACT,
+ "(__int128)0xffffffffffffffff",
+ 0xffffffffffffffff);
+ ASSERT_OK(btf_dump_data(btf, d, "__int128", NULL, 0, &i, 16, str,
+ "(__int128)0xfffffffffffffffffffffffffffffffe"),
+ "dump __int128");
+#endif
+}
+
+static void test_btf_dump_float_data(struct btf *btf, struct btf_dump *d,
+ char *str)
+{
+ float t1 = 1.234567;
+ float t2 = -1.234567;
+ float t3 = 0.0;
+ double t4 = 5.678912;
+ double t5 = -5.678912;
+ double t6 = 0.0;
+ long double t7 = 9.876543;
+ long double t8 = -9.876543;
+ long double t9 = 0.0;
+
+ /* since the kernel does not likely have any float types in its BTF, we
+ * will need to add some of various sizes.
+ */
+
+ ASSERT_GT(btf__add_float(btf, "test_float", 4), 0, "add float");
+ ASSERT_OK(btf_dump_data(btf, d, "test_float", NULL, 0, &t1, 4, str,
+ "(test_float)1.234567"), "dump float");
+ ASSERT_OK(btf_dump_data(btf, d, "test_float", NULL, 0, &t2, 4, str,
+ "(test_float)-1.234567"), "dump float");
+ ASSERT_OK(btf_dump_data(btf, d, "test_float", NULL, 0, &t3, 4, str,
+ "(test_float)0.000000"), "dump float");
+
+ ASSERT_GT(btf__add_float(btf, "test_double", 8), 0, "add_double");
+ ASSERT_OK(btf_dump_data(btf, d, "test_double", NULL, 0, &t4, 8, str,
+ "(test_double)5.678912"), "dump double");
+ ASSERT_OK(btf_dump_data(btf, d, "test_double", NULL, 0, &t5, 8, str,
+ "(test_double)-5.678912"), "dump double");
+ ASSERT_OK(btf_dump_data(btf, d, "test_double", NULL, 0, &t6, 8, str,
+ "(test_double)0.000000"), "dump double");
+
+ ASSERT_GT(btf__add_float(btf, "test_long_double", 16), 0, "add long double");
+ ASSERT_OK(btf_dump_data(btf, d, "test_long_double", NULL, 0, &t7, 16,
+ str, "(test_long_double)9.876543"),
+ "dump long_double");
+ ASSERT_OK(btf_dump_data(btf, d, "test_long_double", NULL, 0, &t8, 16,
+ str, "(test_long_double)-9.876543"),
+ "dump long_double");
+ ASSERT_OK(btf_dump_data(btf, d, "test_long_double", NULL, 0, &t9, 16,
+ str, "(test_long_double)0.000000"),
+ "dump long_double");
+}
+
+static void test_btf_dump_char_data(struct btf *btf, struct btf_dump *d,
+ char *str)
+{
+ /* simple char */
+ TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, char, BTF_F_COMPACT, 100);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_NONAME,
+ "100", 100);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, 0, "(char)100", 100);
+ /* zero value should be printed at toplevel */
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT,
+ "(char)0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_NONAME,
+ "0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_ZERO,
+ "(char)0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+ "0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, 0, "(char)0", 0);
+
+ TEST_BTF_DUMP_DATA_OVER(btf, d, NULL, str, char, sizeof(char)-1, "", 100);
+}
+
+static void test_btf_dump_typedef_data(struct btf *btf, struct btf_dump *d,
+ char *str)
+{
+ /* simple typedef */
+ TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, uint64_t, BTF_F_COMPACT, 100);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT | BTF_F_NONAME,
+ "1", 1);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, 0, "(u64)1", 1);
+ /* zero value should be printed at toplevel */
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT, "(u64)0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT | BTF_F_NONAME,
+ "0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT | BTF_F_ZERO,
+ "(u64)0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64,
+ BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+ "0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, 0, "(u64)0", 0);
+
+ /* typedef struct */
+ TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, atomic_t, BTF_F_COMPACT,
+ {.counter = (int)1,});
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT | BTF_F_NONAME,
+ "{1,}", { .counter = 1 });
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, 0,
+"(atomic_t){\n"
+" .counter = (int)1,\n"
+"}",
+ {.counter = 1,});
+ /* typedef with 0 value should be printed at toplevel */
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT, "(atomic_t){}",
+ {.counter = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT | BTF_F_NONAME,
+ "{}", {.counter = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, 0,
+"(atomic_t){\n"
+"}",
+ {.counter = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT | BTF_F_ZERO,
+ "(atomic_t){.counter = (int)0,}",
+ {.counter = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t,
+ BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+ "{0,}", {.counter = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_ZERO,
+"(atomic_t){\n"
+" .counter = (int)0,\n"
+"}",
+ { .counter = 0,});
+
+ /* overflow should show type but not value since it overflows */
+ TEST_BTF_DUMP_DATA_OVER(btf, d, NULL, str, atomic_t, sizeof(atomic_t)-1,
+ "(atomic_t){\n", { .counter = 1});
+}
+
+static void test_btf_dump_enum_data(struct btf *btf, struct btf_dump *d,
+ char *str)
+{
+ /* enum where enum value does (and does not) exist */
+ TEST_BTF_DUMP_DATA_C(btf, d, "enum", str, enum bpf_cmd, BTF_F_COMPACT,
+ BPF_MAP_CREATE);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, BTF_F_COMPACT,
+ "(enum bpf_cmd)BPF_MAP_CREATE", 0);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "BPF_MAP_CREATE",
+ BPF_MAP_CREATE);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, 0,
+ "(enum bpf_cmd)BPF_MAP_CREATE",
+ BPF_MAP_CREATE);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+ BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+ "BPF_MAP_CREATE", 0);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+ BTF_F_COMPACT | BTF_F_ZERO,
+ "(enum bpf_cmd)BPF_MAP_CREATE",
+ BPF_MAP_CREATE);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+ BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+ "BPF_MAP_CREATE", BPF_MAP_CREATE);
+ TEST_BTF_DUMP_DATA_C(btf, d, "enum", str, enum bpf_cmd, BTF_F_COMPACT, 2000);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "2000", 2000);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, 0,
+ "(enum bpf_cmd)2000", 2000);
+
+ TEST_BTF_DUMP_DATA_OVER(btf, d, "enum", str, enum bpf_cmd,
+ sizeof(enum bpf_cmd) - 1, "", BPF_MAP_CREATE);
+}
+
+static void test_btf_dump_struct_data(struct btf *btf, struct btf_dump *d,
+ char *str)
+{
+ DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts);
+ char zero_data[512] = { };
+ char type_data[512];
+ void *fops = type_data;
+ void *skb = type_data;
+ size_t type_sz;
+ __s32 type_id;
+ char *cmpstr;
+ int ret;
+
+ memset(type_data, 255, sizeof(type_data));
+
+ /* simple struct */
+ TEST_BTF_DUMP_DATA_C(btf, d, "struct", str, struct btf_enum, BTF_F_COMPACT,
+ {.name_off = (__u32)3,.val = (__s32)-1,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "{3,-1,}",
+ { .name_off = 3, .val = -1,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, 0,
+"(struct btf_enum){\n"
+" .name_off = (__u32)3,\n"
+" .val = (__s32)-1,\n"
+"}",
+ { .name_off = 3, .val = -1,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "{-1,}",
+ { .name_off = 0, .val = -1,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+ BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+ "{0,-1,}",
+ { .name_off = 0, .val = -1,});
+ /* empty struct should be printed */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, BTF_F_COMPACT,
+ "(struct btf_enum){}",
+ { .name_off = 0, .val = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "{}",
+ { .name_off = 0, .val = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, 0,
+"(struct btf_enum){\n"
+"}",
+ { .name_off = 0, .val = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+ BTF_F_COMPACT | BTF_F_ZERO,
+ "(struct btf_enum){.name_off = (__u32)0,.val = (__s32)0,}",
+ { .name_off = 0, .val = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+ BTF_F_ZERO,
+"(struct btf_enum){\n"
+" .name_off = (__u32)0,\n"
+" .val = (__s32)0,\n"
+"}",
+ { .name_off = 0, .val = 0,});
+
+ /* struct with pointers */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, BTF_F_COMPACT,
+ "(struct list_head){.next = (struct list_head *)0x1,}",
+ { .next = (struct list_head *)1 });
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, 0,
+"(struct list_head){\n"
+" .next = (struct list_head *)0x1,\n"
+"}",
+ { .next = (struct list_head *)1 });
+ /* NULL pointer should not be displayed */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, BTF_F_COMPACT,
+ "(struct list_head){}",
+ { .next = (struct list_head *)0 });
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, 0,
+"(struct list_head){\n"
+"}",
+ { .next = (struct list_head *)0 });
+
+ /* struct with function pointers */
+ type_id = btf__find_by_name(btf, "file_operations");
+ if (ASSERT_GT(type_id, 0, "find type id")) {
+ type_sz = btf__resolve_size(btf, type_id);
+ str[0] = '\0';
+
+ ret = btf_dump__dump_type_data(d, type_id, fops, type_sz, &opts);
+ ASSERT_EQ(ret, type_sz,
+ "unexpected return value dumping file_operations");
+ cmpstr =
+"(struct file_operations){\n"
+" .owner = (struct module *)0xffffffffffffffff,\n"
+" .llseek = (loff_t (*)(struct file *, loff_t, int))0xffffffffffffffff,";
+
+ ASSERT_STRNEQ(str, cmpstr, strlen(cmpstr), "file_operations");
+ }
+
+ /* struct with char array */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, BTF_F_COMPACT,
+ "(struct bpf_prog_info){.name = (char[16])['f','o','o',],}",
+ { .name = "foo",});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "{['f','o','o',],}",
+ {.name = "foo",});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, 0,
+"(struct bpf_prog_info){\n"
+" .name = (char[16])[\n"
+" 'f',\n"
+" 'o',\n"
+" 'o',\n"
+" ],\n"
+"}",
+ {.name = "foo",});
+ /* leading null char means do not display string */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, BTF_F_COMPACT,
+ "(struct bpf_prog_info){}",
+ {.name = {'\0', 'f', 'o', 'o'}});
+ /* handle non-printable characters */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, BTF_F_COMPACT,
+ "(struct bpf_prog_info){.name = (char[16])[1,2,3,],}",
+ { .name = {1, 2, 3, 0}});
+
+ /* struct with non-char array */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, BTF_F_COMPACT,
+ "(struct __sk_buff){.cb = (__u32[5])[1,2,3,4,5,],}",
+ { .cb = {1, 2, 3, 4, 5,},});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "{[1,2,3,4,5,],}",
+ { .cb = { 1, 2, 3, 4, 5},});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, 0,
+"(struct __sk_buff){\n"
+" .cb = (__u32[5])[\n"
+" 1,\n"
+" 2,\n"
+" 3,\n"
+" 4,\n"
+" 5,\n"
+" ],\n"
+"}",
+ { .cb = { 1, 2, 3, 4, 5},});
+ /* For non-char, arrays, show non-zero values only */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, BTF_F_COMPACT,
+ "(struct __sk_buff){.cb = (__u32[5])[0,0,1,0,0,],}",
+ { .cb = { 0, 0, 1, 0, 0},});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, 0,
+"(struct __sk_buff){\n"
+" .cb = (__u32[5])[\n"
+" 0,\n"
+" 0,\n"
+" 1,\n"
+" 0,\n"
+" 0,\n"
+" ],\n"
+"}",
+ { .cb = { 0, 0, 1, 0, 0},});
+
+ /* struct with bitfields */
+ TEST_BTF_DUMP_DATA_C(btf, d, "struct", str, struct bpf_insn, BTF_F_COMPACT,
+ {.code = (__u8)1,.dst_reg = (__u8)0x2,.src_reg = (__u8)0x3,.off = (__s16)4,.imm = (__s32)5,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_insn,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "{1,0x2,0x3,4,5,}",
+ { .code = 1, .dst_reg = 0x2, .src_reg = 0x3, .off = 4,
+ .imm = 5,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_insn, 0,
+"(struct bpf_insn){\n"
+" .code = (__u8)1,\n"
+" .dst_reg = (__u8)0x2,\n"
+" .src_reg = (__u8)0x3,\n"
+" .off = (__s16)4,\n"
+" .imm = (__s32)5,\n"
+"}",
+ {.code = 1, .dst_reg = 2, .src_reg = 3, .off = 4, .imm = 5});
+
+ /* zeroed bitfields should not be displayed */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_insn, BTF_F_COMPACT,
+ "(struct bpf_insn){.dst_reg = (__u8)0x1,}",
+ { .code = 0, .dst_reg = 1});
+
+ /* struct with enum bitfield */
+ type_id = btf__find_by_name(btf, "fs_context");
+ if (ASSERT_GT(type_id, 0, "find fs_context")) {
+ type_sz = btf__resolve_size(btf, type_id);
+ str[0] = '\0';
+
+ opts.emit_zeroes = true;
+ ret = btf_dump__dump_type_data(d, type_id, zero_data, type_sz, &opts);
+ ASSERT_EQ(ret, type_sz,
+ "unexpected return value dumping fs_context");
+
+ ASSERT_NEQ(strstr(str, "FS_CONTEXT_FOR_MOUNT"), NULL,
+ "bitfield value not present");
+ }
+
+ /* struct with nested anon union */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_sock_ops, BTF_F_COMPACT,
+ "(struct bpf_sock_ops){.op = (__u32)1,(union){.args = (__u32[4])[1,2,3,4,],.reply = (__u32)1,.replylong = (__u32[4])[1,2,3,4,],},}",
+ { .op = 1, .args = { 1, 2, 3, 4}});
+
+ /* union with nested struct */
+ TEST_BTF_DUMP_DATA(btf, d, "union", str, union bpf_iter_link_info, BTF_F_COMPACT,
+ "(union bpf_iter_link_info){.map = (struct){.map_fd = (__u32)1,},}",
+ { .map = { .map_fd = 1 }});
+
+ /* struct skb with nested structs/unions; because type output is so
+ * complex, we don't do a string comparison, just verify we return
+ * the type size as the amount of data displayed.
+ */
+ type_id = btf__find_by_name(btf, "sk_buff");
+ if (ASSERT_GT(type_id, 0, "find struct sk_buff")) {
+ type_sz = btf__resolve_size(btf, type_id);
+ str[0] = '\0';
+
+ ret = btf_dump__dump_type_data(d, type_id, skb, type_sz, &opts);
+ ASSERT_EQ(ret, type_sz,
+ "unexpected return value dumping sk_buff");
+ }
+
+ /* overflow bpf_sock_ops struct with final element nonzero/zero.
+ * Regardless of the value of the final field, we don't have all the
+ * data we need to display it, so we should trigger an overflow.
+ * In other words oveflow checking should trump "is field zero?"
+ * checks because if we've overflowed, it shouldn't matter what the
+ * field is - we can't trust its value so shouldn't display it.
+ */
+ TEST_BTF_DUMP_DATA_OVER(btf, d, "struct", str, struct bpf_sock_ops,
+ sizeof(struct bpf_sock_ops) - 1,
+ "(struct bpf_sock_ops){\n\t.op = (__u32)1,\n",
+ { .op = 1, .skb_tcp_flags = 2});
+ TEST_BTF_DUMP_DATA_OVER(btf, d, "struct", str, struct bpf_sock_ops,
+ sizeof(struct bpf_sock_ops) - 1,
+ "(struct bpf_sock_ops){\n\t.op = (__u32)1,\n",
+ { .op = 1, .skb_tcp_flags = 0});
+}
+
+static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d,
+ char *str)
+{
+ TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_number", int, BTF_F_COMPACT,
+ "int cpu_number = (int)100", 100);
+ TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_profile_flip", int, BTF_F_COMPACT,
+ "static int cpu_profile_flip = (int)2", 2);
+}
+
+static void test_btf_datasec(struct btf *btf, struct btf_dump *d, char *str,
+ const char *name, const char *expected_val,
+ void *data, size_t data_sz)
+{
+ DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts);
+ int ret = 0, cmp;
+ size_t secsize;
+ __s32 type_id;
+
+ opts.compact = true;
+
+ type_id = btf__find_by_name(btf, name);
+ if (!ASSERT_GT(type_id, 0, "find type id"))
+ return;
+
+ secsize = btf__resolve_size(btf, type_id);
+ ASSERT_EQ(secsize, 0, "verify section size");
+
+ str[0] = '\0';
+ ret = btf_dump__dump_type_data(d, type_id, data, data_sz, &opts);
+ ASSERT_EQ(ret, 0, "unexpected return value");
+
+ cmp = strcmp(str, expected_val);
+ ASSERT_EQ(cmp, 0, "ensure expected/actual match");
+}
+
+static void test_btf_dump_datasec_data(char *str)
+{
+ struct btf *btf = btf__parse("xdping_kern.o", NULL);
+ struct btf_dump_opts opts = { .ctx = str };
+ char license[4] = "GPL";
+ struct btf_dump *d;
+
+ if (!ASSERT_OK_PTR(btf, "xdping_kern.o BTF not found"))
+ return;
+
+ d = btf_dump__new(btf, NULL, &opts, btf_dump_snprintf);
+ if (!ASSERT_OK_PTR(d, "could not create BTF dump"))
+ return;
+
+ test_btf_datasec(btf, d, str, "license",
+ "SEC(\"license\") char[4] _license = (char[4])['G','P','L',];",
+ license, sizeof(license));
+}
+
void test_btf_dump() {
+ char str[STRSIZE];
+ struct btf_dump_opts opts = { .ctx = str };
+ struct btf_dump *d;
+ struct btf *btf;
int i;
for (i = 0; i < ARRAY_SIZE(btf_dump_test_cases); i++) {
}
if (test__start_subtest("btf_dump: incremental"))
test_btf_dump_incremental();
+
+ btf = libbpf_find_kernel_btf();
+ if (!ASSERT_OK_PTR(btf, "no kernel BTF found"))
+ return;
+
+ d = btf_dump__new(btf, NULL, &opts, btf_dump_snprintf);
+ if (!ASSERT_OK_PTR(d, "could not create BTF dump"))
+ return;
+
+ /* Verify type display for various types. */
+ if (test__start_subtest("btf_dump: int_data"))
+ test_btf_dump_int_data(btf, d, str);
+ if (test__start_subtest("btf_dump: float_data"))
+ test_btf_dump_float_data(btf, d, str);
+ if (test__start_subtest("btf_dump: char_data"))
+ test_btf_dump_char_data(btf, d, str);
+ if (test__start_subtest("btf_dump: typedef_data"))
+ test_btf_dump_typedef_data(btf, d, str);
+ if (test__start_subtest("btf_dump: enum_data"))
+ test_btf_dump_enum_data(btf, d, str);
+ if (test__start_subtest("btf_dump: struct_data"))
+ test_btf_dump_struct_data(btf, d, str);
+ if (test__start_subtest("btf_dump: var_data"))
+ test_btf_dump_var_data(btf, d, str);
+ btf_dump__free(d);
+ btf__free(btf);
+
+ if (test__start_subtest("btf_dump: datasec_data"))
+ test_btf_dump_datasec_data(str);
}
char btf_file[] = "/tmp/core_autosize.btf.XXXXXX";
int err, fd = -1, zero = 0;
int char_id, short_id, int_id, long_long_id, void_ptr_id, id;
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts);
struct test_core_autosize* skel = NULL;
- struct bpf_object_load_attr load_attr = {};
struct bpf_program *prog;
struct bpf_map *bss_map;
struct btf *btf = NULL;
fd = -1;
/* open and load BPF program with custom BTF as the kernel BTF */
- skel = test_core_autosize__open();
+ open_opts.btf_custom_path = btf_file;
+ skel = test_core_autosize__open_opts(&open_opts);
if (!ASSERT_OK_PTR(skel, "skel_open"))
- return;
+ goto cleanup;
/* disable handle_signed() for now */
prog = bpf_object__find_program_by_name(skel->obj, "handle_signed");
goto cleanup;
bpf_program__set_autoload(prog, false);
- load_attr.obj = skel->obj;
- load_attr.target_btf_path = btf_file;
- err = bpf_object__load_xattr(&load_attr);
+ err = bpf_object__load(skel->obj);
if (!ASSERT_OK(err, "prog_load"))
goto cleanup;
skel = NULL;
/* now re-load with handle_signed() enabled, it should fail loading */
- skel = test_core_autosize__open();
+ open_opts.btf_custom_path = btf_file;
+ skel = test_core_autosize__open_opts(&open_opts);
if (!ASSERT_OK_PTR(skel, "skel_open"))
- return;
+ goto cleanup;
- load_attr.obj = skel->obj;
- load_attr.target_btf_path = btf_file;
- err = bpf_object__load_xattr(&load_attr);
- if (!ASSERT_ERR(err, "bad_prog_load"))
+ err = test_core_autosize__load(skel);
+ if (!ASSERT_ERR(err, "skel_load"))
goto cleanup;
cleanup:
void test_core_reloc(void)
{
const size_t mmap_sz = roundup_page(sizeof(struct data));
- struct bpf_object_load_attr load_attr = {};
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts);
struct core_reloc_test_case *test_case;
const char *tp_name, *probe_name;
int err, i, equal;
continue;
}
- obj = bpf_object__open_file(test_case->bpf_obj_file, NULL);
+ if (test_case->btf_src_file) {
+ err = access(test_case->btf_src_file, R_OK);
+ if (!ASSERT_OK(err, "btf_src_file"))
+ goto cleanup;
+ }
+
+ open_opts.btf_custom_path = test_case->btf_src_file;
+ obj = bpf_object__open_file(test_case->bpf_obj_file, &open_opts);
if (!ASSERT_OK_PTR(obj, "obj_open"))
- continue;
+ goto cleanup;
probe_name = "raw_tracepoint/sys_enter";
tp_name = "sys_enter";
"prog '%s' not found\n", probe_name))
goto cleanup;
-
- if (test_case->btf_src_file) {
- err = access(test_case->btf_src_file, R_OK);
- if (!ASSERT_OK(err, "btf_src_file"))
- goto cleanup;
- }
-
- load_attr.obj = obj;
- load_attr.log_level = 0;
- load_attr.target_btf_path = test_case->btf_src_file;
- err = bpf_object__load_xattr(&load_attr);
+ err = bpf_object__load(obj);
if (err) {
if (!test_case->fails)
ASSERT_OK(err, "obj_load");
*/
#ifndef __x86_64__
bpf_program__set_autoload(skel->progs.test6, false);
+ bpf_program__set_autoload(skel->progs.test7, false);
#endif
err = get_func_ip_test__load(skel);
ASSERT_EQ(skel->bss->test5_result, 1, "test5_result");
#ifdef __x86_64__
ASSERT_EQ(skel->bss->test6_result, 1, "test6_result");
+ ASSERT_EQ(skel->bss->test7_result, 1, "test7_result");
#endif
cleanup:
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+
+#include <sys/sysinfo.h>
+#include <test_progs.h>
+#include "network_helpers.h"
+#include "netcnt_prog.skel.h"
+#include "netcnt_common.h"
+
+#define CG_NAME "/netcnt"
+
+void test_netcnt(void)
+{
+ union percpu_net_cnt *percpu_netcnt = NULL;
+ struct bpf_cgroup_storage_key key;
+ int map_fd, percpu_map_fd;
+ struct netcnt_prog *skel;
+ unsigned long packets;
+ union net_cnt netcnt;
+ unsigned long bytes;
+ int cpu, nproc;
+ int cg_fd = -1;
+ char cmd[128];
+
+ skel = netcnt_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "netcnt_prog__open_and_load"))
+ return;
+
+ nproc = get_nprocs_conf();
+ percpu_netcnt = malloc(sizeof(*percpu_netcnt) * nproc);
+ if (!ASSERT_OK_PTR(percpu_netcnt, "malloc(percpu_netcnt)"))
+ goto err;
+
+ cg_fd = test__join_cgroup(CG_NAME);
+ if (!ASSERT_GE(cg_fd, 0, "test__join_cgroup"))
+ goto err;
+
+ skel->links.bpf_nextcnt = bpf_program__attach_cgroup(skel->progs.bpf_nextcnt, cg_fd);
+ if (!ASSERT_OK_PTR(skel->links.bpf_nextcnt,
+ "attach_cgroup(bpf_nextcnt)"))
+ goto err;
+
+ snprintf(cmd, sizeof(cmd), "%s ::1 -A -c 10000 -q > /dev/null", ping_command(AF_INET6));
+ ASSERT_OK(system(cmd), cmd);
+
+ map_fd = bpf_map__fd(skel->maps.netcnt);
+ if (!ASSERT_OK(bpf_map_get_next_key(map_fd, NULL, &key), "bpf_map_get_next_key"))
+ goto err;
+
+ if (!ASSERT_OK(bpf_map_lookup_elem(map_fd, &key, &netcnt), "bpf_map_lookup_elem(netcnt)"))
+ goto err;
+
+ percpu_map_fd = bpf_map__fd(skel->maps.percpu_netcnt);
+ if (!ASSERT_OK(bpf_map_lookup_elem(percpu_map_fd, &key, &percpu_netcnt[0]),
+ "bpf_map_lookup_elem(percpu_netcnt)"))
+ goto err;
+
+ /* Some packets can be still in per-cpu cache, but not more than
+ * MAX_PERCPU_PACKETS.
+ */
+ packets = netcnt.packets;
+ bytes = netcnt.bytes;
+ for (cpu = 0; cpu < nproc; cpu++) {
+ ASSERT_LE(percpu_netcnt[cpu].packets, MAX_PERCPU_PACKETS, "MAX_PERCPU_PACKETS");
+
+ packets += percpu_netcnt[cpu].packets;
+ bytes += percpu_netcnt[cpu].bytes;
+ }
+
+ /* No packets should be lost */
+ ASSERT_EQ(packets, 10000, "packets");
+
+ /* Let's check that bytes counter matches the number of packets
+ * multiplied by the size of ipv6 ICMP packet.
+ */
+ ASSERT_EQ(bytes, packets * 104, "bytes");
+
+err:
+ if (cg_fd != -1)
+ close(cg_fd);
+ free(percpu_netcnt);
+ netcnt_prog__destroy(skel);
+}
if (CHECK(err, "pin maps", "err %d errno %d\n", err, errno))
goto out;
+ /* get pinning path */
+ if (!ASSERT_STREQ(bpf_map__pin_path(map), pinpath, "get pin path"))
+ goto out;
+
/* set pinning path of other map and re-pin all */
map = bpf_object__find_map_by_name(obj, "nopinmap");
if (CHECK(!map, "find map", "NULL map"))
if (CHECK(err, "set pin path", "err %d errno %d\n", err, errno))
goto out;
+ /* get pinning path after set */
+ if (!ASSERT_STREQ(bpf_map__pin_path(map), custpinpath,
+ "get pin path after set"))
+ goto out;
+
/* should only pin the one unpinned map */
err = bpf_object__pin_maps(obj, NULL);
if (CHECK(err, "pin maps", "err %d errno %d\n", err, errno))
if (!test__start_subtest(title))
continue;
- /* Expect verifier failure if test name has 'fail' */
- if (strstr(title, "fail") != NULL) {
+ /* Expect verifier failure if test name has 'err' */
+ if (strstr(title, "err_") != NULL) {
libbpf_print_fn_t old_print_fn;
old_print_fn = libbpf_set_print(NULL);
#define _GNU_SOURCE
#include <arpa/inet.h>
+#include <linux/if.h>
+#include <linux/if_tun.h>
#include <linux/limits.h>
#include <linux/sysctl.h>
-#include <linux/if_tun.h>
-#include <linux/if.h>
#include <sched.h>
#include <stdbool.h>
#include <stdio.h>
-#include <sys/stat.h>
#include <sys/mount.h>
+#include <sys/stat.h>
+#include <unistd.h>
#include "test_progs.h"
#include "network_helpers.h"
static int test_ping(int family, const char *addr)
{
- const char *ping = family == AF_INET6 ? "ping6" : "ping";
-
- SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping, addr);
+ SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping_command(family), addr);
return 0;
fail:
return -1;
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+
+/**
+ * Test XDP bonding support
+ *
+ * Sets up two bonded veth pairs between two fresh namespaces
+ * and verifies that XDP_TX program loaded on a bond device
+ * are correctly loaded onto the slave devices and XDP_TX'd
+ * packets are balanced using bonding.
+ */
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <net/if.h>
+#include <linux/if_link.h>
+#include "test_progs.h"
+#include "network_helpers.h"
+#include <linux/if_bonding.h>
+#include <linux/limits.h>
+#include <linux/udp.h>
+
+#include "xdp_dummy.skel.h"
+#include "xdp_redirect_multi_kern.skel.h"
+#include "xdp_tx.skel.h"
+
+#define BOND1_MAC {0x00, 0x11, 0x22, 0x33, 0x44, 0x55}
+#define BOND1_MAC_STR "00:11:22:33:44:55"
+#define BOND2_MAC {0x00, 0x22, 0x33, 0x44, 0x55, 0x66}
+#define BOND2_MAC_STR "00:22:33:44:55:66"
+#define NPACKETS 100
+
+static int root_netns_fd = -1;
+
+static void restore_root_netns(void)
+{
+ ASSERT_OK(setns(root_netns_fd, CLONE_NEWNET), "restore_root_netns");
+}
+
+static int setns_by_name(char *name)
+{
+ int nsfd, err;
+ char nspath[PATH_MAX];
+
+ snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name);
+ nsfd = open(nspath, O_RDONLY | O_CLOEXEC);
+ if (nsfd < 0)
+ return -1;
+
+ err = setns(nsfd, CLONE_NEWNET);
+ close(nsfd);
+ return err;
+}
+
+static int get_rx_packets(const char *iface)
+{
+ FILE *f;
+ char line[512];
+ int iface_len = strlen(iface);
+
+ f = fopen("/proc/net/dev", "r");
+ if (!f)
+ return -1;
+
+ while (fgets(line, sizeof(line), f)) {
+ char *p = line;
+
+ while (*p == ' ')
+ p++; /* skip whitespace */
+ if (!strncmp(p, iface, iface_len)) {
+ p += iface_len;
+ if (*p++ != ':')
+ continue;
+ while (*p == ' ')
+ p++; /* skip whitespace */
+ while (*p && *p != ' ')
+ p++; /* skip rx bytes */
+ while (*p == ' ')
+ p++; /* skip whitespace */
+ fclose(f);
+ return atoi(p);
+ }
+ }
+ fclose(f);
+ return -1;
+}
+
+#define MAX_BPF_LINKS 8
+
+struct skeletons {
+ struct xdp_dummy *xdp_dummy;
+ struct xdp_tx *xdp_tx;
+ struct xdp_redirect_multi_kern *xdp_redirect_multi_kern;
+
+ int nlinks;
+ struct bpf_link *links[MAX_BPF_LINKS];
+};
+
+static int xdp_attach(struct skeletons *skeletons, struct bpf_program *prog, char *iface)
+{
+ struct bpf_link *link;
+ int ifindex;
+
+ ifindex = if_nametoindex(iface);
+ if (!ASSERT_GT(ifindex, 0, "get ifindex"))
+ return -1;
+
+ if (!ASSERT_LE(skeletons->nlinks+1, MAX_BPF_LINKS, "too many XDP programs attached"))
+ return -1;
+
+ link = bpf_program__attach_xdp(prog, ifindex);
+ if (!ASSERT_OK_PTR(link, "attach xdp program"))
+ return -1;
+
+ skeletons->links[skeletons->nlinks++] = link;
+ return 0;
+}
+
+enum {
+ BOND_ONE_NO_ATTACH = 0,
+ BOND_BOTH_AND_ATTACH,
+};
+
+static const char * const mode_names[] = {
+ [BOND_MODE_ROUNDROBIN] = "balance-rr",
+ [BOND_MODE_ACTIVEBACKUP] = "active-backup",
+ [BOND_MODE_XOR] = "balance-xor",
+ [BOND_MODE_BROADCAST] = "broadcast",
+ [BOND_MODE_8023AD] = "802.3ad",
+ [BOND_MODE_TLB] = "balance-tlb",
+ [BOND_MODE_ALB] = "balance-alb",
+};
+
+static const char * const xmit_policy_names[] = {
+ [BOND_XMIT_POLICY_LAYER2] = "layer2",
+ [BOND_XMIT_POLICY_LAYER34] = "layer3+4",
+ [BOND_XMIT_POLICY_LAYER23] = "layer2+3",
+ [BOND_XMIT_POLICY_ENCAP23] = "encap2+3",
+ [BOND_XMIT_POLICY_ENCAP34] = "encap3+4",
+};
+
+static int bonding_setup(struct skeletons *skeletons, int mode, int xmit_policy,
+ int bond_both_attach)
+{
+#define SYS(fmt, ...) \
+ ({ \
+ char cmd[1024]; \
+ snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
+ if (!ASSERT_OK(system(cmd), cmd)) \
+ return -1; \
+ })
+
+ SYS("ip netns add ns_dst");
+ SYS("ip link add veth1_1 type veth peer name veth2_1 netns ns_dst");
+ SYS("ip link add veth1_2 type veth peer name veth2_2 netns ns_dst");
+
+ SYS("ip link add bond1 type bond mode %s xmit_hash_policy %s",
+ mode_names[mode], xmit_policy_names[xmit_policy]);
+ SYS("ip link set bond1 up address " BOND1_MAC_STR " addrgenmode none");
+ SYS("ip -netns ns_dst link add bond2 type bond mode %s xmit_hash_policy %s",
+ mode_names[mode], xmit_policy_names[xmit_policy]);
+ SYS("ip -netns ns_dst link set bond2 up address " BOND2_MAC_STR " addrgenmode none");
+
+ SYS("ip link set veth1_1 master bond1");
+ if (bond_both_attach == BOND_BOTH_AND_ATTACH) {
+ SYS("ip link set veth1_2 master bond1");
+ } else {
+ SYS("ip link set veth1_2 up addrgenmode none");
+
+ if (xdp_attach(skeletons, skeletons->xdp_dummy->progs.xdp_dummy_prog, "veth1_2"))
+ return -1;
+ }
+
+ SYS("ip -netns ns_dst link set veth2_1 master bond2");
+
+ if (bond_both_attach == BOND_BOTH_AND_ATTACH)
+ SYS("ip -netns ns_dst link set veth2_2 master bond2");
+ else
+ SYS("ip -netns ns_dst link set veth2_2 up addrgenmode none");
+
+ /* Load a dummy program on sending side as with veth peer needs to have a
+ * XDP program loaded as well.
+ */
+ if (xdp_attach(skeletons, skeletons->xdp_dummy->progs.xdp_dummy_prog, "bond1"))
+ return -1;
+
+ if (bond_both_attach == BOND_BOTH_AND_ATTACH) {
+ if (!ASSERT_OK(setns_by_name("ns_dst"), "set netns to ns_dst"))
+ return -1;
+
+ if (xdp_attach(skeletons, skeletons->xdp_tx->progs.xdp_tx, "bond2"))
+ return -1;
+
+ restore_root_netns();
+ }
+
+ return 0;
+
+#undef SYS
+}
+
+static void bonding_cleanup(struct skeletons *skeletons)
+{
+ restore_root_netns();
+ while (skeletons->nlinks) {
+ skeletons->nlinks--;
+ bpf_link__destroy(skeletons->links[skeletons->nlinks]);
+ }
+ ASSERT_OK(system("ip link delete bond1"), "delete bond1");
+ ASSERT_OK(system("ip link delete veth1_1"), "delete veth1_1");
+ ASSERT_OK(system("ip link delete veth1_2"), "delete veth1_2");
+ ASSERT_OK(system("ip netns delete ns_dst"), "delete ns_dst");
+}
+
+static int send_udp_packets(int vary_dst_ip)
+{
+ struct ethhdr eh = {
+ .h_source = BOND1_MAC,
+ .h_dest = BOND2_MAC,
+ .h_proto = htons(ETH_P_IP),
+ };
+ uint8_t buf[128] = {};
+ struct iphdr *iph = (struct iphdr *)(buf + sizeof(eh));
+ struct udphdr *uh = (struct udphdr *)(buf + sizeof(eh) + sizeof(*iph));
+ int i, s = -1;
+ int ifindex;
+
+ s = socket(AF_PACKET, SOCK_RAW, IPPROTO_RAW);
+ if (!ASSERT_GE(s, 0, "socket"))
+ goto err;
+
+ ifindex = if_nametoindex("bond1");
+ if (!ASSERT_GT(ifindex, 0, "get bond1 ifindex"))
+ goto err;
+
+ memcpy(buf, &eh, sizeof(eh));
+ iph->ihl = 5;
+ iph->version = 4;
+ iph->tos = 16;
+ iph->id = 1;
+ iph->ttl = 64;
+ iph->protocol = IPPROTO_UDP;
+ iph->saddr = 1;
+ iph->daddr = 2;
+ iph->tot_len = htons(sizeof(buf) - ETH_HLEN);
+ iph->check = 0;
+
+ for (i = 1; i <= NPACKETS; i++) {
+ int n;
+ struct sockaddr_ll saddr_ll = {
+ .sll_ifindex = ifindex,
+ .sll_halen = ETH_ALEN,
+ .sll_addr = BOND2_MAC,
+ };
+
+ /* vary the UDP destination port for even distribution with roundrobin/xor modes */
+ uh->dest++;
+
+ if (vary_dst_ip)
+ iph->daddr++;
+
+ n = sendto(s, buf, sizeof(buf), 0, (struct sockaddr *)&saddr_ll, sizeof(saddr_ll));
+ if (!ASSERT_EQ(n, sizeof(buf), "sendto"))
+ goto err;
+ }
+
+ return 0;
+
+err:
+ if (s >= 0)
+ close(s);
+ return -1;
+}
+
+static void test_xdp_bonding_with_mode(struct skeletons *skeletons, int mode, int xmit_policy)
+{
+ int bond1_rx;
+
+ if (bonding_setup(skeletons, mode, xmit_policy, BOND_BOTH_AND_ATTACH))
+ goto out;
+
+ if (send_udp_packets(xmit_policy != BOND_XMIT_POLICY_LAYER34))
+ goto out;
+
+ bond1_rx = get_rx_packets("bond1");
+ ASSERT_EQ(bond1_rx, NPACKETS, "expected more received packets");
+
+ switch (mode) {
+ case BOND_MODE_ROUNDROBIN:
+ case BOND_MODE_XOR: {
+ int veth1_rx = get_rx_packets("veth1_1");
+ int veth2_rx = get_rx_packets("veth1_2");
+ int diff = abs(veth1_rx - veth2_rx);
+
+ ASSERT_GE(veth1_rx + veth2_rx, NPACKETS, "expected more packets");
+
+ switch (xmit_policy) {
+ case BOND_XMIT_POLICY_LAYER2:
+ ASSERT_GE(diff, NPACKETS,
+ "expected packets on only one of the interfaces");
+ break;
+ case BOND_XMIT_POLICY_LAYER23:
+ case BOND_XMIT_POLICY_LAYER34:
+ ASSERT_LT(diff, NPACKETS/2,
+ "expected even distribution of packets");
+ break;
+ default:
+ PRINT_FAIL("Unimplemented xmit_policy=%d\n", xmit_policy);
+ break;
+ }
+ break;
+ }
+ case BOND_MODE_ACTIVEBACKUP: {
+ int veth1_rx = get_rx_packets("veth1_1");
+ int veth2_rx = get_rx_packets("veth1_2");
+ int diff = abs(veth1_rx - veth2_rx);
+
+ ASSERT_GE(diff, NPACKETS,
+ "expected packets on only one of the interfaces");
+ break;
+ }
+ default:
+ PRINT_FAIL("Unimplemented xmit_policy=%d\n", xmit_policy);
+ break;
+ }
+
+out:
+ bonding_cleanup(skeletons);
+}
+
+/* Test the broadcast redirection using xdp_redirect_map_multi_prog and adding
+ * all the interfaces to it and checking that broadcasting won't send the packet
+ * to neither the ingress bond device (bond2) or its slave (veth2_1).
+ */
+static void test_xdp_bonding_redirect_multi(struct skeletons *skeletons)
+{
+ static const char * const ifaces[] = {"bond2", "veth2_1", "veth2_2"};
+ int veth1_1_rx, veth1_2_rx;
+ int err;
+
+ if (bonding_setup(skeletons, BOND_MODE_ROUNDROBIN, BOND_XMIT_POLICY_LAYER23,
+ BOND_ONE_NO_ATTACH))
+ goto out;
+
+
+ if (!ASSERT_OK(setns_by_name("ns_dst"), "could not set netns to ns_dst"))
+ goto out;
+
+ /* populate the devmap with the relevant interfaces */
+ for (int i = 0; i < ARRAY_SIZE(ifaces); i++) {
+ int ifindex = if_nametoindex(ifaces[i]);
+ int map_fd = bpf_map__fd(skeletons->xdp_redirect_multi_kern->maps.map_all);
+
+ if (!ASSERT_GT(ifindex, 0, "could not get interface index"))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &ifindex, &ifindex, 0);
+ if (!ASSERT_OK(err, "add interface to map_all"))
+ goto out;
+ }
+
+ if (xdp_attach(skeletons,
+ skeletons->xdp_redirect_multi_kern->progs.xdp_redirect_map_multi_prog,
+ "bond2"))
+ goto out;
+
+ restore_root_netns();
+
+ if (send_udp_packets(BOND_MODE_ROUNDROBIN))
+ goto out;
+
+ veth1_1_rx = get_rx_packets("veth1_1");
+ veth1_2_rx = get_rx_packets("veth1_2");
+
+ ASSERT_EQ(veth1_1_rx, 0, "expected no packets on veth1_1");
+ ASSERT_GE(veth1_2_rx, NPACKETS, "expected packets on veth1_2");
+
+out:
+ restore_root_netns();
+ bonding_cleanup(skeletons);
+}
+
+/* Test that XDP programs cannot be attached to both the bond master and slaves simultaneously */
+static void test_xdp_bonding_attach(struct skeletons *skeletons)
+{
+ struct bpf_link *link = NULL;
+ struct bpf_link *link2 = NULL;
+ int veth, bond;
+ int err;
+
+ if (!ASSERT_OK(system("ip link add veth type veth"), "add veth"))
+ goto out;
+ if (!ASSERT_OK(system("ip link add bond type bond"), "add bond"))
+ goto out;
+
+ veth = if_nametoindex("veth");
+ if (!ASSERT_GE(veth, 0, "if_nametoindex veth"))
+ goto out;
+ bond = if_nametoindex("bond");
+ if (!ASSERT_GE(bond, 0, "if_nametoindex bond"))
+ goto out;
+
+ /* enslaving with a XDP program loaded fails */
+ link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, veth);
+ if (!ASSERT_OK_PTR(link, "attach program to veth"))
+ goto out;
+
+ err = system("ip link set veth master bond");
+ if (!ASSERT_NEQ(err, 0, "attaching slave with xdp program expected to fail"))
+ goto out;
+
+ bpf_link__destroy(link);
+ link = NULL;
+
+ err = system("ip link set veth master bond");
+ if (!ASSERT_OK(err, "set veth master"))
+ goto out;
+
+ /* attaching to slave when master has no program is allowed */
+ link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, veth);
+ if (!ASSERT_OK_PTR(link, "attach program to slave when enslaved"))
+ goto out;
+
+ /* attaching to master not allowed when slave has program loaded */
+ link2 = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond);
+ if (!ASSERT_ERR_PTR(link2, "attach program to master when slave has program"))
+ goto out;
+
+ bpf_link__destroy(link);
+ link = NULL;
+
+ /* attaching XDP program to master allowed when slave has no program */
+ link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond);
+ if (!ASSERT_OK_PTR(link, "attach program to master"))
+ goto out;
+
+ /* attaching to slave not allowed when master has program loaded */
+ link2 = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond);
+ ASSERT_ERR_PTR(link2, "attach program to slave when master has program");
+
+out:
+ bpf_link__destroy(link);
+ bpf_link__destroy(link2);
+
+ system("ip link del veth");
+ system("ip link del bond");
+}
+
+static int libbpf_debug_print(enum libbpf_print_level level,
+ const char *format, va_list args)
+{
+ if (level != LIBBPF_WARN)
+ vprintf(format, args);
+ return 0;
+}
+
+struct bond_test_case {
+ char *name;
+ int mode;
+ int xmit_policy;
+};
+
+static struct bond_test_case bond_test_cases[] = {
+ { "xdp_bonding_roundrobin", BOND_MODE_ROUNDROBIN, BOND_XMIT_POLICY_LAYER23, },
+ { "xdp_bonding_activebackup", BOND_MODE_ACTIVEBACKUP, BOND_XMIT_POLICY_LAYER23 },
+
+ { "xdp_bonding_xor_layer2", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER2, },
+ { "xdp_bonding_xor_layer23", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER23, },
+ { "xdp_bonding_xor_layer34", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER34, },
+};
+
+void test_xdp_bonding(void)
+{
+ libbpf_print_fn_t old_print_fn;
+ struct skeletons skeletons = {};
+ int i;
+
+ old_print_fn = libbpf_set_print(libbpf_debug_print);
+
+ root_netns_fd = open("/proc/self/ns/net", O_RDONLY);
+ if (!ASSERT_GE(root_netns_fd, 0, "open /proc/self/ns/net"))
+ goto out;
+
+ skeletons.xdp_dummy = xdp_dummy__open_and_load();
+ if (!ASSERT_OK_PTR(skeletons.xdp_dummy, "xdp_dummy__open_and_load"))
+ goto out;
+
+ skeletons.xdp_tx = xdp_tx__open_and_load();
+ if (!ASSERT_OK_PTR(skeletons.xdp_tx, "xdp_tx__open_and_load"))
+ goto out;
+
+ skeletons.xdp_redirect_multi_kern = xdp_redirect_multi_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skeletons.xdp_redirect_multi_kern,
+ "xdp_redirect_multi_kern__open_and_load"))
+ goto out;
+
+ if (!test__start_subtest("xdp_bonding_attach"))
+ test_xdp_bonding_attach(&skeletons);
+
+ for (i = 0; i < ARRAY_SIZE(bond_test_cases); i++) {
+ struct bond_test_case *test_case = &bond_test_cases[i];
+
+ if (!test__start_subtest(test_case->name))
+ test_xdp_bonding_with_mode(
+ &skeletons,
+ test_case->mode,
+ test_case->xmit_policy);
+ }
+
+ if (!test__start_subtest("xdp_bonding_redirect_multi"))
+ test_xdp_bonding_redirect_multi(&skeletons);
+
+out:
+ xdp_dummy__destroy(skeletons.xdp_dummy);
+ xdp_tx__destroy(skeletons.xdp_tx);
+ xdp_redirect_multi_kern__destroy(skeletons.xdp_redirect_multi_kern);
+
+ libbpf_set_print(old_print_fn);
+ if (root_netns_fd >= 0)
+ close(root_netns_fd);
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define bpf_tcp_sk(skc) ({ \
+ struct sock_common *_skc = skc; \
+ sk = NULL; \
+ tp = NULL; \
+ if (_skc) { \
+ tp = bpf_skc_to_tcp_sock(_skc); \
+ sk = (struct sock *)tp; \
+ } \
+ tp; \
+})
+
+unsigned short reuse_listen_hport = 0;
+unsigned short listen_hport = 0;
+char cubic_cc[TCP_CA_NAME_MAX] = "bpf_cubic";
+char dctcp_cc[TCP_CA_NAME_MAX] = "bpf_dctcp";
+bool random_retry = false;
+
+static bool tcp_cc_eq(const char *a, const char *b)
+{
+ int i;
+
+ for (i = 0; i < TCP_CA_NAME_MAX; i++) {
+ if (a[i] != b[i])
+ return false;
+ if (!a[i])
+ break;
+ }
+
+ return true;
+}
+
+SEC("iter/tcp")
+int change_tcp_cc(struct bpf_iter__tcp *ctx)
+{
+ char cur_cc[TCP_CA_NAME_MAX];
+ struct tcp_sock *tp;
+ struct sock *sk;
+ int ret;
+
+ if (!bpf_tcp_sk(ctx->sk_common))
+ return 0;
+
+ if (sk->sk_family != AF_INET6 ||
+ (sk->sk_state != TCP_LISTEN &&
+ sk->sk_state != TCP_ESTABLISHED) ||
+ (sk->sk_num != reuse_listen_hport &&
+ sk->sk_num != listen_hport &&
+ bpf_ntohs(sk->sk_dport) != listen_hport))
+ return 0;
+
+ if (bpf_getsockopt(tp, SOL_TCP, TCP_CONGESTION,
+ cur_cc, sizeof(cur_cc)))
+ return 0;
+
+ if (!tcp_cc_eq(cur_cc, cubic_cc))
+ return 0;
+
+ if (random_retry && bpf_get_prandom_u32() % 4 == 1)
+ return 1;
+
+ bpf_setsockopt(tp, SOL_TCP, TCP_CONGESTION, dctcp_cc, sizeof(dctcp_cc));
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
}
BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ",
- seq_num, src, srcp, destp, destp);
+ seq_num, src, srcp, dest, destp);
BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d ",
state,
tp->write_seq - tp->snd_una, rx_queue,
#define AF_INET 2
#define AF_INET6 10
+#define SOL_TCP 6
+#define TCP_CONGESTION 13
+#define TCP_CA_NAME_MAX 16
+
#define ICSK_TIME_RETRANS 1
#define ICSK_TIME_PROBE0 3
#define ICSK_TIME_LOSS_PROBE 5
#define ir_v6_rmt_addr req.__req_common.skc_v6_daddr
#define ir_v6_loc_addr req.__req_common.skc_v6_rcv_saddr
+#define sk_num __sk_common.skc_num
+#define sk_dport __sk_common.skc_dport
#define sk_family __sk_common.skc_family
#define sk_rmem_alloc sk_backlog.rmem_alloc
#define sk_refcnt __sk_common.skc_refcnt
extern const void bpf_fentry_test4 __ksym;
extern const void bpf_modify_return_test __ksym;
extern const void bpf_fentry_test6 __ksym;
+extern const void bpf_fentry_test7 __ksym;
__u64 test1_result = 0;
SEC("fentry/bpf_fentry_test1")
test6_result = (const void *) addr == &bpf_fentry_test6 + 5;
return 0;
}
+
+__u64 test7_result = 0;
+SEC("kprobe/bpf_fentry_test7+5")
+int test7(struct pt_regs *ctx)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ test7_result = (const void *) addr == &bpf_fentry_test7 + 5;
+ return 0;
+}
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
__type(key, struct bpf_cgroup_storage_key);
- __type(value, struct percpu_net_cnt);
+ __type(value, union percpu_net_cnt);
} percpu_netcnt SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
__type(key, struct bpf_cgroup_storage_key);
- __type(value, struct net_cnt);
+ __type(value, union net_cnt);
} netcnt SEC(".maps");
SEC("cgroup/skb")
int bpf_nextcnt(struct __sk_buff *skb)
{
- struct percpu_net_cnt *percpu_cnt;
+ union percpu_net_cnt *percpu_cnt;
char fmt[] = "%d %llu %llu\n";
- struct net_cnt *cnt;
+ union net_cnt *cnt;
__u64 ts, dt;
int ret;
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Isovalent, Inc. */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct inner {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, int);
+ __uint(max_entries, 4);
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 0); /* This will make map creation to fail */
+ __uint(key_size, sizeof(__u32));
+ __array(values, struct inner);
+} mim SEC(".maps");
+
+SEC("xdp")
+int xdp_noop0(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
return 0;
}
-SEC("classifier/fail_use_after_free")
+SEC("classifier/err_use_after_free")
int bpf_sk_lookup_uaf(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
return family;
}
-SEC("classifier/fail_modify_sk_pointer")
+SEC("classifier/err_modify_sk_pointer")
int bpf_sk_lookup_modptr(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
return 0;
}
-SEC("classifier/fail_modify_sk_or_null_pointer")
+SEC("classifier/err_modify_sk_or_null_pointer")
int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
return 0;
}
-SEC("classifier/fail_no_release")
+SEC("classifier/err_no_release")
int bpf_sk_lookup_test2(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
return 0;
}
-SEC("classifier/fail_release_twice")
+SEC("classifier/err_release_twice")
int bpf_sk_lookup_test3(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
return 0;
}
-SEC("classifier/fail_release_unchecked")
+SEC("classifier/err_release_unchecked")
int bpf_sk_lookup_test4(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
}
-SEC("classifier/fail_no_release_subcall")
+SEC("classifier/err_no_release_subcall")
int bpf_sk_lookup_test5(struct __sk_buff *skb)
{
lookup_no_release(skb);
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-SEC("tx")
+SEC("xdp")
int xdp_tx(struct xdp_md *xdp)
{
return XDP_TX;
--- /dev/null
+#!/usr/bin/env python3
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+#
+# Copyright (C) 2021 Isovalent, Inc.
+
+import argparse
+import re
+import os, sys
+
+LINUX_ROOT = os.path.abspath(os.path.join(__file__,
+ os.pardir, os.pardir, os.pardir, os.pardir, os.pardir))
+BPFTOOL_DIR = os.path.join(LINUX_ROOT, 'tools/bpf/bpftool')
+retval = 0
+
+class BlockParser(object):
+ """
+ A parser for extracting set of values from blocks such as enums.
+ @reader: a pointer to the open file to parse
+ """
+ def __init__(self, reader):
+ self.reader = reader
+
+ def search_block(self, start_marker):
+ """
+ Search for a given structure in a file.
+ @start_marker: regex marking the beginning of a structure to parse
+ """
+ offset = self.reader.tell()
+ array_start = re.search(start_marker, self.reader.read())
+ if array_start is None:
+ raise Exception('Failed to find start of block')
+ self.reader.seek(offset + array_start.start())
+
+ def parse(self, pattern, end_marker):
+ """
+ Parse a block and return a set of values. Values to extract must be
+ on separate lines in the file.
+ @pattern: pattern used to identify the values to extract
+ @end_marker: regex marking the end of the block to parse
+ """
+ entries = set()
+ while True:
+ line = self.reader.readline()
+ if not line or re.match(end_marker, line):
+ break
+ capture = pattern.search(line)
+ if capture and pattern.groups >= 1:
+ entries.add(capture.group(1))
+ return entries
+
+class ArrayParser(BlockParser):
+ """
+ A parser for extracting dicionaries of values from some BPF-related arrays.
+ @reader: a pointer to the open file to parse
+ @array_name: name of the array to parse
+ """
+ end_marker = re.compile('^};')
+
+ def __init__(self, reader, array_name):
+ self.array_name = array_name
+ self.start_marker = re.compile(f'(static )?const char \* const {self.array_name}\[.*\] = {{\n')
+ super().__init__(reader)
+
+ def search_block(self):
+ """
+ Search for the given array in a file.
+ """
+ super().search_block(self.start_marker);
+
+ def parse(self):
+ """
+ Parse a block and return data as a dictionary. Items to extract must be
+ on separate lines in the file.
+ """
+ pattern = re.compile('\[(BPF_\w*)\]\s*= "(.*)",?$')
+ entries = {}
+ while True:
+ line = self.reader.readline()
+ if line == '' or re.match(self.end_marker, line):
+ break
+ capture = pattern.search(line)
+ if capture:
+ entries[capture.group(1)] = capture.group(2)
+ return entries
+
+class InlineListParser(BlockParser):
+ """
+ A parser for extracting set of values from inline lists.
+ """
+ def parse(self, pattern, end_marker):
+ """
+ Parse a block and return a set of values. Multiple values to extract
+ can be on a same line in the file.
+ @pattern: pattern used to identify the values to extract
+ @end_marker: regex marking the end of the block to parse
+ """
+ entries = set()
+ while True:
+ line = self.reader.readline()
+ if not line:
+ break
+ entries.update(pattern.findall(line))
+ if re.search(end_marker, line):
+ break
+ return entries
+
+class FileExtractor(object):
+ """
+ A generic reader for extracting data from a given file. This class contains
+ several helper methods that wrap arround parser objects to extract values
+ from different structures.
+ This class does not offer a way to set a filename, which is expected to be
+ defined in children classes.
+ """
+ def __init__(self):
+ self.reader = open(self.filename, 'r')
+
+ def close(self):
+ """
+ Close the file used by the parser.
+ """
+ self.reader.close()
+
+ def reset_read(self):
+ """
+ Reset the file position indicator for this parser. This is useful when
+ parsing several structures in the file without respecting the order in
+ which those structures appear in the file.
+ """
+ self.reader.seek(0)
+
+ def get_types_from_array(self, array_name):
+ """
+ Search for and parse an array associating names to BPF_* enum members,
+ for example:
+
+ const char * const prog_type_name[] = {
+ [BPF_PROG_TYPE_UNSPEC] = "unspec",
+ [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter",
+ [BPF_PROG_TYPE_KPROBE] = "kprobe",
+ };
+
+ Return a dictionary with the enum member names as keys and the
+ associated names as values, for example:
+
+ {'BPF_PROG_TYPE_UNSPEC': 'unspec',
+ 'BPF_PROG_TYPE_SOCKET_FILTER': 'socket_filter',
+ 'BPF_PROG_TYPE_KPROBE': 'kprobe'}
+
+ @array_name: name of the array to parse
+ """
+ array_parser = ArrayParser(self.reader, array_name)
+ array_parser.search_block()
+ return array_parser.parse()
+
+ def get_enum(self, enum_name):
+ """
+ Search for and parse an enum containing BPF_* members, for example:
+
+ enum bpf_prog_type {
+ BPF_PROG_TYPE_UNSPEC,
+ BPF_PROG_TYPE_SOCKET_FILTER,
+ BPF_PROG_TYPE_KPROBE,
+ };
+
+ Return a set containing all member names, for example:
+
+ {'BPF_PROG_TYPE_UNSPEC',
+ 'BPF_PROG_TYPE_SOCKET_FILTER',
+ 'BPF_PROG_TYPE_KPROBE'}
+
+ @enum_name: name of the enum to parse
+ """
+ start_marker = re.compile(f'enum {enum_name} {{\n')
+ pattern = re.compile('^\s*(BPF_\w+),?$')
+ end_marker = re.compile('^};')
+ parser = BlockParser(self.reader)
+ parser.search_block(start_marker)
+ return parser.parse(pattern, end_marker)
+
+ def __get_description_list(self, start_marker, pattern, end_marker):
+ parser = InlineListParser(self.reader)
+ parser.search_block(start_marker)
+ return parser.parse(pattern, end_marker)
+
+ def get_rst_list(self, block_name):
+ """
+ Search for and parse a list of type names from RST documentation, for
+ example:
+
+ | *TYPE* := {
+ | **socket** | **kprobe** |
+ | **kretprobe**
+ | }
+
+ Return a set containing all type names, for example:
+
+ {'socket', 'kprobe', 'kretprobe'}
+
+ @block_name: name of the blog to parse, 'TYPE' in the example
+ """
+ start_marker = re.compile(f'\*{block_name}\* := {{')
+ pattern = re.compile('\*\*([\w/-]+)\*\*')
+ end_marker = re.compile('}\n')
+ return self.__get_description_list(start_marker, pattern, end_marker)
+
+ def get_help_list(self, block_name):
+ """
+ Search for and parse a list of type names from a help message in
+ bpftool, for example:
+
+ " TYPE := { socket | kprobe |\\n"
+ " kretprobe }\\n"
+
+ Return a set containing all type names, for example:
+
+ {'socket', 'kprobe', 'kretprobe'}
+
+ @block_name: name of the blog to parse, 'TYPE' in the example
+ """
+ start_marker = re.compile(f'"\s*{block_name} := {{')
+ pattern = re.compile('([\w/]+) [|}]')
+ end_marker = re.compile('}')
+ return self.__get_description_list(start_marker, pattern, end_marker)
+
+ def get_help_list_macro(self, macro):
+ """
+ Search for and parse a list of values from a help message starting with
+ a macro in bpftool, for example:
+
+ " " HELP_SPEC_OPTIONS " |\\n"
+ " {-f|--bpffs} | {-m|--mapcompat} | {-n|--nomount} }\\n"
+
+ Return a set containing all item names, for example:
+
+ {'-f', '--bpffs', '-m', '--mapcompat', '-n', '--nomount'}
+
+ @macro: macro starting the block, 'HELP_SPEC_OPTIONS' in the example
+ """
+ start_marker = re.compile(f'"\s*{macro}\s*" [|}}]')
+ pattern = re.compile('([\w-]+) ?(?:\||}[ }\]])')
+ end_marker = re.compile('}\\\\n')
+ return self.__get_description_list(start_marker, pattern, end_marker)
+
+ def default_options(self):
+ """
+ Return the default options contained in HELP_SPEC_OPTIONS
+ """
+ return { '-j', '--json', '-p', '--pretty', '-d', '--debug' }
+
+ def get_bashcomp_list(self, block_name):
+ """
+ Search for and parse a list of type names from a variable in bash
+ completion file, for example:
+
+ local BPFTOOL_PROG_LOAD_TYPES='socket kprobe \\
+ kretprobe'
+
+ Return a set containing all type names, for example:
+
+ {'socket', 'kprobe', 'kretprobe'}
+
+ @block_name: name of the blog to parse, 'TYPE' in the example
+ """
+ start_marker = re.compile(f'local {block_name}=\'')
+ pattern = re.compile('(?:.*=\')?([\w/]+)')
+ end_marker = re.compile('\'$')
+ return self.__get_description_list(start_marker, pattern, end_marker)
+
+class SourceFileExtractor(FileExtractor):
+ """
+ An abstract extractor for a source file with usage message.
+ This class does not offer a way to set a filename, which is expected to be
+ defined in children classes.
+ """
+ def get_options(self):
+ return self.default_options().union(self.get_help_list_macro('HELP_SPEC_OPTIONS'))
+
+class ProgFileExtractor(SourceFileExtractor):
+ """
+ An extractor for bpftool's prog.c.
+ """
+ filename = os.path.join(BPFTOOL_DIR, 'prog.c')
+
+ def get_prog_types(self):
+ return self.get_types_from_array('prog_type_name')
+
+ def get_attach_types(self):
+ return self.get_types_from_array('attach_type_strings')
+
+ def get_prog_attach_help(self):
+ return self.get_help_list('ATTACH_TYPE')
+
+class MapFileExtractor(SourceFileExtractor):
+ """
+ An extractor for bpftool's map.c.
+ """
+ filename = os.path.join(BPFTOOL_DIR, 'map.c')
+
+ def get_map_types(self):
+ return self.get_types_from_array('map_type_name')
+
+ def get_map_help(self):
+ return self.get_help_list('TYPE')
+
+class CgroupFileExtractor(SourceFileExtractor):
+ """
+ An extractor for bpftool's cgroup.c.
+ """
+ filename = os.path.join(BPFTOOL_DIR, 'cgroup.c')
+
+ def get_prog_attach_help(self):
+ return self.get_help_list('ATTACH_TYPE')
+
+class CommonFileExtractor(SourceFileExtractor):
+ """
+ An extractor for bpftool's common.c.
+ """
+ filename = os.path.join(BPFTOOL_DIR, 'common.c')
+
+ def __init__(self):
+ super().__init__()
+ self.attach_types = {}
+
+ def get_attach_types(self):
+ if not self.attach_types:
+ self.attach_types = self.get_types_from_array('attach_type_name')
+ return self.attach_types
+
+ def get_cgroup_attach_types(self):
+ if not self.attach_types:
+ self.get_attach_types()
+ cgroup_types = {}
+ for (key, value) in self.attach_types.items():
+ if key.find('BPF_CGROUP') != -1:
+ cgroup_types[key] = value
+ return cgroup_types
+
+class GenericSourceExtractor(SourceFileExtractor):
+ """
+ An extractor for generic source code files.
+ """
+ filename = ""
+
+ def __init__(self, filename):
+ self.filename = os.path.join(BPFTOOL_DIR, filename)
+ super().__init__()
+
+class BpfHeaderExtractor(FileExtractor):
+ """
+ An extractor for the UAPI BPF header.
+ """
+ filename = os.path.join(LINUX_ROOT, 'tools/include/uapi/linux/bpf.h')
+
+ def get_prog_types(self):
+ return self.get_enum('bpf_prog_type')
+
+ def get_map_types(self):
+ return self.get_enum('bpf_map_type')
+
+ def get_attach_types(self):
+ return self.get_enum('bpf_attach_type')
+
+class ManPageExtractor(FileExtractor):
+ """
+ An abstract extractor for an RST documentation page.
+ This class does not offer a way to set a filename, which is expected to be
+ defined in children classes.
+ """
+ def get_options(self):
+ return self.get_rst_list('OPTIONS')
+
+class ManProgExtractor(ManPageExtractor):
+ """
+ An extractor for bpftool-prog.rst.
+ """
+ filename = os.path.join(BPFTOOL_DIR, 'Documentation/bpftool-prog.rst')
+
+ def get_attach_types(self):
+ return self.get_rst_list('ATTACH_TYPE')
+
+class ManMapExtractor(ManPageExtractor):
+ """
+ An extractor for bpftool-map.rst.
+ """
+ filename = os.path.join(BPFTOOL_DIR, 'Documentation/bpftool-map.rst')
+
+ def get_map_types(self):
+ return self.get_rst_list('TYPE')
+
+class ManCgroupExtractor(ManPageExtractor):
+ """
+ An extractor for bpftool-cgroup.rst.
+ """
+ filename = os.path.join(BPFTOOL_DIR, 'Documentation/bpftool-cgroup.rst')
+
+ def get_attach_types(self):
+ return self.get_rst_list('ATTACH_TYPE')
+
+class ManGenericExtractor(ManPageExtractor):
+ """
+ An extractor for generic RST documentation pages.
+ """
+ filename = ""
+
+ def __init__(self, filename):
+ self.filename = os.path.join(BPFTOOL_DIR, filename)
+ super().__init__()
+
+class BashcompExtractor(FileExtractor):
+ """
+ An extractor for bpftool's bash completion file.
+ """
+ filename = os.path.join(BPFTOOL_DIR, 'bash-completion/bpftool')
+
+ def get_prog_attach_types(self):
+ return self.get_bashcomp_list('BPFTOOL_PROG_ATTACH_TYPES')
+
+ def get_map_types(self):
+ return self.get_bashcomp_list('BPFTOOL_MAP_CREATE_TYPES')
+
+ def get_cgroup_attach_types(self):
+ return self.get_bashcomp_list('BPFTOOL_CGROUP_ATTACH_TYPES')
+
+def verify(first_set, second_set, message):
+ """
+ Print all values that differ between two sets.
+ @first_set: one set to compare
+ @second_set: another set to compare
+ @message: message to print for values belonging to only one of the sets
+ """
+ global retval
+ diff = first_set.symmetric_difference(second_set)
+ if diff:
+ print(message, diff)
+ retval = 1
+
+def main():
+ # No arguments supported at this time, but print usage for -h|--help
+ argParser = argparse.ArgumentParser(description="""
+ Verify that bpftool's code, help messages, documentation and bash
+ completion are all in sync on program types, map types, attach types, and
+ options. Also check that bpftool is in sync with the UAPI BPF header.
+ """)
+ args = argParser.parse_args()
+
+ # Map types (enum)
+
+ bpf_info = BpfHeaderExtractor()
+ ref = bpf_info.get_map_types()
+
+ map_info = MapFileExtractor()
+ source_map_items = map_info.get_map_types()
+ map_types_enum = set(source_map_items.keys())
+
+ verify(ref, map_types_enum,
+ f'Comparing BPF header (enum bpf_map_type) and {MapFileExtractor.filename} (map_type_name):')
+
+ # Map types (names)
+
+ source_map_types = set(source_map_items.values())
+ source_map_types.discard('unspec')
+
+ help_map_types = map_info.get_map_help()
+ help_map_options = map_info.get_options()
+ map_info.close()
+
+ man_map_info = ManMapExtractor()
+ man_map_options = man_map_info.get_options()
+ man_map_types = man_map_info.get_map_types()
+ man_map_info.close()
+
+ bashcomp_info = BashcompExtractor()
+ bashcomp_map_types = bashcomp_info.get_map_types()
+
+ verify(source_map_types, help_map_types,
+ f'Comparing {MapFileExtractor.filename} (map_type_name) and {MapFileExtractor.filename} (do_help() TYPE):')
+ verify(source_map_types, man_map_types,
+ f'Comparing {MapFileExtractor.filename} (map_type_name) and {ManMapExtractor.filename} (TYPE):')
+ verify(help_map_options, man_map_options,
+ f'Comparing {MapFileExtractor.filename} (do_help() OPTIONS) and {ManMapExtractor.filename} (OPTIONS):')
+ verify(source_map_types, bashcomp_map_types,
+ f'Comparing {MapFileExtractor.filename} (map_type_name) and {BashcompExtractor.filename} (BPFTOOL_MAP_CREATE_TYPES):')
+
+ # Program types (enum)
+
+ ref = bpf_info.get_prog_types()
+
+ prog_info = ProgFileExtractor()
+ prog_types = set(prog_info.get_prog_types().keys())
+
+ verify(ref, prog_types,
+ f'Comparing BPF header (enum bpf_prog_type) and {ProgFileExtractor.filename} (prog_type_name):')
+
+ # Attach types (enum)
+
+ ref = bpf_info.get_attach_types()
+ bpf_info.close()
+
+ common_info = CommonFileExtractor()
+ attach_types = common_info.get_attach_types()
+
+ verify(ref, attach_types,
+ f'Comparing BPF header (enum bpf_attach_type) and {CommonFileExtractor.filename} (attach_type_name):')
+
+ # Attach types (names)
+
+ source_prog_attach_types = set(prog_info.get_attach_types().values())
+
+ help_prog_attach_types = prog_info.get_prog_attach_help()
+ help_prog_options = prog_info.get_options()
+ prog_info.close()
+
+ man_prog_info = ManProgExtractor()
+ man_prog_options = man_prog_info.get_options()
+ man_prog_attach_types = man_prog_info.get_attach_types()
+ man_prog_info.close()
+
+ bashcomp_info.reset_read() # We stopped at map types, rewind
+ bashcomp_prog_attach_types = bashcomp_info.get_prog_attach_types()
+
+ verify(source_prog_attach_types, help_prog_attach_types,
+ f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {ProgFileExtractor.filename} (do_help() ATTACH_TYPE):')
+ verify(source_prog_attach_types, man_prog_attach_types,
+ f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {ManProgExtractor.filename} (ATTACH_TYPE):')
+ verify(help_prog_options, man_prog_options,
+ f'Comparing {ProgFileExtractor.filename} (do_help() OPTIONS) and {ManProgExtractor.filename} (OPTIONS):')
+ verify(source_prog_attach_types, bashcomp_prog_attach_types,
+ f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {BashcompExtractor.filename} (BPFTOOL_PROG_ATTACH_TYPES):')
+
+ # Cgroup attach types
+
+ source_cgroup_attach_types = set(common_info.get_cgroup_attach_types().values())
+ common_info.close()
+
+ cgroup_info = CgroupFileExtractor()
+ help_cgroup_attach_types = cgroup_info.get_prog_attach_help()
+ help_cgroup_options = cgroup_info.get_options()
+ cgroup_info.close()
+
+ man_cgroup_info = ManCgroupExtractor()
+ man_cgroup_options = man_cgroup_info.get_options()
+ man_cgroup_attach_types = man_cgroup_info.get_attach_types()
+ man_cgroup_info.close()
+
+ bashcomp_cgroup_attach_types = bashcomp_info.get_cgroup_attach_types()
+ bashcomp_info.close()
+
+ verify(source_cgroup_attach_types, help_cgroup_attach_types,
+ f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {CgroupFileExtractor.filename} (do_help() ATTACH_TYPE):')
+ verify(source_cgroup_attach_types, man_cgroup_attach_types,
+ f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {ManCgroupExtractor.filename} (ATTACH_TYPE):')
+ verify(help_cgroup_options, man_cgroup_options,
+ f'Comparing {CgroupFileExtractor.filename} (do_help() OPTIONS) and {ManCgroupExtractor.filename} (OPTIONS):')
+ verify(source_cgroup_attach_types, bashcomp_cgroup_attach_types,
+ f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {BashcompExtractor.filename} (BPFTOOL_CGROUP_ATTACH_TYPES):')
+
+ # Options for remaining commands
+
+ for cmd in [ 'btf', 'feature', 'gen', 'iter', 'link', 'net', 'perf', 'struct_ops', ]:
+ source_info = GenericSourceExtractor(cmd + '.c')
+ help_cmd_options = source_info.get_options()
+ source_info.close()
+
+ man_cmd_info = ManGenericExtractor(os.path.join('Documentation', 'bpftool-' + cmd + '.rst'))
+ man_cmd_options = man_cmd_info.get_options()
+ man_cmd_info.close()
+
+ verify(help_cmd_options, man_cmd_options,
+ f'Comparing {source_info.filename} (do_help() OPTIONS) and {man_cmd_info.filename} (OPTIONS):')
+
+ source_main_info = GenericSourceExtractor('main.c')
+ help_main_options = source_main_info.get_options()
+ source_main_info.close()
+
+ man_main_info = ManGenericExtractor(os.path.join('Documentation', 'bpftool.rst'))
+ man_main_options = man_main_info.get_options()
+ man_main_info.close()
+
+ verify(help_main_options, man_main_options,
+ f'Comparing {source_main_info.filename} (do_help() OPTIONS) and {man_main_info.filename} (OPTIONS):')
+
+ sys.exit(retval)
+
+if __name__ == "__main__":
+ main()
udp = socket(AF_INET, SOCK_DGRAM, 0);
i = 0;
err = bpf_map_update_elem(fd, &i, &udp, BPF_ANY);
- if (!err) {
- printf("Failed socket SOCK_DGRAM allowed '%i:%i'\n",
+ if (err) {
+ printf("Failed socket update SOCK_DGRAM '%i:%i'\n",
i, udp);
goto out_sockmap;
}
}
#define MAPINMAP_PROG "./test_map_in_map.o"
+#define MAPINMAP_INVALID_PROG "./test_map_in_map_invalid.o"
static void test_map_in_map(void)
{
struct bpf_object *obj;
struct bpf_map *map;
int mim_fd, fd, err;
int pos = 0;
+ struct bpf_map_info info = {};
+ __u32 len = sizeof(info);
+ __u32 id = 0;
+ libbpf_print_fn_t old_print_fn;
obj = bpf_object__open(MAPINMAP_PROG);
}
close(fd);
+ fd = -1;
bpf_object__close(obj);
+
+ /* Test that failing bpf_object__create_map() destroys the inner map */
+ obj = bpf_object__open(MAPINMAP_INVALID_PROG);
+ err = libbpf_get_error(obj);
+ if (err) {
+ printf("Failed to load %s program: %d %d",
+ MAPINMAP_INVALID_PROG, err, errno);
+ goto out_map_in_map;
+ }
+
+ map = bpf_object__find_map_by_name(obj, "mim");
+ if (!map) {
+ printf("Failed to load array of maps from test prog\n");
+ goto out_map_in_map;
+ }
+
+ old_print_fn = libbpf_set_print(NULL);
+
+ err = bpf_object__load(obj);
+ if (!err) {
+ printf("Loading obj supposed to fail\n");
+ goto out_map_in_map;
+ }
+
+ libbpf_set_print(old_print_fn);
+
+ /* Iterate over all maps to check whether the internal map
+ * ("mim.internal") has been destroyed.
+ */
+ while (true) {
+ err = bpf_map_get_next_id(id, &id);
+ if (err) {
+ if (errno == ENOENT)
+ break;
+ printf("Failed to get next map: %d", errno);
+ goto out_map_in_map;
+ }
+
+ fd = bpf_map_get_fd_by_id(id);
+ if (fd < 0) {
+ if (errno == ENOENT)
+ continue;
+ printf("Failed to get map by id %u: %d", id, errno);
+ goto out_map_in_map;
+ }
+
+ err = bpf_obj_get_info_by_fd(fd, &info, &len);
+ if (err) {
+ printf("Failed to get map info by fd %d: %d", fd,
+ errno);
+ goto out_map_in_map;
+ }
+
+ if (!strcmp(info.name, "mim.inner")) {
+ printf("Inner map mim.inner was not destroyed\n");
+ goto out_map_in_map;
+ }
+ }
+
return;
out_map_in_map:
- close(fd);
+ if (fd >= 0)
+ close(fd);
exit(1);
}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-#include <assert.h>
-#include <sys/sysinfo.h>
-#include <sys/time.h>
-
-#include <linux/bpf.h>
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#include "cgroup_helpers.h"
-#include "bpf_rlimit.h"
-#include "netcnt_common.h"
-
-#define BPF_PROG "./netcnt_prog.o"
-#define TEST_CGROUP "/test-network-counters/"
-
-static int bpf_find_map(const char *test, struct bpf_object *obj,
- const char *name)
-{
- struct bpf_map *map;
-
- map = bpf_object__find_map_by_name(obj, name);
- if (!map) {
- printf("%s:FAIL:map '%s' not found\n", test, name);
- return -1;
- }
- return bpf_map__fd(map);
-}
-
-int main(int argc, char **argv)
-{
- struct percpu_net_cnt *percpu_netcnt;
- struct bpf_cgroup_storage_key key;
- int map_fd, percpu_map_fd;
- int error = EXIT_FAILURE;
- struct net_cnt netcnt;
- struct bpf_object *obj;
- int prog_fd, cgroup_fd;
- unsigned long packets;
- unsigned long bytes;
- int cpu, nproc;
- __u32 prog_cnt;
-
- nproc = get_nprocs_conf();
- percpu_netcnt = malloc(sizeof(*percpu_netcnt) * nproc);
- if (!percpu_netcnt) {
- printf("Not enough memory for per-cpu area (%d cpus)\n", nproc);
- goto err;
- }
-
- if (bpf_prog_load(BPF_PROG, BPF_PROG_TYPE_CGROUP_SKB,
- &obj, &prog_fd)) {
- printf("Failed to load bpf program\n");
- goto out;
- }
-
- cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
- if (cgroup_fd < 0)
- goto err;
-
- /* Attach bpf program */
- if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) {
- printf("Failed to attach bpf program");
- goto err;
- }
-
- if (system("which ping6 &>/dev/null") == 0)
- assert(!system("ping6 ::1 -c 10000 -f -q > /dev/null"));
- else
- assert(!system("ping -6 ::1 -c 10000 -f -q > /dev/null"));
-
- if (bpf_prog_query(cgroup_fd, BPF_CGROUP_INET_EGRESS, 0, NULL, NULL,
- &prog_cnt)) {
- printf("Failed to query attached programs");
- goto err;
- }
-
- map_fd = bpf_find_map(__func__, obj, "netcnt");
- if (map_fd < 0) {
- printf("Failed to find bpf map with net counters");
- goto err;
- }
-
- percpu_map_fd = bpf_find_map(__func__, obj, "percpu_netcnt");
- if (percpu_map_fd < 0) {
- printf("Failed to find bpf map with percpu net counters");
- goto err;
- }
-
- if (bpf_map_get_next_key(map_fd, NULL, &key)) {
- printf("Failed to get key in cgroup storage\n");
- goto err;
- }
-
- if (bpf_map_lookup_elem(map_fd, &key, &netcnt)) {
- printf("Failed to lookup cgroup storage\n");
- goto err;
- }
-
- if (bpf_map_lookup_elem(percpu_map_fd, &key, &percpu_netcnt[0])) {
- printf("Failed to lookup percpu cgroup storage\n");
- goto err;
- }
-
- /* Some packets can be still in per-cpu cache, but not more than
- * MAX_PERCPU_PACKETS.
- */
- packets = netcnt.packets;
- bytes = netcnt.bytes;
- for (cpu = 0; cpu < nproc; cpu++) {
- if (percpu_netcnt[cpu].packets > MAX_PERCPU_PACKETS) {
- printf("Unexpected percpu value: %llu\n",
- percpu_netcnt[cpu].packets);
- goto err;
- }
-
- packets += percpu_netcnt[cpu].packets;
- bytes += percpu_netcnt[cpu].bytes;
- }
-
- /* No packets should be lost */
- if (packets != 10000) {
- printf("Unexpected packet count: %lu\n", packets);
- goto err;
- }
-
- /* Let's check that bytes counter matches the number of packets
- * multiplied by the size of ipv6 ICMP packet.
- */
- if (bytes != packets * 104) {
- printf("Unexpected bytes count: %lu\n", bytes);
- goto err;
- }
-
- error = 0;
- printf("test_netcnt:PASS\n");
-
-err:
- cleanup_cgroup_environment();
- free(percpu_netcnt);
-
-out:
- return error;
-}
___ok; \
})
+#define ASSERT_STRNEQ(actual, expected, len, name) ({ \
+ static int duration = 0; \
+ const char *___act = actual; \
+ const char *___exp = expected; \
+ int ___len = len; \
+ bool ___ok = strncmp(___act, ___exp, ___len) == 0; \
+ CHECK(!___ok, (name), \
+ "unexpected %s: actual '%.*s' != expected '%.*s'\n", \
+ (name), ___len, ___act, ___len, ___exp); \
+ ___ok; \
+})
+
#define ASSERT_OK(res, name) ({ \
static int duration = 0; \
long long ___res = (res); \
}
server_listen() {
- ip netns exec "${ns2}" nc "${netcat_opt}" -l -p "${port}" > "${outfile}" &
+ ip netns exec "${ns2}" nc "${netcat_opt}" -l "${port}" > "${outfile}" &
server_pid=$!
sleep 0.2
}
ip link set dev veth3 xdp pinned $BPF_DIR/progs/redirect_map_2
ip -n ns1 link set dev veth11 xdp obj xdp_dummy.o sec xdp_dummy
-ip -n ns2 link set dev veth22 xdp obj xdp_tx.o sec tx
+ip -n ns2 link set dev veth22 xdp obj xdp_tx.o sec xdp
ip -n ns3 link set dev veth33 xdp obj xdp_dummy.o sec xdp_dummy
trap cleanup EXIT
+{
+ "map access: known scalar += value_ptr unknown vs const",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, len)),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+ BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 4),
+ BPF_MOV64_IMM(BPF_REG_1, 6),
+ BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_1, 3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_16b = { 5 },
+ .fixup_map_array_48b = { 8 },
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R1 tried to add from different maps, paths or scalars",
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "map access: known scalar += value_ptr const vs unknown",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, len)),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+ BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 2),
+ BPF_MOV64_IMM(BPF_REG_1, 3),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+ BPF_MOV64_IMM(BPF_REG_1, 6),
+ BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_16b = { 5 },
+ .fixup_map_array_48b = { 8 },
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R1 tried to add from different maps, paths or scalars",
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "map access: known scalar += value_ptr const vs const (ne)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, len)),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 2),
+ BPF_MOV64_IMM(BPF_REG_1, 3),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_1, 5),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_16b = { 5 },
+ .fixup_map_array_48b = { 8 },
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R1 tried to add from different maps, paths or scalars",
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "map access: known scalar += value_ptr const vs const (eq)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, len)),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 2),
+ BPF_MOV64_IMM(BPF_REG_1, 5),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_1, 5),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_16b = { 5 },
+ .fixup_map_array_48b = { 8 },
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "map access: known scalar += value_ptr unknown vs unknown (eq)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, len)),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
+ BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 4),
+ BPF_MOV64_IMM(BPF_REG_1, 6),
+ BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+ BPF_MOV64_IMM(BPF_REG_1, 6),
+ BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_16b = { 5 },
+ .fixup_map_array_48b = { 8 },
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "map access: known scalar += value_ptr unknown vs unknown (lt)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, len)),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
+ BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 4),
+ BPF_MOV64_IMM(BPF_REG_1, 6),
+ BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x3),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+ BPF_MOV64_IMM(BPF_REG_1, 6),
+ BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_16b = { 5 },
+ .fixup_map_array_48b = { 8 },
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R1 tried to add from different maps, paths or scalars",
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "map access: known scalar += value_ptr unknown vs unknown (gt)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, len)),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
+ BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 4),
+ BPF_MOV64_IMM(BPF_REG_1, 6),
+ BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+ BPF_MOV64_IMM(BPF_REG_1, 6),
+ BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_16b = { 5 },
+ .fixup_map_array_48b = { 8 },
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R1 tried to add from different maps, paths or scalars",
+ .result = ACCEPT,
+ .retval = 1,
+},
{
"map access: known scalar += value_ptr from different maps",
.insns = {
/x86_64/xen_vmcall_test
/x86_64/xss_msr_test
/x86_64/vmx_pmu_msrs_test
+/access_tracking_perf_test
/demand_paging_test
/dirty_log_test
/dirty_log_perf_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_msrs_test
TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test
TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test
+TEST_GEN_PROGS_x86_64 += access_tracking_perf_test
TEST_GEN_PROGS_x86_64 += demand_paging_test
TEST_GEN_PROGS_x86_64 += dirty_log_test
TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
#define VREGS_SUBLIST \
{ "vregs", .regs = vregs, .regs_n = ARRAY_SIZE(vregs), }
#define PMU_SUBLIST \
- { "pmu", .regs = pmu_regs, .regs_n = ARRAY_SIZE(pmu_regs), }
+ { "pmu", .capability = KVM_CAP_ARM_PMU_V3, .feature = KVM_ARM_VCPU_PMU_V3, \
+ .regs = pmu_regs, .regs_n = ARRAY_SIZE(pmu_regs), }
#define SVE_SUBLIST \
{ "sve", .capability = KVM_CAP_ARM_SVE, .feature = KVM_ARM_VCPU_SVE, .finalize = true, \
.regs = sve_regs, .regs_n = ARRAY_SIZE(sve_regs), \
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * access_tracking_perf_test
+ *
+ * Copyright (C) 2021, Google, Inc.
+ *
+ * This test measures the performance effects of KVM's access tracking.
+ * Access tracking is driven by the MMU notifiers test_young, clear_young, and
+ * clear_flush_young. These notifiers do not have a direct userspace API,
+ * however the clear_young notifier can be triggered by marking a pages as idle
+ * in /sys/kernel/mm/page_idle/bitmap. This test leverages that mechanism to
+ * enable access tracking on guest memory.
+ *
+ * To measure performance this test runs a VM with a configurable number of
+ * vCPUs that each touch every page in disjoint regions of memory. Performance
+ * is measured in the time it takes all vCPUs to finish touching their
+ * predefined region.
+ *
+ * Note that a deterministic correctness test of access tracking is not possible
+ * by using page_idle as it exists today. This is for a few reasons:
+ *
+ * 1. page_idle only issues clear_young notifiers, which lack a TLB flush. This
+ * means subsequent guest accesses are not guaranteed to see page table
+ * updates made by KVM until some time in the future.
+ *
+ * 2. page_idle only operates on LRU pages. Newly allocated pages are not
+ * immediately allocated to LRU lists. Instead they are held in a "pagevec",
+ * which is drained to LRU lists some time in the future. There is no
+ * userspace API to force this drain to occur.
+ *
+ * These limitations are worked around in this test by using a large enough
+ * region of memory for each vCPU such that the number of translations cached in
+ * the TLB and the number of pages held in pagevecs are a small fraction of the
+ * overall workload. And if either of those conditions are not true this test
+ * will fail rather than silently passing.
+ */
+#include <inttypes.h>
+#include <limits.h>
+#include <pthread.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "kvm_util.h"
+#include "test_util.h"
+#include "perf_test_util.h"
+#include "guest_modes.h"
+
+/* Global variable used to synchronize all of the vCPU threads. */
+static int iteration = -1;
+
+/* Defines what vCPU threads should do during a given iteration. */
+static enum {
+ /* Run the vCPU to access all its memory. */
+ ITERATION_ACCESS_MEMORY,
+ /* Mark the vCPU's memory idle in page_idle. */
+ ITERATION_MARK_IDLE,
+} iteration_work;
+
+/* Set to true when vCPU threads should exit. */
+static bool done;
+
+/* The iteration that was last completed by each vCPU. */
+static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];
+
+/* Whether to overlap the regions of memory vCPUs access. */
+static bool overlap_memory_access;
+
+struct test_params {
+ /* The backing source for the region of memory. */
+ enum vm_mem_backing_src_type backing_src;
+
+ /* The amount of memory to allocate for each vCPU. */
+ uint64_t vcpu_memory_bytes;
+
+ /* The number of vCPUs to create in the VM. */
+ int vcpus;
+};
+
+static uint64_t pread_uint64(int fd, const char *filename, uint64_t index)
+{
+ uint64_t value;
+ off_t offset = index * sizeof(value);
+
+ TEST_ASSERT(pread(fd, &value, sizeof(value), offset) == sizeof(value),
+ "pread from %s offset 0x%" PRIx64 " failed!",
+ filename, offset);
+
+ return value;
+
+}
+
+#define PAGEMAP_PRESENT (1ULL << 63)
+#define PAGEMAP_PFN_MASK ((1ULL << 55) - 1)
+
+static uint64_t lookup_pfn(int pagemap_fd, struct kvm_vm *vm, uint64_t gva)
+{
+ uint64_t hva = (uint64_t) addr_gva2hva(vm, gva);
+ uint64_t entry;
+ uint64_t pfn;
+
+ entry = pread_uint64(pagemap_fd, "pagemap", hva / getpagesize());
+ if (!(entry & PAGEMAP_PRESENT))
+ return 0;
+
+ pfn = entry & PAGEMAP_PFN_MASK;
+ if (!pfn) {
+ print_skip("Looking up PFNs requires CAP_SYS_ADMIN");
+ exit(KSFT_SKIP);
+ }
+
+ return pfn;
+}
+
+static bool is_page_idle(int page_idle_fd, uint64_t pfn)
+{
+ uint64_t bits = pread_uint64(page_idle_fd, "page_idle", pfn / 64);
+
+ return !!((bits >> (pfn % 64)) & 1);
+}
+
+static void mark_page_idle(int page_idle_fd, uint64_t pfn)
+{
+ uint64_t bits = 1ULL << (pfn % 64);
+
+ TEST_ASSERT(pwrite(page_idle_fd, &bits, 8, 8 * (pfn / 64)) == 8,
+ "Set page_idle bits for PFN 0x%" PRIx64, pfn);
+}
+
+static void mark_vcpu_memory_idle(struct kvm_vm *vm, int vcpu_id)
+{
+ uint64_t base_gva = perf_test_args.vcpu_args[vcpu_id].gva;
+ uint64_t pages = perf_test_args.vcpu_args[vcpu_id].pages;
+ uint64_t page;
+ uint64_t still_idle = 0;
+ uint64_t no_pfn = 0;
+ int page_idle_fd;
+ int pagemap_fd;
+
+ /* If vCPUs are using an overlapping region, let vCPU 0 mark it idle. */
+ if (overlap_memory_access && vcpu_id)
+ return;
+
+ page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR);
+ TEST_ASSERT(page_idle_fd > 0, "Failed to open page_idle.");
+
+ pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+ TEST_ASSERT(pagemap_fd > 0, "Failed to open pagemap.");
+
+ for (page = 0; page < pages; page++) {
+ uint64_t gva = base_gva + page * perf_test_args.guest_page_size;
+ uint64_t pfn = lookup_pfn(pagemap_fd, vm, gva);
+
+ if (!pfn) {
+ no_pfn++;
+ continue;
+ }
+
+ if (is_page_idle(page_idle_fd, pfn)) {
+ still_idle++;
+ continue;
+ }
+
+ mark_page_idle(page_idle_fd, pfn);
+ }
+
+ /*
+ * Assumption: Less than 1% of pages are going to be swapped out from
+ * under us during this test.
+ */
+ TEST_ASSERT(no_pfn < pages / 100,
+ "vCPU %d: No PFN for %" PRIu64 " out of %" PRIu64 " pages.",
+ vcpu_id, no_pfn, pages);
+
+ /*
+ * Test that at least 90% of memory has been marked idle (the rest might
+ * not be marked idle because the pages have not yet made it to an LRU
+ * list or the translations are still cached in the TLB). 90% is
+ * arbitrary; high enough that we ensure most memory access went through
+ * access tracking but low enough as to not make the test too brittle
+ * over time and across architectures.
+ */
+ TEST_ASSERT(still_idle < pages / 10,
+ "vCPU%d: Too many pages still idle (%"PRIu64 " out of %"
+ PRIu64 ").\n",
+ vcpu_id, still_idle, pages);
+
+ close(page_idle_fd);
+ close(pagemap_fd);
+}
+
+static void assert_ucall(struct kvm_vm *vm, uint32_t vcpu_id,
+ uint64_t expected_ucall)
+{
+ struct ucall uc;
+ uint64_t actual_ucall = get_ucall(vm, vcpu_id, &uc);
+
+ TEST_ASSERT(expected_ucall == actual_ucall,
+ "Guest exited unexpectedly (expected ucall %" PRIu64
+ ", got %" PRIu64 ")",
+ expected_ucall, actual_ucall);
+}
+
+static bool spin_wait_for_next_iteration(int *current_iteration)
+{
+ int last_iteration = *current_iteration;
+
+ do {
+ if (READ_ONCE(done))
+ return false;
+
+ *current_iteration = READ_ONCE(iteration);
+ } while (last_iteration == *current_iteration);
+
+ return true;
+}
+
+static void *vcpu_thread_main(void *arg)
+{
+ struct perf_test_vcpu_args *vcpu_args = arg;
+ struct kvm_vm *vm = perf_test_args.vm;
+ int vcpu_id = vcpu_args->vcpu_id;
+ int current_iteration = -1;
+
+ vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
+
+ while (spin_wait_for_next_iteration(¤t_iteration)) {
+ switch (READ_ONCE(iteration_work)) {
+ case ITERATION_ACCESS_MEMORY:
+ vcpu_run(vm, vcpu_id);
+ assert_ucall(vm, vcpu_id, UCALL_SYNC);
+ break;
+ case ITERATION_MARK_IDLE:
+ mark_vcpu_memory_idle(vm, vcpu_id);
+ break;
+ };
+
+ vcpu_last_completed_iteration[vcpu_id] = current_iteration;
+ }
+
+ return NULL;
+}
+
+static void spin_wait_for_vcpu(int vcpu_id, int target_iteration)
+{
+ while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) !=
+ target_iteration) {
+ continue;
+ }
+}
+
+/* The type of memory accesses to perform in the VM. */
+enum access_type {
+ ACCESS_READ,
+ ACCESS_WRITE,
+};
+
+static void run_iteration(struct kvm_vm *vm, int vcpus, const char *description)
+{
+ struct timespec ts_start;
+ struct timespec ts_elapsed;
+ int next_iteration;
+ int vcpu_id;
+
+ /* Kick off the vCPUs by incrementing iteration. */
+ next_iteration = ++iteration;
+
+ clock_gettime(CLOCK_MONOTONIC, &ts_start);
+
+ /* Wait for all vCPUs to finish the iteration. */
+ for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++)
+ spin_wait_for_vcpu(vcpu_id, next_iteration);
+
+ ts_elapsed = timespec_elapsed(ts_start);
+ pr_info("%-30s: %ld.%09lds\n",
+ description, ts_elapsed.tv_sec, ts_elapsed.tv_nsec);
+}
+
+static void access_memory(struct kvm_vm *vm, int vcpus, enum access_type access,
+ const char *description)
+{
+ perf_test_args.wr_fract = (access == ACCESS_READ) ? INT_MAX : 1;
+ sync_global_to_guest(vm, perf_test_args);
+ iteration_work = ITERATION_ACCESS_MEMORY;
+ run_iteration(vm, vcpus, description);
+}
+
+static void mark_memory_idle(struct kvm_vm *vm, int vcpus)
+{
+ /*
+ * Even though this parallelizes the work across vCPUs, this is still a
+ * very slow operation because page_idle forces the test to mark one pfn
+ * at a time and the clear_young notifier serializes on the KVM MMU
+ * lock.
+ */
+ pr_debug("Marking VM memory idle (slow)...\n");
+ iteration_work = ITERATION_MARK_IDLE;
+ run_iteration(vm, vcpus, "Mark memory idle");
+}
+
+static pthread_t *create_vcpu_threads(int vcpus)
+{
+ pthread_t *vcpu_threads;
+ int i;
+
+ vcpu_threads = malloc(vcpus * sizeof(vcpu_threads[0]));
+ TEST_ASSERT(vcpu_threads, "Failed to allocate vcpu_threads.");
+
+ for (i = 0; i < vcpus; i++) {
+ vcpu_last_completed_iteration[i] = iteration;
+ pthread_create(&vcpu_threads[i], NULL, vcpu_thread_main,
+ &perf_test_args.vcpu_args[i]);
+ }
+
+ return vcpu_threads;
+}
+
+static void terminate_vcpu_threads(pthread_t *vcpu_threads, int vcpus)
+{
+ int i;
+
+ /* Set done to signal the vCPU threads to exit */
+ done = true;
+
+ for (i = 0; i < vcpus; i++)
+ pthread_join(vcpu_threads[i], NULL);
+}
+
+static void run_test(enum vm_guest_mode mode, void *arg)
+{
+ struct test_params *params = arg;
+ struct kvm_vm *vm;
+ pthread_t *vcpu_threads;
+ int vcpus = params->vcpus;
+
+ vm = perf_test_create_vm(mode, vcpus, params->vcpu_memory_bytes,
+ params->backing_src);
+
+ perf_test_setup_vcpus(vm, vcpus, params->vcpu_memory_bytes,
+ !overlap_memory_access);
+
+ vcpu_threads = create_vcpu_threads(vcpus);
+
+ pr_info("\n");
+ access_memory(vm, vcpus, ACCESS_WRITE, "Populating memory");
+
+ /* As a control, read and write to the populated memory first. */
+ access_memory(vm, vcpus, ACCESS_WRITE, "Writing to populated memory");
+ access_memory(vm, vcpus, ACCESS_READ, "Reading from populated memory");
+
+ /* Repeat on memory that has been marked as idle. */
+ mark_memory_idle(vm, vcpus);
+ access_memory(vm, vcpus, ACCESS_WRITE, "Writing to idle memory");
+ mark_memory_idle(vm, vcpus);
+ access_memory(vm, vcpus, ACCESS_READ, "Reading from idle memory");
+
+ terminate_vcpu_threads(vcpu_threads, vcpus);
+ free(vcpu_threads);
+ perf_test_destroy_vm(vm);
+}
+
+static void help(char *name)
+{
+ puts("");
+ printf("usage: %s [-h] [-m mode] [-b vcpu_bytes] [-v vcpus] [-o] [-s mem_type]\n",
+ name);
+ puts("");
+ printf(" -h: Display this help message.");
+ guest_modes_help();
+ printf(" -b: specify the size of the memory region which should be\n"
+ " dirtied by each vCPU. e.g. 10M or 3G.\n"
+ " (default: 1G)\n");
+ printf(" -v: specify the number of vCPUs to run.\n");
+ printf(" -o: Overlap guest memory accesses instead of partitioning\n"
+ " them into a separate region of memory for each vCPU.\n");
+ printf(" -s: specify the type of memory that should be used to\n"
+ " back the guest data region.\n\n");
+ backing_src_help();
+ puts("");
+ exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+ struct test_params params = {
+ .backing_src = VM_MEM_SRC_ANONYMOUS,
+ .vcpu_memory_bytes = DEFAULT_PER_VCPU_MEM_SIZE,
+ .vcpus = 1,
+ };
+ int page_idle_fd;
+ int opt;
+
+ guest_modes_append_default();
+
+ while ((opt = getopt(argc, argv, "hm:b:v:os:")) != -1) {
+ switch (opt) {
+ case 'm':
+ guest_modes_cmdline(optarg);
+ break;
+ case 'b':
+ params.vcpu_memory_bytes = parse_size(optarg);
+ break;
+ case 'v':
+ params.vcpus = atoi(optarg);
+ break;
+ case 'o':
+ overlap_memory_access = true;
+ break;
+ case 's':
+ params.backing_src = parse_backing_src_type(optarg);
+ break;
+ case 'h':
+ default:
+ help(argv[0]);
+ break;
+ }
+ }
+
+ page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR);
+ if (page_idle_fd < 0) {
+ print_skip("CONFIG_IDLE_PAGE_TRACKING is not enabled");
+ exit(KSFT_SKIP);
+ }
+ close(page_idle_fd);
+
+ for_each_guest_mode(run_test, ¶ms);
+
+ return 0;
+}
break;
case 'o':
p.partition_vcpu_memory_access = false;
+ break;
case 's':
p.backing_src = parse_backing_src_type(optarg);
break;
#define HV_X64_GUEST_DEBUGGING_AVAILABLE BIT(1)
#define HV_X64_PERF_MONITOR_AVAILABLE BIT(2)
#define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE BIT(3)
-#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE BIT(4)
+#define HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE BIT(4)
#define HV_X64_GUEST_IDLE_STATE_AVAILABLE BIT(5)
#define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE BIT(8)
#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(10)
#define HV_STATUS_INVALID_CONNECTION_ID 18
#define HV_STATUS_INSUFFICIENT_BUFFERS 19
+/* hypercall options */
+#define HV_HYPERCALL_FAST_BIT BIT(16)
+
#endif /* !SELFTEST_KVM_HYPERV_H */
run_delay = get_run_delay();
pthread_create(&thread, &attr, do_steal_time, NULL);
do
- pthread_yield();
+ sched_yield();
while (get_run_delay() - run_delay < MIN_RUN_DELAY_NS);
pthread_join(thread, NULL);
run_delay = get_run_delay() - run_delay;
vcpu_set_hv_cpuid(vm, VCPU_ID);
tsc_page_gva = vm_vaddr_alloc_page(vm);
- memset(addr_gpa2hva(vm, tsc_page_gva), 0x0, getpagesize());
+ memset(addr_gva2hva(vm, tsc_page_gva), 0x0, getpagesize());
TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0,
"TSC page has to be page aligned\n");
vcpu_args_set(vm, VCPU_ID, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva));
}
static int nr_gp;
+static int nr_ud;
static inline u64 hypercall(u64 control, vm_vaddr_t input_address,
vm_vaddr_t output_address)
regs->rip = (uint64_t)&wrmsr_end;
}
+static void guest_ud_handler(struct ex_regs *regs)
+{
+ nr_ud++;
+ regs->rip += 3;
+}
+
struct msr_data {
uint32_t idx;
bool available;
struct hcall_data {
uint64_t control;
uint64_t expect;
+ bool ud_expected;
};
static void guest_msr(struct msr_data *msr)
static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
{
int i = 0;
+ u64 res, input, output;
wrmsr(HV_X64_MSR_GUEST_OS_ID, LINUX_OS_ID);
wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
while (hcall->control) {
- GUEST_ASSERT(hypercall(hcall->control, pgs_gpa,
- pgs_gpa + 4096) == hcall->expect);
+ nr_ud = 0;
+ if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) {
+ input = pgs_gpa;
+ output = pgs_gpa + 4096;
+ } else {
+ input = output = 0;
+ }
+
+ res = hypercall(hcall->control, input, output);
+ if (hcall->ud_expected)
+ GUEST_ASSERT(nr_ud == 1);
+ else
+ GUEST_ASSERT(res == hcall->expect);
+
GUEST_SYNC(i++);
}
recomm.ebx = 0xfff;
hcall->expect = HV_STATUS_SUCCESS;
break;
-
case 17:
+ /* XMM fast hypercall */
+ hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT;
+ hcall->ud_expected = true;
+ break;
+ case 18:
+ feat.edx |= HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE;
+ hcall->ud_expected = false;
+ hcall->expect = HV_STATUS_SUCCESS;
+ break;
+
+ case 19:
/* END */
hcall->control = 0;
break;
/* Test hypercalls */
vm = vm_create_default(VCPU_ID, 0, guest_hcall);
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vm, VCPU_ID);
+ vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
+
/* Hypercall input/output */
hcall_page = vm_vaddr_alloc_pages(vm, 2);
memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
TEST_GEN_FILES += hwtstamp_config rxtimestamp timestamping txtimestamp
TEST_GEN_FILES += ipsec
TEST_GEN_FILES += ioam6_parser
+TEST_GEN_FILES += gro
TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
+TEST_GEN_FILES += toeplitz
TEST_FILES := settings
--- /dev/null
+##TEST_GEN_FILES := test_unix_oob
+TEST_PROGS := test_unix_oob
+include ../../lib.mk
+
+all: $(TEST_PROGS)
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <netinet/tcp.h>
+#include <sys/un.h>
+#include <sys/signal.h>
+#include <sys/poll.h>
+
+static int pipefd[2];
+static int signal_recvd;
+static pid_t producer_id;
+static char sock_name[32];
+
+static void sig_hand(int sn, siginfo_t *si, void *p)
+{
+ signal_recvd = sn;
+}
+
+static int set_sig_handler(int signal)
+{
+ struct sigaction sa;
+
+ sa.sa_sigaction = sig_hand;
+ sigemptyset(&sa.sa_mask);
+ sa.sa_flags = SA_SIGINFO | SA_RESTART;
+
+ return sigaction(signal, &sa, NULL);
+}
+
+static void set_filemode(int fd, int set)
+{
+ int flags = fcntl(fd, F_GETFL, 0);
+
+ if (set)
+ flags &= ~O_NONBLOCK;
+ else
+ flags |= O_NONBLOCK;
+ fcntl(fd, F_SETFL, flags);
+}
+
+static void signal_producer(int fd)
+{
+ char cmd;
+
+ cmd = 'S';
+ write(fd, &cmd, sizeof(cmd));
+}
+
+static void wait_for_signal(int fd)
+{
+ char buf[5];
+
+ read(fd, buf, 5);
+}
+
+static void die(int status)
+{
+ fflush(NULL);
+ unlink(sock_name);
+ kill(producer_id, SIGTERM);
+ exit(status);
+}
+
+int is_sioctatmark(int fd)
+{
+ int ans = -1;
+
+ if (ioctl(fd, SIOCATMARK, &ans, sizeof(ans)) < 0) {
+#ifdef DEBUG
+ perror("SIOCATMARK Failed");
+#endif
+ }
+ return ans;
+}
+
+void read_oob(int fd, char *c)
+{
+
+ *c = ' ';
+ if (recv(fd, c, sizeof(*c), MSG_OOB) < 0) {
+#ifdef DEBUG
+ perror("Reading MSG_OOB Failed");
+#endif
+ }
+}
+
+int read_data(int pfd, char *buf, int size)
+{
+ int len = 0;
+
+ memset(buf, size, '0');
+ len = read(pfd, buf, size);
+#ifdef DEBUG
+ if (len < 0)
+ perror("read failed");
+#endif
+ return len;
+}
+
+static void wait_for_data(int pfd, int event)
+{
+ struct pollfd pfds[1];
+
+ pfds[0].fd = pfd;
+ pfds[0].events = event;
+ poll(pfds, 1, -1);
+}
+
+void producer(struct sockaddr_un *consumer_addr)
+{
+ int cfd;
+ char buf[64];
+ int i;
+
+ memset(buf, 'x', sizeof(buf));
+ cfd = socket(AF_UNIX, SOCK_STREAM, 0);
+
+ wait_for_signal(pipefd[0]);
+ if (connect(cfd, (struct sockaddr *)consumer_addr,
+ sizeof(struct sockaddr)) != 0) {
+ perror("Connect failed");
+ kill(0, SIGTERM);
+ exit(1);
+ }
+
+ for (i = 0; i < 2; i++) {
+ /* Test 1: Test for SIGURG and OOB */
+ wait_for_signal(pipefd[0]);
+ memset(buf, 'x', sizeof(buf));
+ buf[63] = '@';
+ send(cfd, buf, sizeof(buf), MSG_OOB);
+
+ wait_for_signal(pipefd[0]);
+
+ /* Test 2: Test for OOB being overwitten */
+ memset(buf, 'x', sizeof(buf));
+ buf[63] = '%';
+ send(cfd, buf, sizeof(buf), MSG_OOB);
+
+ memset(buf, 'x', sizeof(buf));
+ buf[63] = '#';
+ send(cfd, buf, sizeof(buf), MSG_OOB);
+
+ wait_for_signal(pipefd[0]);
+
+ /* Test 3: Test for SIOCATMARK */
+ memset(buf, 'x', sizeof(buf));
+ buf[63] = '@';
+ send(cfd, buf, sizeof(buf), MSG_OOB);
+
+ memset(buf, 'x', sizeof(buf));
+ buf[63] = '%';
+ send(cfd, buf, sizeof(buf), MSG_OOB);
+
+ memset(buf, 'x', sizeof(buf));
+ send(cfd, buf, sizeof(buf), 0);
+
+ wait_for_signal(pipefd[0]);
+
+ /* Test 4: Test for 1byte OOB msg */
+ memset(buf, 'x', sizeof(buf));
+ buf[0] = '@';
+ send(cfd, buf, 1, MSG_OOB);
+ }
+}
+
+int
+main(int argc, char **argv)
+{
+ int lfd, pfd;
+ struct sockaddr_un consumer_addr, paddr;
+ socklen_t len = sizeof(consumer_addr);
+ char buf[1024];
+ int on = 0;
+ char oob;
+ int flags;
+ int atmark;
+ char *tmp_file;
+
+ lfd = socket(AF_UNIX, SOCK_STREAM, 0);
+ memset(&consumer_addr, 0, sizeof(consumer_addr));
+ consumer_addr.sun_family = AF_UNIX;
+ sprintf(sock_name, "unix_oob_%d", getpid());
+ unlink(sock_name);
+ strcpy(consumer_addr.sun_path, sock_name);
+
+ if ((bind(lfd, (struct sockaddr *)&consumer_addr,
+ sizeof(consumer_addr))) != 0) {
+ perror("socket bind failed");
+ exit(1);
+ }
+
+ pipe(pipefd);
+
+ listen(lfd, 1);
+
+ producer_id = fork();
+ if (producer_id == 0) {
+ producer(&consumer_addr);
+ exit(0);
+ }
+
+ set_sig_handler(SIGURG);
+ signal_producer(pipefd[1]);
+
+ pfd = accept(lfd, (struct sockaddr *) &paddr, &len);
+ fcntl(pfd, F_SETOWN, getpid());
+
+ signal_recvd = 0;
+ signal_producer(pipefd[1]);
+
+ /* Test 1:
+ * veriyf that SIGURG is
+ * delivered and 63 bytes are
+ * read and oob is '@'
+ */
+ wait_for_data(pfd, POLLIN | POLLPRI);
+ read_oob(pfd, &oob);
+ len = read_data(pfd, buf, 1024);
+ if (!signal_recvd || len != 63 || oob != '@') {
+ fprintf(stderr, "Test 1 failed sigurg %d len %d %c\n",
+ signal_recvd, len, oob);
+ die(1);
+ }
+
+ signal_recvd = 0;
+ signal_producer(pipefd[1]);
+
+ /* Test 2:
+ * Verify that the first OOB is over written by
+ * the 2nd one and the first OOB is returned as
+ * part of the read, and sigurg is received.
+ */
+ wait_for_data(pfd, POLLIN | POLLPRI);
+ len = 0;
+ while (len < 70)
+ len = recv(pfd, buf, 1024, MSG_PEEK);
+ len = read_data(pfd, buf, 1024);
+ read_oob(pfd, &oob);
+ if (!signal_recvd || len != 127 || oob != '#') {
+ fprintf(stderr, "Test 2 failed, sigurg %d len %d OOB %c\n",
+ signal_recvd, len, oob);
+ die(1);
+ }
+
+ signal_recvd = 0;
+ signal_producer(pipefd[1]);
+
+ /* Test 3:
+ * verify that 2nd oob over writes
+ * the first one and read breaks at
+ * oob boundary returning 127 bytes
+ * and sigurg is received and atmark
+ * is set.
+ * oob is '%' and second read returns
+ * 64 bytes.
+ */
+ len = 0;
+ wait_for_data(pfd, POLLIN | POLLPRI);
+ while (len < 150)
+ len = recv(pfd, buf, 1024, MSG_PEEK);
+ len = read_data(pfd, buf, 1024);
+ atmark = is_sioctatmark(pfd);
+ read_oob(pfd, &oob);
+
+ if (!signal_recvd || len != 127 || oob != '%' || atmark != 1) {
+ fprintf(stderr, "Test 3 failed, sigurg %d len %d OOB %c ",
+ "atmark %d\n", signal_recvd, len, oob, atmark);
+ die(1);
+ }
+
+ signal_recvd = 0;
+
+ len = read_data(pfd, buf, 1024);
+ if (len != 64) {
+ fprintf(stderr, "Test 3.1 failed, sigurg %d len %d OOB %c\n",
+ signal_recvd, len, oob);
+ die(1);
+ }
+
+ signal_recvd = 0;
+ signal_producer(pipefd[1]);
+
+ /* Test 4:
+ * verify that a single byte
+ * oob message is delivered.
+ * set non blocking mode and
+ * check proper error is
+ * returned and sigurg is
+ * received and correct
+ * oob is read.
+ */
+
+ set_filemode(pfd, 0);
+
+ wait_for_data(pfd, POLLIN | POLLPRI);
+ len = read_data(pfd, buf, 1024);
+ if ((len == -1) && (errno == 11))
+ len = 0;
+
+ read_oob(pfd, &oob);
+
+ if (!signal_recvd || len != 0 || oob != '@') {
+ fprintf(stderr, "Test 4 failed, sigurg %d len %d OOB %c\n",
+ signal_recvd, len, oob);
+ die(1);
+ }
+
+ set_filemode(pfd, 1);
+
+ /* Inline Testing */
+
+ on = 1;
+ if (setsockopt(pfd, SOL_SOCKET, SO_OOBINLINE, &on, sizeof(on))) {
+ perror("SO_OOBINLINE");
+ die(1);
+ }
+
+ signal_recvd = 0;
+ signal_producer(pipefd[1]);
+
+ /* Test 1 -- Inline:
+ * Check that SIGURG is
+ * delivered and 63 bytes are
+ * read and oob is '@'
+ */
+
+ wait_for_data(pfd, POLLIN | POLLPRI);
+ len = read_data(pfd, buf, 1024);
+
+ if (!signal_recvd || len != 63) {
+ fprintf(stderr, "Test 1 Inline failed, sigurg %d len %d\n",
+ signal_recvd, len);
+ die(1);
+ }
+
+ len = read_data(pfd, buf, 1024);
+
+ if (len != 1) {
+ fprintf(stderr,
+ "Test 1.1 Inline failed, sigurg %d len %d oob %c\n",
+ signal_recvd, len, oob);
+ die(1);
+ }
+
+ signal_recvd = 0;
+ signal_producer(pipefd[1]);
+
+ /* Test 2 -- Inline:
+ * Verify that the first OOB is over written by
+ * the 2nd one and read breaks correctly on
+ * 2nd OOB boundary with the first OOB returned as
+ * part of the read, and sigurg is delivered and
+ * siocatmark returns true.
+ * next read returns one byte, the oob byte
+ * and siocatmark returns false.
+ */
+ len = 0;
+ wait_for_data(pfd, POLLIN | POLLPRI);
+ while (len < 70)
+ len = recv(pfd, buf, 1024, MSG_PEEK);
+ len = read_data(pfd, buf, 1024);
+ atmark = is_sioctatmark(pfd);
+ if (len != 127 || atmark != 1 || !signal_recvd) {
+ fprintf(stderr, "Test 2 Inline failed, len %d atmark %d\n",
+ len, atmark);
+ die(1);
+ }
+
+ len = read_data(pfd, buf, 1024);
+ atmark = is_sioctatmark(pfd);
+ if (len != 1 || buf[0] != '#' || atmark == 1) {
+ fprintf(stderr, "Test 2.1 Inline failed, len %d data %c atmark %d\n",
+ len, buf[0], atmark);
+ die(1);
+ }
+
+ signal_recvd = 0;
+ signal_producer(pipefd[1]);
+
+ /* Test 3 -- Inline:
+ * verify that 2nd oob over writes
+ * the first one and read breaks at
+ * oob boundary returning 127 bytes
+ * and sigurg is received and siocatmark
+ * is true after the read.
+ * subsequent read returns 65 bytes
+ * because of oob which should be '%'.
+ */
+ len = 0;
+ wait_for_data(pfd, POLLIN | POLLPRI);
+ while (len < 126)
+ len = recv(pfd, buf, 1024, MSG_PEEK);
+ len = read_data(pfd, buf, 1024);
+ atmark = is_sioctatmark(pfd);
+ if (!signal_recvd || len != 127 || !atmark) {
+ fprintf(stderr,
+ "Test 3 Inline failed, sigurg %d len %d data %c\n",
+ signal_recvd, len, buf[0]);
+ die(1);
+ }
+
+ len = read_data(pfd, buf, 1024);
+ atmark = is_sioctatmark(pfd);
+ if (len != 65 || buf[0] != '%' || atmark != 0) {
+ fprintf(stderr,
+ "Test 3.1 Inline failed, len %d oob %c atmark %d\n",
+ len, buf[0], atmark);
+ die(1);
+ }
+
+ signal_recvd = 0;
+ signal_producer(pipefd[1]);
+
+ /* Test 4 -- Inline:
+ * verify that a single
+ * byte oob message is delivered
+ * and read returns one byte, the oob
+ * byte and sigurg is received
+ */
+ wait_for_data(pfd, POLLIN | POLLPRI);
+ len = read_data(pfd, buf, 1024);
+ if (!signal_recvd || len != 1 || buf[0] != '@') {
+ fprintf(stderr,
+ "Test 4 Inline failed, signal %d len %d data %c\n",
+ signal_recvd, len, buf[0]);
+ die(1);
+ }
+ die(0);
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This testsuite provides conformance testing for GRO coalescing.
+ *
+ * Test cases:
+ * 1.data
+ * Data packets of the same size and same header setup with correct
+ * sequence numbers coalesce. The one exception being the last data
+ * packet coalesced: it can be smaller than the rest and coalesced
+ * as long as it is in the same flow.
+ * 2.ack
+ * Pure ACK does not coalesce.
+ * 3.flags
+ * Specific test cases: no packets with PSH, SYN, URG, RST set will
+ * be coalesced.
+ * 4.tcp
+ * Packets with incorrect checksum, non-consecutive seqno and
+ * different TCP header options shouldn't coalesce. Nit: given that
+ * some extension headers have paddings, such as timestamp, headers
+ * that are padding differently would not be coalesced.
+ * 5.ip:
+ * Packets with different (ECN, TTL, TOS) header, ip options or
+ * ip fragments (ipv6) shouldn't coalesce.
+ * 6.large:
+ * Packets larger than GRO_MAX_SIZE packets shouldn't coalesce.
+ *
+ * MSS is defined as 4096 - header because if it is too small
+ * (i.e. 1500 MTU - header), it will result in many packets,
+ * increasing the "large" test case's flakiness. This is because
+ * due to time sensitivity in the coalescing window, the receiver
+ * may not coalesce all of the packets.
+ *
+ * Note the timing issue applies to all of the test cases, so some
+ * flakiness is to be expected.
+ *
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <getopt.h>
+#include <linux/filter.h>
+#include <linux/if_packet.h>
+#include <linux/ipv6.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <unistd.h>
+
+#define DPORT 8000
+#define SPORT 1500
+#define PAYLOAD_LEN 100
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#define NUM_PACKETS 4
+#define START_SEQ 100
+#define START_ACK 100
+#define SIP6 "fdaa::2"
+#define DIP6 "fdaa::1"
+#define SIP4 "192.168.1.200"
+#define DIP4 "192.168.1.100"
+#define ETH_P_NONE 0
+#define TOTAL_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
+#define MSS (4096 - sizeof(struct tcphdr) - sizeof(struct ipv6hdr))
+#define MAX_PAYLOAD (IP_MAXPACKET - sizeof(struct tcphdr) - sizeof(struct ipv6hdr))
+#define NUM_LARGE_PKT (MAX_PAYLOAD / MSS)
+#define MAX_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
+
+static int proto = -1;
+static uint8_t src_mac[ETH_ALEN], dst_mac[ETH_ALEN];
+static char *testname = "data";
+static char *ifname = "eth0";
+static char *smac = "aa:00:00:00:00:02";
+static char *dmac = "aa:00:00:00:00:01";
+static bool verbose;
+static bool tx_socket = true;
+static int tcp_offset = -1;
+static int total_hdr_len = -1;
+static int ethhdr_proto = -1;
+
+static void vlog(const char *fmt, ...)
+{
+ va_list args;
+
+ if (verbose) {
+ va_start(args, fmt);
+ vfprintf(stderr, fmt, args);
+ va_end(args);
+ }
+}
+
+static void setup_sock_filter(int fd)
+{
+ const int dport_off = tcp_offset + offsetof(struct tcphdr, dest);
+ const int ethproto_off = offsetof(struct ethhdr, h_proto);
+ int optlen = 0;
+ int ipproto_off;
+ int next_off;
+
+ if (proto == PF_INET)
+ next_off = offsetof(struct iphdr, protocol);
+ else
+ next_off = offsetof(struct ipv6hdr, nexthdr);
+ ipproto_off = ETH_HLEN + next_off;
+
+ if (strcmp(testname, "ip") == 0) {
+ if (proto == PF_INET)
+ optlen = sizeof(struct ip_timestamp);
+ else
+ optlen = sizeof(struct ip6_frag);
+ }
+
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, ethproto_off),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ntohs(ethhdr_proto), 0, 7),
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, ipproto_off),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 0, 5),
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, dport_off),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 2, 0),
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, dport_off + optlen),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 0, 1),
+ BPF_STMT(BPF_RET + BPF_K, 0xFFFFFFFF),
+ BPF_STMT(BPF_RET + BPF_K, 0),
+ };
+
+ struct sock_fprog bpf = {
+ .len = ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf)) < 0)
+ error(1, errno, "error setting filter");
+}
+
+static uint32_t checksum_nofold(void *data, size_t len, uint32_t sum)
+{
+ uint16_t *words = data;
+ int i;
+
+ for (i = 0; i < len / 2; i++)
+ sum += words[i];
+ if (len & 1)
+ sum += ((char *)data)[len - 1];
+ return sum;
+}
+
+static uint16_t checksum_fold(void *data, size_t len, uint32_t sum)
+{
+ sum = checksum_nofold(data, len, sum);
+ while (sum > 0xFFFF)
+ sum = (sum & 0xFFFF) + (sum >> 16);
+ return ~sum;
+}
+
+static uint16_t tcp_checksum(void *buf, int payload_len)
+{
+ struct pseudo_header6 {
+ struct in6_addr saddr;
+ struct in6_addr daddr;
+ uint16_t protocol;
+ uint16_t payload_len;
+ } ph6;
+ struct pseudo_header4 {
+ struct in_addr saddr;
+ struct in_addr daddr;
+ uint16_t protocol;
+ uint16_t payload_len;
+ } ph4;
+ uint32_t sum = 0;
+
+ if (proto == PF_INET6) {
+ if (inet_pton(AF_INET6, SIP6, &ph6.saddr) != 1)
+ error(1, errno, "inet_pton6 source ip pseudo");
+ if (inet_pton(AF_INET6, DIP6, &ph6.daddr) != 1)
+ error(1, errno, "inet_pton6 dest ip pseudo");
+ ph6.protocol = htons(IPPROTO_TCP);
+ ph6.payload_len = htons(sizeof(struct tcphdr) + payload_len);
+
+ sum = checksum_nofold(&ph6, sizeof(ph6), 0);
+ } else if (proto == PF_INET) {
+ if (inet_pton(AF_INET, SIP4, &ph4.saddr) != 1)
+ error(1, errno, "inet_pton source ip pseudo");
+ if (inet_pton(AF_INET, DIP4, &ph4.daddr) != 1)
+ error(1, errno, "inet_pton dest ip pseudo");
+ ph4.protocol = htons(IPPROTO_TCP);
+ ph4.payload_len = htons(sizeof(struct tcphdr) + payload_len);
+
+ sum = checksum_nofold(&ph4, sizeof(ph4), 0);
+ }
+
+ return checksum_fold(buf, sizeof(struct tcphdr) + payload_len, sum);
+}
+
+static void read_MAC(uint8_t *mac_addr, char *mac)
+{
+ if (sscanf(mac, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
+ &mac_addr[0], &mac_addr[1], &mac_addr[2],
+ &mac_addr[3], &mac_addr[4], &mac_addr[5]) != 6)
+ error(1, 0, "sscanf");
+}
+
+static void fill_datalinklayer(void *buf)
+{
+ struct ethhdr *eth = buf;
+
+ memcpy(eth->h_dest, dst_mac, ETH_ALEN);
+ memcpy(eth->h_source, src_mac, ETH_ALEN);
+ eth->h_proto = ethhdr_proto;
+}
+
+static void fill_networklayer(void *buf, int payload_len)
+{
+ struct ipv6hdr *ip6h = buf;
+ struct iphdr *iph = buf;
+
+ if (proto == PF_INET6) {
+ memset(ip6h, 0, sizeof(*ip6h));
+
+ ip6h->version = 6;
+ ip6h->payload_len = htons(sizeof(struct tcphdr) + payload_len);
+ ip6h->nexthdr = IPPROTO_TCP;
+ ip6h->hop_limit = 8;
+ if (inet_pton(AF_INET6, SIP6, &ip6h->saddr) != 1)
+ error(1, errno, "inet_pton source ip6");
+ if (inet_pton(AF_INET6, DIP6, &ip6h->daddr) != 1)
+ error(1, errno, "inet_pton dest ip6");
+ } else if (proto == PF_INET) {
+ memset(iph, 0, sizeof(*iph));
+
+ iph->version = 4;
+ iph->ihl = 5;
+ iph->ttl = 8;
+ iph->protocol = IPPROTO_TCP;
+ iph->tot_len = htons(sizeof(struct tcphdr) +
+ payload_len + sizeof(struct iphdr));
+ iph->frag_off = htons(0x4000); /* DF = 1, MF = 0 */
+ if (inet_pton(AF_INET, SIP4, &iph->saddr) != 1)
+ error(1, errno, "inet_pton source ip");
+ if (inet_pton(AF_INET, DIP4, &iph->daddr) != 1)
+ error(1, errno, "inet_pton dest ip");
+ iph->check = checksum_fold(buf, sizeof(struct iphdr), 0);
+ }
+}
+
+static void fill_transportlayer(void *buf, int seq_offset, int ack_offset,
+ int payload_len, int fin)
+{
+ struct tcphdr *tcph = buf;
+
+ memset(tcph, 0, sizeof(*tcph));
+
+ tcph->source = htons(SPORT);
+ tcph->dest = htons(DPORT);
+ tcph->seq = ntohl(START_SEQ + seq_offset);
+ tcph->ack_seq = ntohl(START_ACK + ack_offset);
+ tcph->ack = 1;
+ tcph->fin = fin;
+ tcph->doff = 5;
+ tcph->window = htons(TCP_MAXWIN);
+ tcph->urg_ptr = 0;
+ tcph->check = tcp_checksum(tcph, payload_len);
+}
+
+static void write_packet(int fd, char *buf, int len, struct sockaddr_ll *daddr)
+{
+ int ret = -1;
+
+ ret = sendto(fd, buf, len, 0, (struct sockaddr *)daddr, sizeof(*daddr));
+ if (ret == -1)
+ error(1, errno, "sendto failure");
+ if (ret != len)
+ error(1, errno, "sendto wrong length");
+}
+
+static void create_packet(void *buf, int seq_offset, int ack_offset,
+ int payload_len, int fin)
+{
+ memset(buf, 0, total_hdr_len);
+ memset(buf + total_hdr_len, 'a', payload_len);
+ fill_transportlayer(buf + tcp_offset, seq_offset, ack_offset,
+ payload_len, fin);
+ fill_networklayer(buf + ETH_HLEN, payload_len);
+ fill_datalinklayer(buf);
+}
+
+/* send one extra flag, not first and not last pkt */
+static void send_flags(int fd, struct sockaddr_ll *daddr, int psh, int syn,
+ int rst, int urg)
+{
+ static char flag_buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ int payload_len, pkt_size, flag, i;
+ struct tcphdr *tcph;
+
+ payload_len = PAYLOAD_LEN * psh;
+ pkt_size = total_hdr_len + payload_len;
+ flag = NUM_PACKETS / 2;
+
+ create_packet(flag_buf, flag * payload_len, 0, payload_len, 0);
+
+ tcph = (struct tcphdr *)(flag_buf + tcp_offset);
+ tcph->psh = psh;
+ tcph->syn = syn;
+ tcph->rst = rst;
+ tcph->urg = urg;
+ tcph->check = 0;
+ tcph->check = tcp_checksum(tcph, payload_len);
+
+ for (i = 0; i < NUM_PACKETS + 1; i++) {
+ if (i == flag) {
+ write_packet(fd, flag_buf, pkt_size, daddr);
+ continue;
+ }
+ create_packet(buf, i * PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
+ }
+}
+
+/* Test for data of same length, smaller than previous
+ * and of different lengths
+ */
+static void send_data_pkts(int fd, struct sockaddr_ll *daddr,
+ int payload_len1, int payload_len2)
+{
+ static char buf[ETH_HLEN + IP_MAXPACKET];
+
+ create_packet(buf, 0, 0, payload_len1, 0);
+ write_packet(fd, buf, total_hdr_len + payload_len1, daddr);
+ create_packet(buf, payload_len1, 0, payload_len2, 0);
+ write_packet(fd, buf, total_hdr_len + payload_len2, daddr);
+}
+
+/* If incoming segments make tracked segment length exceed
+ * legal IP datagram length, do not coalesce
+ */
+static void send_large(int fd, struct sockaddr_ll *daddr, int remainder)
+{
+ static char pkts[NUM_LARGE_PKT][TOTAL_HDR_LEN + MSS];
+ static char last[TOTAL_HDR_LEN + MSS];
+ static char new_seg[TOTAL_HDR_LEN + MSS];
+ int i;
+
+ for (i = 0; i < NUM_LARGE_PKT; i++)
+ create_packet(pkts[i], i * MSS, 0, MSS, 0);
+ create_packet(last, NUM_LARGE_PKT * MSS, 0, remainder, 0);
+ create_packet(new_seg, (NUM_LARGE_PKT + 1) * MSS, 0, remainder, 0);
+
+ for (i = 0; i < NUM_LARGE_PKT; i++)
+ write_packet(fd, pkts[i], total_hdr_len + MSS, daddr);
+ write_packet(fd, last, total_hdr_len + remainder, daddr);
+ write_packet(fd, new_seg, total_hdr_len + remainder, daddr);
+}
+
+/* Pure acks and dup acks don't coalesce */
+static void send_ack(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN];
+
+ create_packet(buf, 0, 0, 0, 0);
+ write_packet(fd, buf, total_hdr_len, daddr);
+ write_packet(fd, buf, total_hdr_len, daddr);
+ create_packet(buf, 0, 1, 0, 0);
+ write_packet(fd, buf, total_hdr_len, daddr);
+}
+
+static void recompute_packet(char *buf, char *no_ext, int extlen)
+{
+ struct tcphdr *tcphdr = (struct tcphdr *)(buf + tcp_offset);
+ struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
+ struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+
+ memmove(buf, no_ext, total_hdr_len);
+ memmove(buf + total_hdr_len + extlen,
+ no_ext + total_hdr_len, PAYLOAD_LEN);
+
+ tcphdr->doff = tcphdr->doff + (extlen / 4);
+ tcphdr->check = 0;
+ tcphdr->check = tcp_checksum(tcphdr, PAYLOAD_LEN + extlen);
+ if (proto == PF_INET) {
+ iph->tot_len = htons(ntohs(iph->tot_len) + extlen);
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+ } else {
+ ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen);
+ }
+}
+
+static void tcp_write_options(char *buf, int kind, int ts)
+{
+ struct tcp_option_ts {
+ uint8_t kind;
+ uint8_t len;
+ uint32_t tsval;
+ uint32_t tsecr;
+ } *opt_ts = (void *)buf;
+ struct tcp_option_window {
+ uint8_t kind;
+ uint8_t len;
+ uint8_t shift;
+ } *opt_window = (void *)buf;
+
+ switch (kind) {
+ case TCPOPT_NOP:
+ buf[0] = TCPOPT_NOP;
+ break;
+ case TCPOPT_WINDOW:
+ memset(opt_window, 0, sizeof(struct tcp_option_window));
+ opt_window->kind = TCPOPT_WINDOW;
+ opt_window->len = TCPOLEN_WINDOW;
+ opt_window->shift = 0;
+ break;
+ case TCPOPT_TIMESTAMP:
+ memset(opt_ts, 0, sizeof(struct tcp_option_ts));
+ opt_ts->kind = TCPOPT_TIMESTAMP;
+ opt_ts->len = TCPOLEN_TIMESTAMP;
+ opt_ts->tsval = ts;
+ opt_ts->tsecr = 0;
+ break;
+ default:
+ error(1, 0, "unimplemented TCP option");
+ break;
+ }
+}
+
+/* TCP with options is always a permutation of {TS, NOP, NOP}.
+ * Implement different orders to verify coalescing stops.
+ */
+static void add_standard_tcp_options(char *buf, char *no_ext, int ts, int order)
+{
+ switch (order) {
+ case 0:
+ tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0);
+ tcp_write_options(buf + total_hdr_len + 1, TCPOPT_NOP, 0);
+ tcp_write_options(buf + total_hdr_len + 2 /* two NOP opts */,
+ TCPOPT_TIMESTAMP, ts);
+ break;
+ case 1:
+ tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0);
+ tcp_write_options(buf + total_hdr_len + 1,
+ TCPOPT_TIMESTAMP, ts);
+ tcp_write_options(buf + total_hdr_len + 1 + TCPOLEN_TIMESTAMP,
+ TCPOPT_NOP, 0);
+ break;
+ case 2:
+ tcp_write_options(buf + total_hdr_len, TCPOPT_TIMESTAMP, ts);
+ tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 1,
+ TCPOPT_NOP, 0);
+ tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 2,
+ TCPOPT_NOP, 0);
+ break;
+ default:
+ error(1, 0, "unknown order");
+ break;
+ }
+ recompute_packet(buf, no_ext, TCPOLEN_TSTAMP_APPA);
+}
+
+/* Packets with invalid checksum don't coalesce. */
+static void send_changed_checksum(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset);
+ int pkt_size = total_hdr_len + PAYLOAD_LEN;
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ tcph->check = tcph->check - 1;
+ write_packet(fd, buf, pkt_size, daddr);
+}
+
+ /* Packets with non-consecutive sequence number don't coalesce.*/
+static void send_changed_seq(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset);
+ int pkt_size = total_hdr_len + PAYLOAD_LEN;
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ tcph->seq = ntohl(htonl(tcph->seq) + 1);
+ tcph->check = 0;
+ tcph->check = tcp_checksum(tcph, PAYLOAD_LEN);
+ write_packet(fd, buf, pkt_size, daddr);
+}
+
+ /* Packet with different timestamp option or different timestamps
+ * don't coalesce.
+ */
+static void send_changed_ts(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char extpkt[sizeof(buf) + TCPOLEN_TSTAMP_APPA];
+ int pkt_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA;
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt, buf, 0, 0);
+ write_packet(fd, extpkt, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt, buf, 0, 0);
+ write_packet(fd, extpkt, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt, buf, 100, 0);
+ write_packet(fd, extpkt, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt, buf, 100, 1);
+ write_packet(fd, extpkt, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 4, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt, buf, 100, 2);
+ write_packet(fd, extpkt, pkt_size, daddr);
+}
+
+/* Packet with different tcp options don't coalesce. */
+static void send_diff_opt(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char extpkt1[sizeof(buf) + TCPOLEN_TSTAMP_APPA];
+ static char extpkt2[sizeof(buf) + TCPOLEN_MAXSEG];
+ int extpkt1_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA;
+ int extpkt2_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_MAXSEG;
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt1, buf, 0, 0);
+ write_packet(fd, extpkt1, extpkt1_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt1, buf, 0, 0);
+ write_packet(fd, extpkt1, extpkt1_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+ tcp_write_options(extpkt2 + MAX_HDR_LEN, TCPOPT_NOP, 0);
+ tcp_write_options(extpkt2 + MAX_HDR_LEN + 1, TCPOPT_WINDOW, 0);
+ recompute_packet(extpkt2, buf, TCPOLEN_WINDOW + 1);
+ write_packet(fd, extpkt2, extpkt2_size, daddr);
+}
+
+static void add_ipv4_ts_option(void *buf, void *optpkt)
+{
+ struct ip_timestamp *ts = (struct ip_timestamp *)(optpkt + tcp_offset);
+ int optlen = sizeof(struct ip_timestamp);
+ struct iphdr *iph;
+
+ if (optlen % 4)
+ error(1, 0, "ipv4 timestamp length is not a multiple of 4B");
+
+ ts->ipt_code = IPOPT_TS;
+ ts->ipt_len = optlen;
+ ts->ipt_ptr = 5;
+ ts->ipt_flg = IPOPT_TS_TSONLY;
+
+ memcpy(optpkt, buf, tcp_offset);
+ memcpy(optpkt + tcp_offset + optlen, buf + tcp_offset,
+ sizeof(struct tcphdr) + PAYLOAD_LEN);
+
+ iph = (struct iphdr *)(optpkt + ETH_HLEN);
+ iph->ihl = 5 + (optlen / 4);
+ iph->tot_len = htons(ntohs(iph->tot_len) + optlen);
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr) + optlen, 0);
+}
+
+/* IPv4 options shouldn't coalesce */
+static void send_ip_options(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char optpkt[sizeof(buf) + sizeof(struct ip_timestamp)];
+ int optlen = sizeof(struct ip_timestamp);
+ int pkt_size = total_hdr_len + PAYLOAD_LEN + optlen;
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0);
+ add_ipv4_ts_option(buf, optpkt);
+ write_packet(fd, optpkt, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
+}
+
+/* IPv4 fragments shouldn't coalesce */
+static void send_fragment4(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[IP_MAXPACKET];
+ struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+ int pkt_size = total_hdr_len + PAYLOAD_LEN;
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, pkt_size, daddr);
+
+ /* Once fragmented, packet would retain the total_len.
+ * Tcp header is prepared as if rest of data is in follow-up frags,
+ * but follow up frags aren't actually sent.
+ */
+ memset(buf + total_hdr_len, 'a', PAYLOAD_LEN * 2);
+ fill_transportlayer(buf + tcp_offset, PAYLOAD_LEN, 0, PAYLOAD_LEN * 2, 0);
+ fill_networklayer(buf + ETH_HLEN, PAYLOAD_LEN);
+ fill_datalinklayer(buf);
+
+ iph->frag_off = htons(0x6000); // DF = 1, MF = 1
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+ write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* IPv4 packets with different ttl don't coalesce.*/
+static void send_changed_ttl(int fd, struct sockaddr_ll *daddr)
+{
+ int pkt_size = total_hdr_len + PAYLOAD_LEN;
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ iph->ttl = 7;
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+ write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* Packets with different tos don't coalesce.*/
+static void send_changed_tos(int fd, struct sockaddr_ll *daddr)
+{
+ int pkt_size = total_hdr_len + PAYLOAD_LEN;
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+ struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ if (proto == PF_INET) {
+ iph->tos = 1;
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+ } else if (proto == PF_INET6) {
+ ip6h->priority = 0xf;
+ }
+ write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* Packets with different ECN don't coalesce.*/
+static void send_changed_ECN(int fd, struct sockaddr_ll *daddr)
+{
+ int pkt_size = total_hdr_len + PAYLOAD_LEN;
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ if (proto == PF_INET) {
+ buf[ETH_HLEN + 1] ^= 0x2; // ECN set to 10
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+ } else {
+ buf[ETH_HLEN + 1] ^= 0x20; // ECN set to 10
+ }
+ write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* IPv6 fragments and packets with extensions don't coalesce.*/
+static void send_fragment6(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char extpkt[MAX_HDR_LEN + PAYLOAD_LEN +
+ sizeof(struct ip6_frag)];
+ struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
+ struct ip6_frag *frag = (void *)(extpkt + tcp_offset);
+ int extlen = sizeof(struct ip6_frag);
+ int bufpkt_len = total_hdr_len + PAYLOAD_LEN;
+ int extpkt_len = bufpkt_len + extlen;
+ int i;
+
+ for (i = 0; i < 2; i++) {
+ create_packet(buf, PAYLOAD_LEN * i, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, bufpkt_len, daddr);
+ }
+
+ create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+ memset(extpkt, 0, extpkt_len);
+
+ ip6h->nexthdr = IPPROTO_FRAGMENT;
+ ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen);
+ frag->ip6f_nxt = IPPROTO_TCP;
+
+ memcpy(extpkt, buf, tcp_offset);
+ memcpy(extpkt + tcp_offset + extlen, buf + tcp_offset,
+ sizeof(struct tcphdr) + PAYLOAD_LEN);
+ write_packet(fd, extpkt, extpkt_len, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, bufpkt_len, daddr);
+}
+
+static void bind_packetsocket(int fd)
+{
+ struct sockaddr_ll daddr = {};
+
+ daddr.sll_family = AF_PACKET;
+ daddr.sll_protocol = ethhdr_proto;
+ daddr.sll_ifindex = if_nametoindex(ifname);
+ if (daddr.sll_ifindex == 0)
+ error(1, errno, "if_nametoindex");
+
+ if (bind(fd, (void *)&daddr, sizeof(daddr)) < 0)
+ error(1, errno, "could not bind socket");
+}
+
+static void set_timeout(int fd)
+{
+ struct timeval timeout;
+
+ timeout.tv_sec = 120;
+ timeout.tv_usec = 0;
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout,
+ sizeof(timeout)) < 0)
+ error(1, errno, "cannot set timeout, setsockopt failed");
+}
+
+static void check_recv_pkts(int fd, int *correct_payload,
+ int correct_num_pkts)
+{
+ static char buffer[IP_MAXPACKET + ETH_HLEN + 1];
+ struct iphdr *iph = (struct iphdr *)(buffer + ETH_HLEN);
+ struct ipv6hdr *ip6h = (struct ipv6hdr *)(buffer + ETH_HLEN);
+ struct tcphdr *tcph;
+ bool bad_packet = false;
+ int tcp_ext_len = 0;
+ int ip_ext_len = 0;
+ int pkt_size = -1;
+ int data_len = 0;
+ int num_pkt = 0;
+ int i;
+
+ vlog("Expected {");
+ for (i = 0; i < correct_num_pkts; i++)
+ vlog("%d ", correct_payload[i]);
+ vlog("}, Total %d packets\nReceived {", correct_num_pkts);
+
+ while (1) {
+ pkt_size = recv(fd, buffer, IP_MAXPACKET + ETH_HLEN + 1, 0);
+ if (pkt_size < 0)
+ error(1, errno, "could not receive");
+
+ if (iph->version == 4)
+ ip_ext_len = (iph->ihl - 5) * 4;
+ else if (ip6h->version == 6 && ip6h->nexthdr != IPPROTO_TCP)
+ ip_ext_len = sizeof(struct ip6_frag);
+
+ tcph = (struct tcphdr *)(buffer + tcp_offset + ip_ext_len);
+
+ if (tcph->fin)
+ break;
+
+ tcp_ext_len = (tcph->doff - 5) * 4;
+ data_len = pkt_size - total_hdr_len - tcp_ext_len - ip_ext_len;
+ /* Min ethernet frame payload is 46(ETH_ZLEN - ETH_HLEN) by RFC 802.3.
+ * Ipv4/tcp packets without at least 6 bytes of data will be padded.
+ * Packet sockets are protocol agnostic, and will not trim the padding.
+ */
+ if (pkt_size == ETH_ZLEN && iph->version == 4) {
+ data_len = ntohs(iph->tot_len)
+ - sizeof(struct tcphdr) - sizeof(struct iphdr);
+ }
+ vlog("%d ", data_len);
+ if (data_len != correct_payload[num_pkt]) {
+ vlog("[!=%d]", correct_payload[num_pkt]);
+ bad_packet = true;
+ }
+ num_pkt++;
+ }
+ vlog("}, Total %d packets.\n", num_pkt);
+ if (num_pkt != correct_num_pkts)
+ error(1, 0, "incorrect number of packets");
+ if (bad_packet)
+ error(1, 0, "incorrect packet geometry");
+
+ printf("Test succeeded\n\n");
+}
+
+static void gro_sender(void)
+{
+ static char fin_pkt[MAX_HDR_LEN];
+ struct sockaddr_ll daddr = {};
+ int txfd = -1;
+
+ txfd = socket(PF_PACKET, SOCK_RAW, IPPROTO_RAW);
+ if (txfd < 0)
+ error(1, errno, "socket creation");
+
+ memset(&daddr, 0, sizeof(daddr));
+ daddr.sll_ifindex = if_nametoindex(ifname);
+ if (daddr.sll_ifindex == 0)
+ error(1, errno, "if_nametoindex");
+ daddr.sll_family = AF_PACKET;
+ memcpy(daddr.sll_addr, dst_mac, ETH_ALEN);
+ daddr.sll_halen = ETH_ALEN;
+ create_packet(fin_pkt, PAYLOAD_LEN * 2, 0, 0, 1);
+
+ if (strcmp(testname, "data") == 0) {
+ send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN / 2);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_data_pkts(txfd, &daddr, PAYLOAD_LEN / 2, PAYLOAD_LEN);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "ack") == 0) {
+ send_ack(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "flags") == 0) {
+ send_flags(txfd, &daddr, 1, 0, 0, 0);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_flags(txfd, &daddr, 0, 1, 0, 0);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_flags(txfd, &daddr, 0, 0, 1, 0);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_flags(txfd, &daddr, 0, 0, 0, 1);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "tcp") == 0) {
+ send_changed_checksum(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_changed_seq(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_changed_ts(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_diff_opt(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "ip") == 0) {
+ send_changed_ECN(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_changed_tos(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ if (proto == PF_INET) {
+ /* Modified packets may be received out of order.
+ * Sleep function added to enforce test boundaries
+ * so that fin pkts are not received prior to other pkts.
+ */
+ sleep(1);
+ send_changed_ttl(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ sleep(1);
+ send_ip_options(txfd, &daddr);
+ sleep(1);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ sleep(1);
+ send_fragment4(txfd, &daddr);
+ sleep(1);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (proto == PF_INET6) {
+ send_fragment6(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ }
+ } else if (strcmp(testname, "large") == 0) {
+ /* 20 is the difference between min iphdr size
+ * and min ipv6hdr size. Like MAX_HDR_SIZE,
+ * MAX_PAYLOAD is defined with the larger header of the two.
+ */
+ int offset = proto == PF_INET ? 20 : 0;
+ int remainder = (MAX_PAYLOAD + offset) % MSS;
+
+ send_large(txfd, &daddr, remainder);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_large(txfd, &daddr, remainder + 1);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else {
+ error(1, 0, "Unknown testcase");
+ }
+
+ if (close(txfd))
+ error(1, errno, "socket close");
+}
+
+static void gro_receiver(void)
+{
+ static int correct_payload[NUM_PACKETS];
+ int rxfd = -1;
+
+ rxfd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_NONE));
+ if (rxfd < 0)
+ error(1, 0, "socket creation");
+ setup_sock_filter(rxfd);
+ set_timeout(rxfd);
+ bind_packetsocket(rxfd);
+
+ memset(correct_payload, 0, sizeof(correct_payload));
+
+ if (strcmp(testname, "data") == 0) {
+ printf("pure data packet of same size: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 1);
+
+ printf("large data packets followed by a smaller one: ");
+ correct_payload[0] = PAYLOAD_LEN * 1.5;
+ check_recv_pkts(rxfd, correct_payload, 1);
+
+ printf("small data packets followed by a larger one: ");
+ correct_payload[0] = PAYLOAD_LEN / 2;
+ correct_payload[1] = PAYLOAD_LEN;
+ check_recv_pkts(rxfd, correct_payload, 2);
+ } else if (strcmp(testname, "ack") == 0) {
+ printf("duplicate ack and pure ack: ");
+ check_recv_pkts(rxfd, correct_payload, 3);
+ } else if (strcmp(testname, "flags") == 0) {
+ correct_payload[0] = PAYLOAD_LEN * 3;
+ correct_payload[1] = PAYLOAD_LEN * 2;
+
+ printf("psh flag ends coalescing: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ correct_payload[1] = 0;
+ correct_payload[2] = PAYLOAD_LEN * 2;
+ printf("syn flag ends coalescing: ");
+ check_recv_pkts(rxfd, correct_payload, 3);
+
+ printf("rst flag ends coalescing: ");
+ check_recv_pkts(rxfd, correct_payload, 3);
+
+ printf("urg flag ends coalescing: ");
+ check_recv_pkts(rxfd, correct_payload, 3);
+ } else if (strcmp(testname, "tcp") == 0) {
+ correct_payload[0] = PAYLOAD_LEN;
+ correct_payload[1] = PAYLOAD_LEN;
+ correct_payload[2] = PAYLOAD_LEN;
+ correct_payload[3] = PAYLOAD_LEN;
+
+ printf("changed checksum does not coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ printf("Wrong Seq number doesn't coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ printf("Different timestamp doesn't coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 4);
+
+ printf("Different options doesn't coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 2);
+ } else if (strcmp(testname, "ip") == 0) {
+ correct_payload[0] = PAYLOAD_LEN;
+ correct_payload[1] = PAYLOAD_LEN;
+
+ printf("different ECN doesn't coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ printf("different tos doesn't coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ if (proto == PF_INET) {
+ printf("different ttl doesn't coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ printf("ip options doesn't coalesce: ");
+ correct_payload[2] = PAYLOAD_LEN;
+ check_recv_pkts(rxfd, correct_payload, 3);
+
+ printf("fragmented ip4 doesn't coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+ } else if (proto == PF_INET6) {
+ /* GRO doesn't check for ipv6 hop limit when flushing.
+ * Hence no corresponding test to the ipv4 case.
+ */
+ printf("fragmented ip6 doesn't coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 2);
+ }
+ } else if (strcmp(testname, "large") == 0) {
+ int offset = proto == PF_INET ? 20 : 0;
+ int remainder = (MAX_PAYLOAD + offset) % MSS;
+
+ correct_payload[0] = (MAX_PAYLOAD + offset);
+ correct_payload[1] = remainder;
+ printf("Shouldn't coalesce if exceed IP max pkt size: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ /* last segment sent individually, doesn't start new segment */
+ correct_payload[0] = correct_payload[0] - remainder;
+ correct_payload[1] = remainder + 1;
+ correct_payload[2] = remainder + 1;
+ check_recv_pkts(rxfd, correct_payload, 3);
+ } else {
+ error(1, 0, "Test case error, should never trigger");
+ }
+
+ if (close(rxfd))
+ error(1, 0, "socket close");
+}
+
+static void parse_args(int argc, char **argv)
+{
+ static const struct option opts[] = {
+ { "dmac", required_argument, NULL, 'D' },
+ { "iface", required_argument, NULL, 'i' },
+ { "ipv4", no_argument, NULL, '4' },
+ { "ipv6", no_argument, NULL, '6' },
+ { "rx", no_argument, NULL, 'r' },
+ { "smac", required_argument, NULL, 'S' },
+ { "test", required_argument, NULL, 't' },
+ { "verbose", no_argument, NULL, 'v' },
+ { 0, 0, 0, 0 }
+ };
+ int c;
+
+ while ((c = getopt_long(argc, argv, "46D:i:rS:t:v", opts, NULL)) != -1) {
+ switch (c) {
+ case '4':
+ proto = PF_INET;
+ ethhdr_proto = htons(ETH_P_IP);
+ break;
+ case '6':
+ proto = PF_INET6;
+ ethhdr_proto = htons(ETH_P_IPV6);
+ break;
+ case 'D':
+ dmac = optarg;
+ break;
+ case 'i':
+ ifname = optarg;
+ break;
+ case 'r':
+ tx_socket = false;
+ break;
+ case 'S':
+ smac = optarg;
+ break;
+ case 't':
+ testname = optarg;
+ break;
+ case 'v':
+ verbose = true;
+ break;
+ default:
+ error(1, 0, "%s invalid option %c\n", __func__, c);
+ break;
+ }
+ }
+}
+
+int main(int argc, char **argv)
+{
+ parse_args(argc, argv);
+
+ if (proto == PF_INET) {
+ tcp_offset = ETH_HLEN + sizeof(struct iphdr);
+ total_hdr_len = tcp_offset + sizeof(struct tcphdr);
+ } else if (proto == PF_INET6) {
+ tcp_offset = ETH_HLEN + sizeof(struct ipv6hdr);
+ total_hdr_len = MAX_HDR_LEN;
+ } else {
+ error(1, 0, "Protocol family is not ipv4 or ipv6");
+ }
+
+ read_MAC(src_mac, smac);
+ read_MAC(dst_mac, dmac);
+
+ if (tx_socket)
+ gro_sender();
+ else
+ gro_receiver();
+ return 0;
+}
--- /dev/null
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source setup_loopback.sh
+readonly SERVER_MAC="aa:00:00:00:00:02"
+readonly CLIENT_MAC="aa:00:00:00:00:01"
+readonly TESTS=("data" "ack" "flags" "tcp" "ip" "large")
+readonly PROTOS=("ipv4" "ipv6")
+dev="eth0"
+test="all"
+proto="ipv4"
+
+setup_interrupt() {
+ # Use timer on host to trigger the network stack
+ # Also disable device interrupt to not depend on NIC interrupt
+ # Reduce test flakiness caused by unexpected interrupts
+ echo 100000 >"${FLUSH_PATH}"
+ echo 50 >"${IRQ_PATH}"
+}
+
+setup_ns() {
+ # Set up server_ns namespace and client_ns namespace
+ setup_macvlan_ns "${dev}" server_ns server "${SERVER_MAC}"
+ setup_macvlan_ns "${dev}" client_ns client "${CLIENT_MAC}"
+}
+
+cleanup_ns() {
+ cleanup_macvlan_ns server_ns server client_ns client
+}
+
+setup() {
+ setup_loopback_environment "${dev}"
+ setup_interrupt
+}
+
+cleanup() {
+ cleanup_loopback "${dev}"
+
+ echo "${FLUSH_TIMEOUT}" >"${FLUSH_PATH}"
+ echo "${HARD_IRQS}" >"${IRQ_PATH}"
+}
+
+run_test() {
+ local server_pid=0
+ local exit_code=0
+ local protocol=$1
+ local test=$2
+ local ARGS=( "--${protocol}" "--dmac" "${SERVER_MAC}" \
+ "--smac" "${CLIENT_MAC}" "--test" "${test}" "--verbose" )
+
+ setup_ns
+ # Each test is run 3 times to deflake, because given the receive timing,
+ # not all packets that should coalesce will be considered in the same flow
+ # on every try.
+ for tries in {1..3}; do
+ # Actual test starts here
+ ip netns exec server_ns ./gro "${ARGS[@]}" "--rx" "--iface" "server" \
+ 1>>log.txt &
+ server_pid=$!
+ sleep 0.5 # to allow for socket init
+ ip netns exec client_ns ./gro "${ARGS[@]}" "--iface" "client" \
+ 1>>log.txt
+ wait "${server_pid}"
+ exit_code=$?
+ if [[ "${exit_code}" -eq 0 ]]; then
+ break;
+ fi
+ done
+ cleanup_ns
+ echo ${exit_code}
+}
+
+run_all_tests() {
+ local failed_tests=()
+ for proto in "${PROTOS[@]}"; do
+ for test in "${TESTS[@]}"; do
+ echo "running test ${proto} ${test}" >&2
+ exit_code=$(run_test $proto $test)
+ if [[ "${exit_code}" -ne 0 ]]; then
+ failed_tests+=("${proto}_${test}")
+ fi;
+ done;
+ done
+ if [[ ${#failed_tests[@]} -ne 0 ]]; then
+ echo "failed tests: ${failed_tests[*]}. \
+ Please see log.txt for more logs"
+ exit 1
+ else
+ echo "All Tests Succeeded!"
+ fi;
+}
+
+usage() {
+ echo "Usage: $0 \
+ [-i <DEV>] \
+ [-t data|ack|flags|tcp|ip|large] \
+ [-p <ipv4|ipv6>]" 1>&2;
+ exit 1;
+}
+
+while getopts "i:t:p:" opt; do
+ case "${opt}" in
+ i)
+ dev="${OPTARG}"
+ ;;
+ t)
+ test="${OPTARG}"
+ ;;
+ p)
+ proto="${OPTARG}"
+ ;;
+ *)
+ usage
+ ;;
+ esac
+done
+
+readonly FLUSH_PATH="/sys/class/net/${dev}/gro_flush_timeout"
+readonly IRQ_PATH="/sys/class/net/${dev}/napi_defer_hard_irqs"
+readonly FLUSH_TIMEOUT="$(< ${FLUSH_PATH})"
+readonly HARD_IRQS="$(< ${IRQ_PATH})"
+setup
+trap cleanup EXIT
+if [[ "${test}" == "all" ]]; then
+ run_all_tests
+else
+ run_test "${proto}" "${test}"
+fi;
MONITOR_ACQUIRE,
EXPIRE_STATE,
EXPIRE_POLICY,
+ SPDINFO_ATTRS,
};
const char *desc_name[] = {
"create tunnel",
"alloc spi",
"monitor acquire",
"expire state",
- "expire policy"
+ "expire policy",
+ "spdinfo attributes",
+ ""
};
struct xfrm_desc {
enum desc_type type;
return ret;
}
+static int xfrm_spdinfo_set_thresh(int xfrm_sock, uint32_t *seq,
+ unsigned thresh4_l, unsigned thresh4_r,
+ unsigned thresh6_l, unsigned thresh6_r,
+ bool add_bad_attr)
+
+{
+ struct {
+ struct nlmsghdr nh;
+ union {
+ uint32_t unused;
+ int error;
+ };
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+ struct xfrmu_spdhthresh thresh;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.unused));
+ req.nh.nlmsg_type = XFRM_MSG_NEWSPDINFO;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_seq = (*seq)++;
+
+ thresh.lbits = thresh4_l;
+ thresh.rbits = thresh4_r;
+ if (rtattr_pack(&req.nh, sizeof(req), XFRMA_SPD_IPV4_HTHRESH, &thresh, sizeof(thresh)))
+ return -1;
+
+ thresh.lbits = thresh6_l;
+ thresh.rbits = thresh6_r;
+ if (rtattr_pack(&req.nh, sizeof(req), XFRMA_SPD_IPV6_HTHRESH, &thresh, sizeof(thresh)))
+ return -1;
+
+ if (add_bad_attr) {
+ BUILD_BUG_ON(XFRMA_IF_ID <= XFRMA_SPD_MAX + 1);
+ if (rtattr_pack(&req.nh, sizeof(req), XFRMA_IF_ID, NULL, 0)) {
+ pr_err("adding attribute failed: no space");
+ return -1;
+ }
+ }
+
+ if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return -1;
+ }
+
+ if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) {
+ pr_err("recv()");
+ return -1;
+ } else if (req.nh.nlmsg_type != NLMSG_ERROR) {
+ printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type);
+ return -1;
+ }
+
+ if (req.error) {
+ printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error));
+ return -1;
+ }
+
+ return 0;
+}
+
+static int xfrm_spdinfo_attrs(int xfrm_sock, uint32_t *seq)
+{
+ struct {
+ struct nlmsghdr nh;
+ union {
+ uint32_t unused;
+ int error;
+ };
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+
+ if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 31, 120, 16, false)) {
+ pr_err("Can't set SPD HTHRESH");
+ return KSFT_FAIL;
+ }
+
+ memset(&req, 0, sizeof(req));
+
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.unused));
+ req.nh.nlmsg_type = XFRM_MSG_GETSPDINFO;
+ req.nh.nlmsg_flags = NLM_F_REQUEST;
+ req.nh.nlmsg_seq = (*seq)++;
+ if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return KSFT_FAIL;
+ }
+
+ if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) {
+ pr_err("recv()");
+ return KSFT_FAIL;
+ } else if (req.nh.nlmsg_type == XFRM_MSG_NEWSPDINFO) {
+ size_t len = NLMSG_PAYLOAD(&req.nh, sizeof(req.unused));
+ struct rtattr *attr = (void *)req.attrbuf;
+ int got_thresh = 0;
+
+ for (; RTA_OK(attr, len); attr = RTA_NEXT(attr, len)) {
+ if (attr->rta_type == XFRMA_SPD_IPV4_HTHRESH) {
+ struct xfrmu_spdhthresh *t = RTA_DATA(attr);
+
+ got_thresh++;
+ if (t->lbits != 32 || t->rbits != 31) {
+ pr_err("thresh differ: %u, %u",
+ t->lbits, t->rbits);
+ return KSFT_FAIL;
+ }
+ }
+ if (attr->rta_type == XFRMA_SPD_IPV6_HTHRESH) {
+ struct xfrmu_spdhthresh *t = RTA_DATA(attr);
+
+ got_thresh++;
+ if (t->lbits != 120 || t->rbits != 16) {
+ pr_err("thresh differ: %u, %u",
+ t->lbits, t->rbits);
+ return KSFT_FAIL;
+ }
+ }
+ }
+ if (got_thresh != 2) {
+ pr_err("only %d thresh returned by XFRM_MSG_GETSPDINFO", got_thresh);
+ return KSFT_FAIL;
+ }
+ } else if (req.nh.nlmsg_type != NLMSG_ERROR) {
+ printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type);
+ return KSFT_FAIL;
+ } else {
+ printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error));
+ return -1;
+ }
+
+ /* Restore the default */
+ if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 32, 128, 128, false)) {
+ pr_err("Can't restore SPD HTHRESH");
+ return KSFT_FAIL;
+ }
+
+ /*
+ * At this moment xfrm uses nlmsg_parse_deprecated(), which
+ * implies NL_VALIDATE_LIBERAL - ignoring attributes with
+ * (type > maxtype). nla_parse_depricated_strict() would enforce
+ * it. Or even stricter nla_parse().
+ * Right now it's not expected to fail, but to be ignored.
+ */
+ if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 32, 128, 128, true))
+ return KSFT_PASS;
+
+ return KSFT_PASS;
+}
+
static int child_serv(int xfrm_sock, uint32_t *seq,
unsigned int nr, int cmd_fd, void *buf, struct xfrm_desc *desc)
{
case EXPIRE_POLICY:
ret = xfrm_expire_policy(xfrm_sock, &seq, nr, &desc);
break;
+ case SPDINFO_ATTRS:
+ ret = xfrm_spdinfo_attrs(xfrm_sock, &seq);
+ break;
default:
printk("Unknown desc type %d", desc.type);
exit(KSFT_FAIL);
* sizeof(xfrm_user_polexpire) = 168 | sizeof(xfrm_user_polexpire) = 176
*
* Check the affected by the UABI difference structures.
+ * Also, check translation for xfrm_set_spdinfo: it has it's own attributes
+ * which needs to be correctly copied, but not translated.
*/
-const unsigned int compat_plan = 4;
+const unsigned int compat_plan = 5;
static int write_compat_struct_tests(int test_desc_fd)
{
struct xfrm_desc desc = {};
if (__write_desc(test_desc_fd, &desc))
return -1;
+ desc.type = SPDINFO_ATTRS;
+ if (__write_desc(test_desc_fd, &desc))
+ return -1;
+
return 0;
}
echo "raw gso min size"
./in_netns.sh ./psock_snd -v -c -g -l "${mss_exceeds}"
-echo "raw gso min size - 1 (expected to fail)"
-(! ./in_netns.sh ./psock_snd -v -c -g -l "${mss}")
-
echo "raw gso max size"
./in_netns.sh ./psock_snd -v -c -g -l "${max_mss}"
--- /dev/null
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+netdev_check_for_carrier() {
+ local -r dev="$1"
+
+ for i in {1..5}; do
+ carrier="$(cat /sys/class/net/${dev}/carrier)"
+ if [[ "${carrier}" -ne 1 ]] ; then
+ echo "carrier not ready yet..." >&2
+ sleep 1
+ else
+ echo "carrier ready" >&2
+ break
+ fi
+ done
+ echo "${carrier}"
+}
+
+# Assumes that there is no existing ipvlan device on the physical device
+setup_loopback_environment() {
+ local dev="$1"
+
+ # Fail hard if cannot turn on loopback mode for current NIC
+ ethtool -K "${dev}" loopback on || exit 1
+ sleep 1
+
+ # Check for the carrier
+ carrier=$(netdev_check_for_carrier ${dev})
+ if [[ "${carrier}" -ne 1 ]] ; then
+ echo "setup_loopback_environment failed"
+ exit 1
+ fi
+}
+
+setup_macvlan_ns(){
+ local -r link_dev="$1"
+ local -r ns_name="$2"
+ local -r ns_dev="$3"
+ local -r ns_mac="$4"
+ local -r addr="$5"
+
+ ip link add link "${link_dev}" dev "${ns_dev}" \
+ address "${ns_mac}" type macvlan
+ exit_code=$?
+ if [[ "${exit_code}" -ne 0 ]]; then
+ echo "setup_macvlan_ns failed"
+ exit $exit_code
+ fi
+
+ [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}"
+ ip link set dev "${ns_dev}" netns "${ns_name}"
+ ip -netns "${ns_name}" link set dev "${ns_dev}" up
+ if [[ -n "${addr}" ]]; then
+ ip -netns "${ns_name}" addr add dev "${ns_dev}" "${addr}"
+ fi
+
+ sleep 1
+}
+
+cleanup_macvlan_ns(){
+ while (( $# >= 2 )); do
+ ns_name="$1"
+ ns_dev="$2"
+ ip -netns "${ns_name}" link del dev "${ns_dev}"
+ ip netns del "${ns_name}"
+ shift 2
+ done
+}
+
+cleanup_loopback(){
+ local -r dev="$1"
+
+ ethtool -K "${dev}" loopback off
+ sleep 1
+
+ # Check for the carrier
+ carrier=$(netdev_check_for_carrier ${dev})
+ if [[ "${carrier}" -ne 1 ]] ; then
+ echo "setup_loopback_environment failed"
+ exit 1
+ fi
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/* Toeplitz test
+ *
+ * 1. Read packets and their rx_hash using PF_PACKET/TPACKET_V3
+ * 2. Compute the rx_hash in software based on the packet contents
+ * 3. Compare the two
+ *
+ * Optionally, either '-C $rx_irq_cpu_list' or '-r $rps_bitmap' may be given.
+ *
+ * If '-C $rx_irq_cpu_list' is given, also
+ *
+ * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU
+ * 5. Compute the rxqueue that RSS would select based on this rx_hash
+ * 6. Using the $rx_irq_cpu_list map, identify the arriving cpu based on rxq irq
+ * 7. Compare the cpus from 4 and 6
+ *
+ * Else if '-r $rps_bitmap' is given, also
+ *
+ * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU
+ * 5. Compute the cpu that RPS should select based on rx_hash and $rps_bitmap
+ * 6. Compare the cpus from 4 and 5
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <linux/filter.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <net/if.h>
+#include <netdb.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/sysinfo.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#define TOEPLITZ_KEY_MIN_LEN 40
+#define TOEPLITZ_KEY_MAX_LEN 60
+
+#define TOEPLITZ_STR_LEN(K) (((K) * 3) - 1) /* hex encoded: AA:BB:CC:...:ZZ */
+#define TOEPLITZ_STR_MIN_LEN TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MIN_LEN)
+#define TOEPLITZ_STR_MAX_LEN TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MAX_LEN)
+
+#define FOUR_TUPLE_MAX_LEN ((sizeof(struct in6_addr) * 2) + (sizeof(uint16_t) * 2))
+
+#define RSS_MAX_CPUS (1 << 16) /* real constraint is PACKET_FANOUT_MAX */
+
+#define RPS_MAX_CPUS 16UL /* must be a power of 2 */
+
+/* configuration options (cmdline arguments) */
+static uint16_t cfg_dport = 8000;
+static int cfg_family = AF_INET6;
+static char *cfg_ifname = "eth0";
+static int cfg_num_queues;
+static int cfg_num_rps_cpus;
+static bool cfg_sink;
+static int cfg_type = SOCK_STREAM;
+static int cfg_timeout_msec = 1000;
+static bool cfg_verbose;
+
+/* global vars */
+static int num_cpus;
+static int ring_block_nr;
+static int ring_block_sz;
+
+/* stats */
+static int frames_received;
+static int frames_nohash;
+static int frames_error;
+
+#define log_verbose(args...) do { if (cfg_verbose) fprintf(stderr, args); } while (0)
+
+/* tpacket ring */
+struct ring_state {
+ int fd;
+ char *mmap;
+ int idx;
+ int cpu;
+};
+
+static unsigned int rx_irq_cpus[RSS_MAX_CPUS]; /* map from rxq to cpu */
+static int rps_silo_to_cpu[RPS_MAX_CPUS];
+static unsigned char toeplitz_key[TOEPLITZ_KEY_MAX_LEN];
+static struct ring_state rings[RSS_MAX_CPUS];
+
+static inline uint32_t toeplitz(const unsigned char *four_tuple,
+ const unsigned char *key)
+{
+ int i, bit, ret = 0;
+ uint32_t key32;
+
+ key32 = ntohl(*((uint32_t *)key));
+ key += 4;
+
+ for (i = 0; i < FOUR_TUPLE_MAX_LEN; i++) {
+ for (bit = 7; bit >= 0; bit--) {
+ if (four_tuple[i] & (1 << bit))
+ ret ^= key32;
+
+ key32 <<= 1;
+ key32 |= !!(key[0] & (1 << bit));
+ }
+ key++;
+ }
+
+ return ret;
+}
+
+/* Compare computed cpu with arrival cpu from packet_fanout_cpu */
+static void verify_rss(uint32_t rx_hash, int cpu)
+{
+ int queue = rx_hash % cfg_num_queues;
+
+ log_verbose(" rxq %d (cpu %d)", queue, rx_irq_cpus[queue]);
+ if (rx_irq_cpus[queue] != cpu) {
+ log_verbose(". error: rss cpu mismatch (%d)", cpu);
+ frames_error++;
+ }
+}
+
+static void verify_rps(uint64_t rx_hash, int cpu)
+{
+ int silo = (rx_hash * cfg_num_rps_cpus) >> 32;
+
+ log_verbose(" silo %d (cpu %d)", silo, rps_silo_to_cpu[silo]);
+ if (rps_silo_to_cpu[silo] != cpu) {
+ log_verbose(". error: rps cpu mismatch (%d)", cpu);
+ frames_error++;
+ }
+}
+
+static void log_rxhash(int cpu, uint32_t rx_hash,
+ const char *addrs, int addr_len)
+{
+ char saddr[INET6_ADDRSTRLEN], daddr[INET6_ADDRSTRLEN];
+ uint16_t *ports;
+
+ if (!inet_ntop(cfg_family, addrs, saddr, sizeof(saddr)) ||
+ !inet_ntop(cfg_family, addrs + addr_len, daddr, sizeof(daddr)))
+ error(1, 0, "address parse error");
+
+ ports = (void *)addrs + (addr_len * 2);
+ log_verbose("cpu %d: rx_hash 0x%08x [saddr %s daddr %s sport %02hu dport %02hu]",
+ cpu, rx_hash, saddr, daddr,
+ ntohs(ports[0]), ntohs(ports[1]));
+}
+
+/* Compare computed rxhash with rxhash received from tpacket_v3 */
+static void verify_rxhash(const char *pkt, uint32_t rx_hash, int cpu)
+{
+ unsigned char four_tuple[FOUR_TUPLE_MAX_LEN] = {0};
+ uint32_t rx_hash_sw;
+ const char *addrs;
+ int addr_len;
+
+ if (cfg_family == AF_INET) {
+ addr_len = sizeof(struct in_addr);
+ addrs = pkt + offsetof(struct iphdr, saddr);
+ } else {
+ addr_len = sizeof(struct in6_addr);
+ addrs = pkt + offsetof(struct ip6_hdr, ip6_src);
+ }
+
+ memcpy(four_tuple, addrs, (addr_len * 2) + (sizeof(uint16_t) * 2));
+ rx_hash_sw = toeplitz(four_tuple, toeplitz_key);
+
+ if (cfg_verbose)
+ log_rxhash(cpu, rx_hash, addrs, addr_len);
+
+ if (rx_hash != rx_hash_sw) {
+ log_verbose(" != expected 0x%x\n", rx_hash_sw);
+ frames_error++;
+ return;
+ }
+
+ log_verbose(" OK");
+ if (cfg_num_queues)
+ verify_rss(rx_hash, cpu);
+ else if (cfg_num_rps_cpus)
+ verify_rps(rx_hash, cpu);
+ log_verbose("\n");
+}
+
+static char *recv_frame(const struct ring_state *ring, char *frame)
+{
+ struct tpacket3_hdr *hdr = (void *)frame;
+
+ if (hdr->hv1.tp_rxhash)
+ verify_rxhash(frame + hdr->tp_net, hdr->hv1.tp_rxhash,
+ ring->cpu);
+ else
+ frames_nohash++;
+
+ return frame + hdr->tp_next_offset;
+}
+
+/* A single TPACKET_V3 block can hold multiple frames */
+static void recv_block(struct ring_state *ring)
+{
+ struct tpacket_block_desc *block;
+ char *frame;
+ int i;
+
+ block = (void *)(ring->mmap + ring->idx * ring_block_sz);
+ if (!(block->hdr.bh1.block_status & TP_STATUS_USER))
+ return;
+
+ frame = (char *)block;
+ frame += block->hdr.bh1.offset_to_first_pkt;
+
+ for (i = 0; i < block->hdr.bh1.num_pkts; i++) {
+ frame = recv_frame(ring, frame);
+ frames_received++;
+ }
+
+ block->hdr.bh1.block_status = TP_STATUS_KERNEL;
+ ring->idx = (ring->idx + 1) % ring_block_nr;
+}
+
+/* simple test: sleep once unconditionally and then process all rings */
+static void process_rings(void)
+{
+ int i;
+
+ usleep(1000 * cfg_timeout_msec);
+
+ for (i = 0; i < num_cpus; i++)
+ recv_block(&rings[i]);
+
+ fprintf(stderr, "count: pass=%u nohash=%u fail=%u\n",
+ frames_received - frames_nohash - frames_error,
+ frames_nohash, frames_error);
+}
+
+static char *setup_ring(int fd)
+{
+ struct tpacket_req3 req3 = {0};
+ void *ring;
+
+ req3.tp_retire_blk_tov = cfg_timeout_msec;
+ req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
+
+ req3.tp_frame_size = 2048;
+ req3.tp_frame_nr = 1 << 10;
+ req3.tp_block_nr = 2;
+
+ req3.tp_block_size = req3.tp_frame_size * req3.tp_frame_nr;
+ req3.tp_block_size /= req3.tp_block_nr;
+
+ if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req3, sizeof(req3)))
+ error(1, errno, "setsockopt PACKET_RX_RING");
+
+ ring_block_sz = req3.tp_block_size;
+ ring_block_nr = req3.tp_block_nr;
+
+ ring = mmap(0, req3.tp_block_size * req3.tp_block_nr,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_LOCKED | MAP_POPULATE, fd, 0);
+ if (ring == MAP_FAILED)
+ error(1, 0, "mmap failed");
+
+ return ring;
+}
+
+static void __set_filter(int fd, int off_proto, uint8_t proto, int off_dport)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4),
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, off_proto),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, proto, 0, 2),
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, off_dport),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_dport, 1, 0),
+ BPF_STMT(BPF_RET + BPF_K, 0),
+ BPF_STMT(BPF_RET + BPF_K, 0xFFFF),
+ };
+ struct sock_fprog prog = {};
+
+ prog.filter = filter;
+ prog.len = sizeof(filter) / sizeof(struct sock_filter);
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
+ error(1, errno, "setsockopt filter");
+}
+
+/* filter on transport protocol and destination port */
+static void set_filter(int fd)
+{
+ const int off_dport = offsetof(struct tcphdr, dest); /* same for udp */
+ uint8_t proto;
+
+ proto = cfg_type == SOCK_STREAM ? IPPROTO_TCP : IPPROTO_UDP;
+ if (cfg_family == AF_INET)
+ __set_filter(fd, offsetof(struct iphdr, protocol), proto,
+ sizeof(struct iphdr) + off_dport);
+ else
+ __set_filter(fd, offsetof(struct ip6_hdr, ip6_nxt), proto,
+ sizeof(struct ip6_hdr) + off_dport);
+}
+
+/* drop everything: used temporarily during setup */
+static void set_filter_null(int fd)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET + BPF_K, 0),
+ };
+ struct sock_fprog prog = {};
+
+ prog.filter = filter;
+ prog.len = sizeof(filter) / sizeof(struct sock_filter);
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
+ error(1, errno, "setsockopt filter");
+}
+
+static int create_ring(char **ring)
+{
+ struct fanout_args args = {
+ .id = 1,
+ .type_flags = PACKET_FANOUT_CPU,
+ .max_num_members = RSS_MAX_CPUS
+ };
+ struct sockaddr_ll ll = { 0 };
+ int fd, val;
+
+ fd = socket(PF_PACKET, SOCK_DGRAM, 0);
+ if (fd == -1)
+ error(1, errno, "socket creation failed");
+
+ val = TPACKET_V3;
+ if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, &val, sizeof(val)))
+ error(1, errno, "setsockopt PACKET_VERSION");
+ *ring = setup_ring(fd);
+
+ /* block packets until all rings are added to the fanout group:
+ * else packets can arrive during setup and get misclassified
+ */
+ set_filter_null(fd);
+
+ ll.sll_family = AF_PACKET;
+ ll.sll_ifindex = if_nametoindex(cfg_ifname);
+ ll.sll_protocol = cfg_family == AF_INET ? htons(ETH_P_IP) :
+ htons(ETH_P_IPV6);
+ if (bind(fd, (void *)&ll, sizeof(ll)))
+ error(1, errno, "bind");
+
+ /* must come after bind: verifies all programs in group match */
+ if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &args, sizeof(args))) {
+ /* on failure, retry using old API if that is sufficient:
+ * it has a hard limit of 256 sockets, so only try if
+ * (a) only testing rxhash, not RSS or (b) <= 256 cpus.
+ * in this API, the third argument is left implicit.
+ */
+ if (cfg_num_queues || num_cpus > 256 ||
+ setsockopt(fd, SOL_PACKET, PACKET_FANOUT,
+ &args, sizeof(uint32_t)))
+ error(1, errno, "setsockopt PACKET_FANOUT cpu");
+ }
+
+ return fd;
+}
+
+/* setup inet(6) socket to blackhole the test traffic, if arg '-s' */
+static int setup_sink(void)
+{
+ int fd, val;
+
+ fd = socket(cfg_family, cfg_type, 0);
+ if (fd == -1)
+ error(1, errno, "socket %d.%d", cfg_family, cfg_type);
+
+ val = 1 << 20;
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &val, sizeof(val)))
+ error(1, errno, "setsockopt rcvbuf");
+
+ return fd;
+}
+
+static void setup_rings(void)
+{
+ int i;
+
+ for (i = 0; i < num_cpus; i++) {
+ rings[i].cpu = i;
+ rings[i].fd = create_ring(&rings[i].mmap);
+ }
+
+ /* accept packets once all rings in the fanout group are up */
+ for (i = 0; i < num_cpus; i++)
+ set_filter(rings[i].fd);
+}
+
+static void cleanup_rings(void)
+{
+ int i;
+
+ for (i = 0; i < num_cpus; i++) {
+ if (munmap(rings[i].mmap, ring_block_nr * ring_block_sz))
+ error(1, errno, "munmap");
+ if (close(rings[i].fd))
+ error(1, errno, "close");
+ }
+}
+
+static void parse_cpulist(const char *arg)
+{
+ do {
+ rx_irq_cpus[cfg_num_queues++] = strtol(arg, NULL, 10);
+
+ arg = strchr(arg, ',');
+ if (!arg)
+ break;
+ arg++; // skip ','
+ } while (1);
+}
+
+static void show_cpulist(void)
+{
+ int i;
+
+ for (i = 0; i < cfg_num_queues; i++)
+ fprintf(stderr, "rxq %d: cpu %d\n", i, rx_irq_cpus[i]);
+}
+
+static void show_silos(void)
+{
+ int i;
+
+ for (i = 0; i < cfg_num_rps_cpus; i++)
+ fprintf(stderr, "silo %d: cpu %d\n", i, rps_silo_to_cpu[i]);
+}
+
+static void parse_toeplitz_key(const char *str, int slen, unsigned char *key)
+{
+ int i, ret, off;
+
+ if (slen < TOEPLITZ_STR_MIN_LEN ||
+ slen > TOEPLITZ_STR_MAX_LEN + 1)
+ error(1, 0, "invalid toeplitz key");
+
+ for (i = 0, off = 0; off < slen; i++, off += 3) {
+ ret = sscanf(str + off, "%hhx", &key[i]);
+ if (ret != 1)
+ error(1, 0, "key parse error at %d off %d len %d",
+ i, off, slen);
+ }
+}
+
+static void parse_rps_bitmap(const char *arg)
+{
+ unsigned long bitmap;
+ int i;
+
+ bitmap = strtoul(arg, NULL, 0);
+
+ if (bitmap & ~(RPS_MAX_CPUS - 1))
+ error(1, 0, "rps bitmap 0x%lx out of bounds 0..%lu",
+ bitmap, RPS_MAX_CPUS - 1);
+
+ for (i = 0; i < RPS_MAX_CPUS; i++)
+ if (bitmap & 1UL << i)
+ rps_silo_to_cpu[cfg_num_rps_cpus++] = i;
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ static struct option long_options[] = {
+ {"dport", required_argument, 0, 'd'},
+ {"cpus", required_argument, 0, 'C'},
+ {"key", required_argument, 0, 'k'},
+ {"iface", required_argument, 0, 'i'},
+ {"ipv4", no_argument, 0, '4'},
+ {"ipv6", no_argument, 0, '6'},
+ {"sink", no_argument, 0, 's'},
+ {"tcp", no_argument, 0, 't'},
+ {"timeout", required_argument, 0, 'T'},
+ {"udp", no_argument, 0, 'u'},
+ {"verbose", no_argument, 0, 'v'},
+ {"rps", required_argument, 0, 'r'},
+ {0, 0, 0, 0}
+ };
+ bool have_toeplitz = false;
+ int index, c;
+
+ while ((c = getopt_long(argc, argv, "46C:d:i:k:r:stT:u:v", long_options, &index)) != -1) {
+ switch (c) {
+ case '4':
+ cfg_family = AF_INET;
+ break;
+ case '6':
+ cfg_family = AF_INET6;
+ break;
+ case 'C':
+ parse_cpulist(optarg);
+ break;
+ case 'd':
+ cfg_dport = strtol(optarg, NULL, 0);
+ break;
+ case 'i':
+ cfg_ifname = optarg;
+ break;
+ case 'k':
+ parse_toeplitz_key(optarg, strlen(optarg),
+ toeplitz_key);
+ have_toeplitz = true;
+ break;
+ case 'r':
+ parse_rps_bitmap(optarg);
+ break;
+ case 's':
+ cfg_sink = true;
+ break;
+ case 't':
+ cfg_type = SOCK_STREAM;
+ break;
+ case 'T':
+ cfg_timeout_msec = strtol(optarg, NULL, 0);
+ break;
+ case 'u':
+ cfg_type = SOCK_DGRAM;
+ break;
+ case 'v':
+ cfg_verbose = true;
+ break;
+
+ default:
+ error(1, 0, "unknown option %c", optopt);
+ break;
+ }
+ }
+
+ if (!have_toeplitz)
+ error(1, 0, "Must supply rss key ('-k')");
+
+ num_cpus = get_nprocs();
+ if (num_cpus > RSS_MAX_CPUS)
+ error(1, 0, "increase RSS_MAX_CPUS");
+
+ if (cfg_num_queues && cfg_num_rps_cpus)
+ error(1, 0,
+ "Can't supply both RSS cpus ('-C') and RPS map ('-r')");
+ if (cfg_verbose) {
+ show_cpulist();
+ show_silos();
+ }
+}
+
+int main(int argc, char **argv)
+{
+ const int min_tests = 10;
+ int fd_sink = -1;
+
+ parse_opts(argc, argv);
+
+ if (cfg_sink)
+ fd_sink = setup_sink();
+
+ setup_rings();
+ process_rings();
+ cleanup_rings();
+
+ if (cfg_sink && close(fd_sink))
+ error(1, errno, "close sink");
+
+ if (frames_received - frames_nohash < min_tests)
+ error(1, 0, "too few frames for verification");
+
+ return frames_error;
+}
--- /dev/null
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# extended toeplitz test: test rxhash plus, optionally, either (1) rss mapping
+# from rxhash to rx queue ('-rss') or (2) rps mapping from rxhash to cpu
+# ('-rps <rps_map>')
+#
+# irq-pattern-prefix can be derived from /sys/kernel/irq/*/action,
+# which is a driver-specific encoding.
+#
+# invoke as ./toeplitz.sh (-i <iface>) -u|-t -4|-6 \
+# [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)]
+
+source setup_loopback.sh
+readonly SERVER_IP4="192.168.1.200/24"
+readonly SERVER_IP6="fda8::1/64"
+readonly SERVER_MAC="aa:00:00:00:00:02"
+
+readonly CLIENT_IP4="192.168.1.100/24"
+readonly CLIENT_IP6="fda8::2/64"
+readonly CLIENT_MAC="aa:00:00:00:00:01"
+
+PORT=8000
+KEY="$(</proc/sys/net/core/netdev_rss_key)"
+TEST_RSS=false
+RPS_MAP=""
+PROTO_FLAG=""
+IP_FLAG=""
+DEV="eth0"
+
+# Return the number of rxqs among which RSS is configured to spread packets.
+# This is determined by reading the RSS indirection table using ethtool.
+get_rss_cfg_num_rxqs() {
+ echo $(ethtool -x "${DEV}" |
+ egrep [[:space:]]+[0-9]+:[[:space:]]+ |
+ cut -d: -f2- |
+ awk '{$1=$1};1' |
+ tr ' ' '\n' |
+ sort -u |
+ wc -l)
+}
+
+# Return a list of the receive irq handler cpus.
+# The list is ordered by the irqs, so first rxq-0 cpu, then rxq-1 cpu, etc.
+# Reads /sys/kernel/irq/ in order, so algorithm depends on
+# irq_{rxq-0} < irq_{rxq-1}, etc.
+get_rx_irq_cpus() {
+ CPUS=""
+ # sort so that irq 2 is read before irq 10
+ SORTED_IRQS=$(for i in /sys/kernel/irq/*; do echo $i; done | sort -V)
+ # Consider only as many queues as RSS actually uses. We assume that
+ # if RSS_CFG_NUM_RXQS=N, then RSS uses rxqs 0-(N-1).
+ RSS_CFG_NUM_RXQS=$(get_rss_cfg_num_rxqs)
+ RXQ_COUNT=0
+
+ for i in ${SORTED_IRQS}
+ do
+ [[ "${RXQ_COUNT}" -lt "${RSS_CFG_NUM_RXQS}" ]] || break
+ # lookup relevant IRQs by action name
+ [[ -e "$i/actions" ]] || continue
+ cat "$i/actions" | grep -q "${IRQ_PATTERN}" || continue
+ irqname=$(<"$i/actions")
+
+ # does the IRQ get called
+ irqcount=$(cat "$i/per_cpu_count" | tr -d '0,')
+ [[ -n "${irqcount}" ]] || continue
+
+ # lookup CPU
+ irq=$(basename "$i")
+ cpu=$(cat "/proc/irq/$irq/smp_affinity_list")
+
+ if [[ -z "${CPUS}" ]]; then
+ CPUS="${cpu}"
+ else
+ CPUS="${CPUS},${cpu}"
+ fi
+ RXQ_COUNT=$((RXQ_COUNT+1))
+ done
+
+ echo "${CPUS}"
+}
+
+get_disable_rfs_cmd() {
+ echo "echo 0 > /proc/sys/net/core/rps_sock_flow_entries;"
+}
+
+get_set_rps_bitmaps_cmd() {
+ CMD=""
+ for i in /sys/class/net/${DEV}/queues/rx-*/rps_cpus
+ do
+ CMD="${CMD} echo $1 > ${i};"
+ done
+
+ echo "${CMD}"
+}
+
+get_disable_rps_cmd() {
+ echo "$(get_set_rps_bitmaps_cmd 0)"
+}
+
+die() {
+ echo "$1"
+ exit 1
+}
+
+check_nic_rxhash_enabled() {
+ local -r pattern="receive-hashing:\ on"
+
+ ethtool -k "${DEV}" | grep -q "${pattern}" || die "rxhash must be enabled"
+}
+
+parse_opts() {
+ local prog=$0
+ shift 1
+
+ while [[ "$1" =~ "-" ]]; do
+ if [[ "$1" = "-irq_prefix" ]]; then
+ shift
+ IRQ_PATTERN="^$1-[0-9]*$"
+ elif [[ "$1" = "-u" || "$1" = "-t" ]]; then
+ PROTO_FLAG="$1"
+ elif [[ "$1" = "-4" ]]; then
+ IP_FLAG="$1"
+ SERVER_IP="${SERVER_IP4}"
+ CLIENT_IP="${CLIENT_IP4}"
+ elif [[ "$1" = "-6" ]]; then
+ IP_FLAG="$1"
+ SERVER_IP="${SERVER_IP6}"
+ CLIENT_IP="${CLIENT_IP6}"
+ elif [[ "$1" = "-rss" ]]; then
+ TEST_RSS=true
+ elif [[ "$1" = "-rps" ]]; then
+ shift
+ RPS_MAP="$1"
+ elif [[ "$1" = "-i" ]]; then
+ shift
+ DEV="$1"
+ else
+ die "Usage: ${prog} (-i <iface>) -u|-t -4|-6 \
+ [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)]"
+ fi
+ shift
+ done
+}
+
+setup() {
+ setup_loopback_environment "${DEV}"
+
+ # Set up server_ns namespace and client_ns namespace
+ setup_macvlan_ns "${DEV}" server_ns server \
+ "${SERVER_MAC}" "${SERVER_IP}"
+ setup_macvlan_ns "${DEV}" client_ns client \
+ "${CLIENT_MAC}" "${CLIENT_IP}"
+}
+
+cleanup() {
+ cleanup_macvlan_ns server_ns server client_ns client
+ cleanup_loopback "${DEV}"
+}
+
+parse_opts $0 $@
+
+setup
+trap cleanup EXIT
+
+check_nic_rxhash_enabled
+
+# Actual test starts here
+if [[ "${TEST_RSS}" = true ]]; then
+ # RPS/RFS must be disabled because they move packets between cpus,
+ # which breaks the PACKET_FANOUT_CPU identification of RSS decisions.
+ eval "$(get_disable_rfs_cmd) $(get_disable_rps_cmd)" \
+ ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
+ -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \
+ -C "$(get_rx_irq_cpus)" -s -v &
+elif [[ ! -z "${RPS_MAP}" ]]; then
+ eval "$(get_disable_rfs_cmd) $(get_set_rps_bitmaps_cmd ${RPS_MAP})" \
+ ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
+ -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \
+ -r "0x${RPS_MAP}" -s -v &
+else
+ ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
+ -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 -s -v &
+fi
+
+server_pid=$!
+
+ip netns exec client_ns ./toeplitz_client.sh "${PROTO_FLAG}" \
+ "${IP_FLAG}" "${SERVER_IP%%/*}" "${PORT}" &
+
+client_pid=$!
+
+wait "${server_pid}"
+exit_code=$?
+kill -9 "${client_pid}"
+if [[ "${exit_code}" -eq 0 ]]; then
+ echo "Test Succeeded!"
+fi
+exit "${exit_code}"
--- /dev/null
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# A simple program for generating traffic for the toeplitz test.
+#
+# This program sends packets periodically for, conservatively, 20 seconds. The
+# intent is for the calling program to kill this program once it is no longer
+# needed, rather than waiting for the 20 second expiration.
+
+send_traffic() {
+ expiration=$((SECONDS+20))
+ while [[ "${SECONDS}" -lt "${expiration}" ]]
+ do
+ if [[ "${PROTO}" == "-u" ]]; then
+ echo "msg $i" | nc "${IPVER}" -u -w 0 "${ADDR}" "${PORT}"
+ else
+ echo "msg $i" | nc "${IPVER}" -w 0 "${ADDR}" "${PORT}"
+ fi
+ sleep 0.001
+ done
+}
+
+PROTO=$1
+IPVER=$2
+ADDR=$3
+PORT=$4
+
+send_traffic
--- /dev/null
+[
+ {
+ "id": "ce7d",
+ "name": "Add mq Qdisc to multi-queue device (4 queues)",
+ "category": [
+ "qdisc",
+ "mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+ "matchCount": "4",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "2f82",
+ "name": "Add mq Qdisc to multi-queue device (256 queues)",
+ "category": [
+ "qdisc",
+ "mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 256\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-9,a-f][0-9,a-f]{0,2} bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+ "matchCount": "256",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "c525",
+ "name": "Add duplicate mq Qdisc",
+ "category": [
+ "qdisc",
+ "mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device",
+ "$TC qdisc add dev $ETH root handle 1: mq"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+ "matchCount": "4",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "128a",
+ "name": "Delete nonexistent mq Qdisc",
+ "category": [
+ "qdisc",
+ "mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1: mq",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+ "matchCount": "0",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "03a9",
+ "name": "Delete mq Qdisc twice",
+ "category": [
+ "qdisc",
+ "mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device",
+ "$TC qdisc add dev $ETH root handle 1: mq",
+ "$TC qdisc del dev $ETH root handle 1: mq"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1: mq",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+ "matchCount": "0",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "be0f",
+ "name": "Add mq Qdisc to single-queue device",
+ "category": [
+ "qdisc",
+ "mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+ "matchCount": "0",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ }
+]
'DEV1': 'v0p1',
'DEV2': '',
'DUMMY': 'dummy1',
+ 'ETH': 'eth0',
'BATCH_FILE': './batch.txt',
'BATCH_DIR': 'tmp',
# Length of time in seconds to wait before terminating a command
static void anon_allocate_area(void **alloc_area)
{
- if (posix_memalign(alloc_area, page_size, nr_pages * page_size))
- err("posix_memalign() failed");
+ *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (*alloc_area == MAP_FAILED)
+ err("mmap of anonymous memory failed");
}
static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset)
static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
{
+ static DEFINE_MUTEX(kvm_debugfs_lock);
+ struct dentry *dent;
char dir_name[ITOA_MAX_LEN * 2];
struct kvm_stat_data *stat_data;
const struct _kvm_stats_desc *pdesc;
return 0;
snprintf(dir_name, sizeof(dir_name), "%d-%d", task_pid_nr(current), fd);
- kvm->debugfs_dentry = debugfs_create_dir(dir_name, kvm_debugfs_dir);
+ mutex_lock(&kvm_debugfs_lock);
+ dent = debugfs_lookup(dir_name, kvm_debugfs_dir);
+ if (dent) {
+ pr_warn_ratelimited("KVM: debugfs: duplicate directory %s\n", dir_name);
+ dput(dent);
+ mutex_unlock(&kvm_debugfs_lock);
+ return 0;
+ }
+ dent = debugfs_create_dir(dir_name, kvm_debugfs_dir);
+ mutex_unlock(&kvm_debugfs_lock);
+ if (IS_ERR(dent))
+ return 0;
+ kvm->debugfs_dentry = dent;
kvm->debugfs_stat_data = kcalloc(kvm_debugfs_num_entries,
sizeof(*kvm->debugfs_stat_data),
GFP_KERNEL_ACCOUNT);
++vcpu->stat.generic.halt_poll_invalid;
goto out;
}
+ cpu_relax();
poll_end = cur = ktime_get();
} while (kvm_vcpu_can_poll(cur, stop));
}
};
};
+struct compat_kvm_clear_dirty_log {
+ __u32 slot;
+ __u32 num_pages;
+ __u64 first_page;
+ union {
+ compat_uptr_t dirty_bitmap; /* one bit per page */
+ __u64 padding2;
+ };
+};
+
static long kvm_vm_compat_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
if (kvm->mm != current->mm)
return -EIO;
switch (ioctl) {
+#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
+ case KVM_CLEAR_DIRTY_LOG: {
+ struct compat_kvm_clear_dirty_log compat_log;
+ struct kvm_clear_dirty_log log;
+
+ if (copy_from_user(&compat_log, (void __user *)arg,
+ sizeof(compat_log)))
+ return -EFAULT;
+ log.slot = compat_log.slot;
+ log.num_pages = compat_log.num_pages;
+ log.first_page = compat_log.first_page;
+ log.padding2 = compat_log.padding2;
+ log.dirty_bitmap = compat_ptr(compat_log.dirty_bitmap);
+
+ r = kvm_vm_ioctl_clear_dirty_log(kvm, &log);
+ break;
+ }
+#endif
case KVM_GET_DIRTY_LOG: {
struct compat_kvm_dirty_log compat_log;
struct kvm_dirty_log log;
}
add_uevent_var(env, "PID=%d", kvm->userspace_pid);
- if (!IS_ERR_OR_NULL(kvm->debugfs_dentry)) {
+ if (kvm->debugfs_dentry) {
char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL_ACCOUNT);
if (p) {