Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc
authorDavid S. Miller <davem@davemloft.net>
Mon, 8 Jun 2020 00:11:41 +0000 (17:11 -0700)
committerDavid S. Miller <davem@davemloft.net>
Mon, 8 Jun 2020 00:11:41 +0000 (17:11 -0700)
300 files changed:
Documentation/networking/devlink/ice.rst
Documentation/virt/kvm/index.rst
Documentation/virt/kvm/running-nested-guests.rst [new file with mode: 0644]
MAINTAINERS
Makefile
arch/arm/include/asm/futex.h
arch/arm64/kvm/guest.c
arch/arm64/kvm/hyp/entry.S
arch/arm64/kvm/hyp/hyp-entry.S
arch/arm64/kvm/hyp/sysreg-sr.c
arch/arm64/mm/hugetlbpage.c
arch/powerpc/kvm/powerpc.c
arch/riscv/include/asm/csr.h
arch/riscv/include/asm/hwcap.h
arch/riscv/include/asm/set_memory.h
arch/riscv/kernel/cpu_ops.c
arch/riscv/kernel/cpufeature.c
arch/riscv/kernel/smp.c
arch/riscv/kernel/vdso/Makefile
arch/riscv/kernel/vdso/note.S [new file with mode: 0644]
arch/riscv/mm/init.c
arch/s390/kvm/kvm-s390.c
arch/s390/kvm/priv.c
arch/sparc/include/asm/page_32.h
arch/sparc/include/asm/pgalloc_32.h
arch/sparc/include/asm/pgtable_32.h
arch/sparc/include/asm/pgtsrmmu.h
arch/sparc/include/asm/viking.h
arch/sparc/kernel/head_32.S
arch/sparc/kernel/ptrace_32.c
arch/sparc/kernel/ptrace_64.c
arch/sparc/kernel/sys_sparc32.c
arch/sparc/mm/hypersparc.S
arch/sparc/mm/init_32.c
arch/sparc/mm/srmmu.c
arch/sparc/mm/viking.S
arch/x86/entry/calling.h
arch/x86/entry/entry_64.S
arch/x86/include/asm/ftrace.h
arch/x86/include/asm/kvm_host.h
arch/x86/include/asm/unwind.h
arch/x86/kernel/apic/apic.c
arch/x86/kernel/dumpstack_64.c
arch/x86/kernel/unwind_frame.c
arch/x86/kernel/unwind_orc.c
arch/x86/kvm/ioapic.c
arch/x86/kvm/svm/sev.c
arch/x86/kvm/svm/svm.c
arch/x86/kvm/vmx/nested.c
arch/x86/kvm/vmx/vmenter.S
arch/x86/kvm/x86.c
arch/x86/mm/pat/set_memory.c
block/bfq-iosched.c
block/blk-cgroup.c
block/blk-iocost.c
crypto/lrw.c
crypto/xts.c
drivers/amba/bus.c
drivers/base/component.c
drivers/base/core.c
drivers/base/dd.c
drivers/base/platform.c
drivers/bus/mhi/core/init.c
drivers/bus/mhi/core/internal.h
drivers/bus/mhi/core/main.c
drivers/bus/mhi/core/pm.c
drivers/crypto/chelsio/chcr_ktls.c
drivers/firmware/efi/tpm.c
drivers/gpio/gpio-pca953x.c
drivers/gpio/gpio-tegra.c
drivers/gpio/gpiolib.c
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
drivers/gpu/drm/amd/display/dc/core/dc.c
drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c
drivers/gpu/drm/amd/display/dc/os_types.h
drivers/gpu/drm/drm_hdcp.c
drivers/gpu/drm/ingenic/ingenic-drm.c
drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c
drivers/gpu/drm/virtio/virtgpu_drv.h
drivers/gpu/drm/virtio/virtgpu_gem.c
drivers/gpu/drm/virtio/virtgpu_ioctl.c
drivers/interconnect/qcom/osm-l3.c
drivers/interconnect/qcom/sdm845.c
drivers/iommu/amd_iommu.c
drivers/iommu/amd_iommu_types.h
drivers/iommu/virtio-iommu.c
drivers/misc/mei/hw-me.c
drivers/misc/mei/hw-me.h
drivers/misc/mei/pci-me.c
drivers/most/core.c
drivers/net/dsa/mv88e6xxx/Kconfig
drivers/net/dsa/mv88e6xxx/chip.c
drivers/net/dsa/ocelot/felix.c
drivers/net/dsa/ocelot/felix.h
drivers/net/dsa/ocelot/felix_vsc9959.c
drivers/net/dsa/sja1105/Kconfig
drivers/net/dsa/sja1105/sja1105_ptp.c
drivers/net/ethernet/amazon/ena/ena_netdev.h
drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
drivers/net/ethernet/broadcom/bgmac-platform.c
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/broadcom/bnxt/bnxt.h
drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h
drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
drivers/net/ethernet/cadence/Kconfig
drivers/net/ethernet/cadence/macb_main.c
drivers/net/ethernet/cavium/Kconfig
drivers/net/ethernet/chelsio/cxgb4/sge.c
drivers/net/ethernet/freescale/enetc/enetc_pci_mdio.c
drivers/net/ethernet/ibm/ibmvnic.c
drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
drivers/net/ethernet/mellanox/mlx4/main.c
drivers/net/ethernet/mellanox/mlx5/core/cmd.c
drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
drivers/net/ethernet/moxa/moxart_ether.c
drivers/net/ethernet/mscc/ocelot.c
drivers/net/ethernet/mscc/ocelot_regs.c
drivers/net/ethernet/natsemi/jazzsonic.c
drivers/net/ethernet/netronome/nfp/abm/main.c
drivers/net/ethernet/pensando/ionic/ionic_debugfs.c
drivers/net/ethernet/pensando/ionic/ionic_lif.c
drivers/net/ethernet/stmicro/stmmac/dwmac5.c
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
drivers/net/ethernet/ti/Kconfig
drivers/net/ethernet/ti/am65-cpsw-nuss.c
drivers/net/ethernet/toshiba/tc35815.c
drivers/net/gtp.c
drivers/net/hyperv/netvsc_drv.c
drivers/net/ipa/gsi.c
drivers/net/ipa/gsi_reg.h
drivers/net/ipa/ipa_endpoint.c
drivers/net/macsec.c
drivers/net/phy/dp83640.c
drivers/net/phy/dp83822.c
drivers/net/phy/dp83tc811.c
drivers/net/phy/marvell10g.c
drivers/net/usb/qmi_wwan.c
drivers/net/wireguard/queueing.c
drivers/net/wireguard/receive.c
drivers/net/wireguard/selftest/ratelimiter.c
drivers/net/wireguard/send.c
drivers/net/wireguard/socket.c
drivers/nvme/host/core.c
drivers/nvme/host/pci.c
drivers/phy/qualcomm/phy-qcom-qusb2.c
drivers/phy/qualcomm/phy-qcom-usb-hs-28nm.c
drivers/platform/chrome/cros_ec_sensorhub.c
drivers/platform/chrome/cros_ec_sensorhub_ring.c
drivers/regulator/core.c
drivers/s390/net/qeth_core_main.c
drivers/sbus/char/oradax.c
drivers/scsi/ibmvscsi/ibmvfc.c
drivers/scsi/ibmvscsi/ibmvscsi.c
drivers/scsi/qla2xxx/qla_attr.c
drivers/scsi/qla2xxx/qla_mbx.c
drivers/staging/gasket/gasket_core.c
drivers/staging/ks7010/TODO
drivers/thunderbolt/usb4.c
drivers/tty/serial/bcm63xx_uart.c
drivers/tty/serial/xilinx_uartps.c
drivers/tty/vt/vt.c
drivers/usb/chipidea/ci_hdrc_msm.c
drivers/usb/core/devio.c
drivers/usb/core/message.c
drivers/usb/serial/garmin_gps.c
drivers/usb/serial/qcserial.c
drivers/usb/storage/unusual_uas.h
drivers/usb/typec/mux/intel_pmc_mux.c
drivers/vhost/vsock.c
fs/ceph/caps.c
fs/ceph/debugfs.c
fs/ceph/mds_client.c
fs/ceph/quota.c
fs/configfs/dir.c
fs/coredump.c
fs/eventpoll.c
fs/gfs2/bmap.c
fs/gfs2/glock.c
fs/gfs2/inode.c
fs/gfs2/log.c
fs/gfs2/lops.c
fs/gfs2/meta_io.c
fs/gfs2/quota.c
fs/gfs2/quota.h
fs/gfs2/super.c
fs/gfs2/util.c
fs/io_uring.c
fs/splice.c
fs/vboxsf/super.c
include/drm/drm_modes.h
include/linux/amba/bus.h
include/linux/backing-dev-defs.h
include/linux/backing-dev.h
include/linux/lsm_hook_defs.h
include/linux/mhi.h
include/linux/platform_data/cros_ec_sensorhub.h
include/linux/platform_device.h
include/linux/sunrpc/gss_api.h
include/linux/sunrpc/gss_krb5.h
include/linux/sunrpc/xdr.h
include/linux/tcp.h
include/linux/virtio_net.h
include/linux/virtio_vsock.h
include/net/flow_offload.h
include/net/inet_ecn.h
include/net/ip6_fib.h
include/net/mptcp.h
include/net/net_namespace.h
include/net/sch_generic.h
include/soc/mscc/ocelot.h
include/trace/events/gpu_mem.h
include/trace/events/wbt.h
init/Kconfig
init/initramfs.c
init/main.c
ipc/mqueue.c
kernel/kcov.c
kernel/trace/Kconfig
kernel/trace/preemptirq_delay_test.c
kernel/trace/trace.c
kernel/trace/trace_boot.c
kernel/trace/trace_kprobe.c
kernel/umh.c
lib/Kconfig.ubsan
mm/Kconfig
mm/backing-dev.c
mm/memcontrol.c
mm/page_alloc.c
mm/percpu.c
mm/slub.c
mm/vmscan.c
net/atm/common.c
net/atm/lec.c
net/batman-adv/bat_v_ogm.c
net/batman-adv/network-coding.c
net/batman-adv/sysfs.c
net/bridge/br_netlink.c
net/core/devlink.c
net/core/drop_monitor.c
net/core/neighbour.c
net/core/sock.c
net/dsa/dsa2.c
net/dsa/master.c
net/dsa/slave.c
net/hsr/hsr_slave.c
net/ipv4/tcp_input.c
net/ipv6/route.c
net/ipv6/seg6.c
net/mptcp/options.c
net/mptcp/protocol.c
net/mptcp/protocol.h
net/mptcp/subflow.c
net/netfilter/nf_nat_proto.c
net/netfilter/nfnetlink_osf.c
net/sched/cls_api.c
net/sched/sch_choke.c
net/sched/sch_fq_codel.c
net/sched/sch_sfq.c
net/sched/sch_skbprio.c
net/sunrpc/auth_gss/auth_gss.c
net/sunrpc/auth_gss/gss_krb5_crypto.c
net/sunrpc/auth_gss/gss_krb5_wrap.c
net/sunrpc/auth_gss/gss_mech_switch.c
net/sunrpc/auth_gss/svcauth_gss.c
net/sunrpc/xdr.c
net/tipc/topsrv.c
net/tls/tls_sw.c
net/vmw_vsock/virtio_transport_common.c
net/x25/x25_subr.c
samples/trace_events/trace-events-sample.h
scripts/decodecode
scripts/gdb/linux/rbtree.py
tools/bootconfig/main.c
tools/cgroup/iocost_monitor.py
tools/objtool/check.c
tools/objtool/elf.h
tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c
tools/testing/selftests/ftrace/ftracetest
tools/testing/selftests/kvm/Makefile
tools/testing/selftests/kvm/include/evmcs.h
tools/testing/selftests/kvm/lib/x86_64/vmx.c
tools/testing/selftests/net/tcp_mmap.c
tools/testing/selftests/wireguard/netns.sh
tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config
tools/testing/selftests/wireguard/qemu/debug.config
virt/kvm/arm/hyp/aarch32.c
virt/kvm/arm/psci.c
virt/kvm/arm/vgic/vgic-init.c
virt/kvm/arm/vgic/vgic-its.c
virt/kvm/arm/vgic/vgic-mmio-v2.c
virt/kvm/arm/vgic/vgic-mmio-v3.c
virt/kvm/arm/vgic/vgic-mmio.c
virt/kvm/arm/vgic/vgic-mmio.h

index 5b58fc4..4574352 100644 (file)
@@ -61,8 +61,8 @@ The ``ice`` driver reports the following versions
       - running
       - ICE OS Default Package
       - The name of the DDP package that is active in the device. The DDP
-        package is loaded by the driver during initialization. Each varation
-        of DDP package shall have a unique name.
+        package is loaded by the driver during initialization. Each
+        variation of the DDP package has a unique name.
     * - ``fw.app``
       - running
       - 1.3.1.0
index dcc2526..b6833c7 100644 (file)
@@ -28,3 +28,5 @@ KVM
    arm/index
 
    devices/index
+
+   running-nested-guests
diff --git a/Documentation/virt/kvm/running-nested-guests.rst b/Documentation/virt/kvm/running-nested-guests.rst
new file mode 100644 (file)
index 0000000..d0a1fc7
--- /dev/null
@@ -0,0 +1,276 @@
+==============================
+Running nested guests with KVM
+==============================
+
+A nested guest is the ability to run a guest inside another guest (it
+can be KVM-based or a different hypervisor).  The straightforward
+example is a KVM guest that in turn runs on a KVM guest (the rest of
+this document is built on this example)::
+
+              .----------------.  .----------------.
+              |                |  |                |
+              |      L2        |  |      L2        |
+              | (Nested Guest) |  | (Nested Guest) |
+              |                |  |                |
+              |----------------'--'----------------|
+              |                                    |
+              |       L1 (Guest Hypervisor)        |
+              |          KVM (/dev/kvm)            |
+              |                                    |
+      .------------------------------------------------------.
+      |                 L0 (Host Hypervisor)                 |
+      |                    KVM (/dev/kvm)                    |
+      |------------------------------------------------------|
+      |        Hardware (with virtualization extensions)     |
+      '------------------------------------------------------'
+
+Terminology:
+
+- L0 – level-0; the bare metal host, running KVM
+
+- L1 – level-1 guest; a VM running on L0; also called the "guest
+  hypervisor", as it itself is capable of running KVM.
+
+- L2 – level-2 guest; a VM running on L1, this is the "nested guest"
+
+.. note:: The above diagram is modelled after the x86 architecture;
+          s390x, ppc64 and other architectures are likely to have
+          a different design for nesting.
+
+          For example, s390x always has an LPAR (LogicalPARtition)
+          hypervisor running on bare metal, adding another layer and
+          resulting in at least four levels in a nested setup — L0 (bare
+          metal, running the LPAR hypervisor), L1 (host hypervisor), L2
+          (guest hypervisor), L3 (nested guest).
+
+          This document will stick with the three-level terminology (L0,
+          L1, and L2) for all architectures; and will largely focus on
+          x86.
+
+
+Use Cases
+---------
+
+There are several scenarios where nested KVM can be useful, to name a
+few:
+
+- As a developer, you want to test your software on different operating
+  systems (OSes).  Instead of renting multiple VMs from a Cloud
+  Provider, using nested KVM lets you rent a large enough "guest
+  hypervisor" (level-1 guest).  This in turn allows you to create
+  multiple nested guests (level-2 guests), running different OSes, on
+  which you can develop and test your software.
+
+- Live migration of "guest hypervisors" and their nested guests, for
+  load balancing, disaster recovery, etc.
+
+- VM image creation tools (e.g. ``virt-install``,  etc) often run
+  their own VM, and users expect these to work inside a VM.
+
+- Some OSes use virtualization internally for security (e.g. to let
+  applications run safely in isolation).
+
+
+Enabling "nested" (x86)
+-----------------------
+
+From Linux kernel v4.19 onwards, the ``nested`` KVM parameter is enabled
+by default for Intel and AMD.  (Though your Linux distribution might
+override this default.)
+
+In case you are running a Linux kernel older than v4.19, to enable
+nesting, set the ``nested`` KVM module parameter to ``Y`` or ``1``.  To
+persist this setting across reboots, you can add it in a config file, as
+shown below:
+
+1. On the bare metal host (L0), list the kernel modules and ensure that
+   the KVM modules::
+
+    $ lsmod | grep -i kvm
+    kvm_intel             133627  0
+    kvm                   435079  1 kvm_intel
+
+2. Show information for ``kvm_intel`` module::
+
+    $ modinfo kvm_intel | grep -i nested
+    parm:           nested:bool
+
+3. For the nested KVM configuration to persist across reboots, place the
+   below in ``/etc/modprobed/kvm_intel.conf`` (create the file if it
+   doesn't exist)::
+
+    $ cat /etc/modprobe.d/kvm_intel.conf
+    options kvm-intel nested=y
+
+4. Unload and re-load the KVM Intel module::
+
+    $ sudo rmmod kvm-intel
+    $ sudo modprobe kvm-intel
+
+5. Verify if the ``nested`` parameter for KVM is enabled::
+
+    $ cat /sys/module/kvm_intel/parameters/nested
+    Y
+
+For AMD hosts, the process is the same as above, except that the module
+name is ``kvm-amd``.
+
+
+Additional nested-related kernel parameters (x86)
+-------------------------------------------------
+
+If your hardware is sufficiently advanced (Intel Haswell processor or
+higher, which has newer hardware virt extensions), the following
+additional features will also be enabled by default: "Shadow VMCS
+(Virtual Machine Control Structure)", APIC Virtualization on your bare
+metal host (L0).  Parameters for Intel hosts::
+
+    $ cat /sys/module/kvm_intel/parameters/enable_shadow_vmcs
+    Y
+
+    $ cat /sys/module/kvm_intel/parameters/enable_apicv
+    Y
+
+    $ cat /sys/module/kvm_intel/parameters/ept
+    Y
+
+.. note:: If you suspect your L2 (i.e. nested guest) is running slower,
+          ensure the above are enabled (particularly
+          ``enable_shadow_vmcs`` and ``ept``).
+
+
+Starting a nested guest (x86)
+-----------------------------
+
+Once your bare metal host (L0) is configured for nesting, you should be
+able to start an L1 guest with::
+
+    $ qemu-kvm -cpu host [...]
+
+The above will pass through the host CPU's capabilities as-is to the
+gues); or for better live migration compatibility, use a named CPU
+model supported by QEMU. e.g.::
+
+    $ qemu-kvm -cpu Haswell-noTSX-IBRS,vmx=on
+
+then the guest hypervisor will subsequently be capable of running a
+nested guest with accelerated KVM.
+
+
+Enabling "nested" (s390x)
+-------------------------
+
+1. On the host hypervisor (L0), enable the ``nested`` parameter on
+   s390x::
+
+    $ rmmod kvm
+    $ modprobe kvm nested=1
+
+.. note:: On s390x, the kernel parameter ``hpage`` is mutually exclusive
+          with the ``nested`` paramter — i.e. to be able to enable
+          ``nested``, the ``hpage`` parameter *must* be disabled.
+
+2. The guest hypervisor (L1) must be provided with the ``sie`` CPU
+   feature — with QEMU, this can be done by using "host passthrough"
+   (via the command-line ``-cpu host``).
+
+3. Now the KVM module can be loaded in the L1 (guest hypervisor)::
+
+    $ modprobe kvm
+
+
+Live migration with nested KVM
+------------------------------
+
+Migrating an L1 guest, with a  *live* nested guest in it, to another
+bare metal host, works as of Linux kernel 5.3 and QEMU 4.2.0 for
+Intel x86 systems, and even on older versions for s390x.
+
+On AMD systems, once an L1 guest has started an L2 guest, the L1 guest
+should no longer be migrated or saved (refer to QEMU documentation on
+"savevm"/"loadvm") until the L2 guest shuts down.  Attempting to migrate
+or save-and-load an L1 guest while an L2 guest is running will result in
+undefined behavior.  You might see a ``kernel BUG!`` entry in ``dmesg``, a
+kernel 'oops', or an outright kernel panic.  Such a migrated or loaded L1
+guest can no longer be considered stable or secure, and must be restarted.
+Migrating an L1 guest merely configured to support nesting, while not
+actually running L2 guests, is expected to function normally even on AMD
+systems but may fail once guests are started.
+
+Migrating an L2 guest is always expected to succeed, so all the following
+scenarios should work even on AMD systems:
+
+- Migrating a nested guest (L2) to another L1 guest on the *same* bare
+  metal host.
+
+- Migrating a nested guest (L2) to another L1 guest on a *different*
+  bare metal host.
+
+- Migrating a nested guest (L2) to a bare metal host.
+
+Reporting bugs from nested setups
+-----------------------------------
+
+Debugging "nested" problems can involve sifting through log files across
+L0, L1 and L2; this can result in tedious back-n-forth between the bug
+reporter and the bug fixer.
+
+- Mention that you are in a "nested" setup.  If you are running any kind
+  of "nesting" at all, say so.  Unfortunately, this needs to be called
+  out because when reporting bugs, people tend to forget to even
+  *mention* that they're using nested virtualization.
+
+- Ensure you are actually running KVM on KVM.  Sometimes people do not
+  have KVM enabled for their guest hypervisor (L1), which results in
+  them running with pure emulation or what QEMU calls it as "TCG", but
+  they think they're running nested KVM.  Thus confusing "nested Virt"
+  (which could also mean, QEMU on KVM) with "nested KVM" (KVM on KVM).
+
+Information to collect (generic)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The following is not an exhaustive list, but a very good starting point:
+
+  - Kernel, libvirt, and QEMU version from L0
+
+  - Kernel, libvirt and QEMU version from L1
+
+  - QEMU command-line of L1 -- when using libvirt, you'll find it here:
+    ``/var/log/libvirt/qemu/instance.log``
+
+  - QEMU command-line of L2 -- as above, when using libvirt, get the
+    complete libvirt-generated QEMU command-line
+
+  - ``cat /sys/cpuinfo`` from L0
+
+  - ``cat /sys/cpuinfo`` from L1
+
+  - ``lscpu`` from L0
+
+  - ``lscpu`` from L1
+
+  - Full ``dmesg`` output from L0
+
+  - Full ``dmesg`` output from L1
+
+x86-specific info to collect
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Both the below commands, ``x86info`` and ``dmidecode``, should be
+available on most Linux distributions with the same name:
+
+  - Output of: ``x86info -a`` from L0
+
+  - Output of: ``x86info -a`` from L1
+
+  - Output of: ``dmidecode`` from L0
+
+  - Output of: ``dmidecode`` from L1
+
+s390x-specific info to collect
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Along with the earlier mentioned generic details, the below is
+also recommended:
+
+  - ``/proc/sysinfo`` from L1; this will also include the info from L0
index 2926327..091ec22 100644 (file)
@@ -3936,11 +3936,9 @@ F:       arch/powerpc/platforms/cell/
 CEPH COMMON CODE (LIBCEPH)
 M:     Ilya Dryomov <idryomov@gmail.com>
 M:     Jeff Layton <jlayton@kernel.org>
-M:     Sage Weil <sage@redhat.com>
 L:     ceph-devel@vger.kernel.org
 S:     Supported
 W:     http://ceph.com/
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git
 T:     git git://github.com/ceph/ceph-client.git
 F:     include/linux/ceph/
 F:     include/linux/crush/
@@ -3948,12 +3946,10 @@ F:      net/ceph/
 
 CEPH DISTRIBUTED FILE SYSTEM CLIENT (CEPH)
 M:     Jeff Layton <jlayton@kernel.org>
-M:     Sage Weil <sage@redhat.com>
 M:     Ilya Dryomov <idryomov@gmail.com>
 L:     ceph-devel@vger.kernel.org
 S:     Supported
 W:     http://ceph.com/
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git
 T:     git git://github.com/ceph/ceph-client.git
 F:     Documentation/filesystems/ceph.rst
 F:     fs/ceph/
@@ -5935,9 +5931,9 @@ F:        lib/dynamic_debug.c
 DYNAMIC INTERRUPT MODERATION
 M:     Tal Gilboa <talgi@mellanox.com>
 S:     Maintained
+F:     Documentation/networking/net_dim.rst
 F:     include/linux/dim.h
 F:     lib/dim/
-F:     Documentation/networking/net_dim.rst
 
 DZ DECSTATION DZ11 SERIAL DRIVER
 M:     "Maciej W. Rozycki" <macro@linux-mips.org>
@@ -7119,9 +7115,10 @@ F:       include/uapi/asm-generic/
 
 GENERIC PHY FRAMEWORK
 M:     Kishon Vijay Abraham I <kishon@ti.com>
+M:     Vinod Koul <vkoul@kernel.org>
 L:     linux-kernel@vger.kernel.org
 S:     Supported
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/kishon/linux-phy.git
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/phy/linux-phy.git
 F:     Documentation/devicetree/bindings/phy/
 F:     drivers/phy/
 F:     include/linux/phy/
@@ -7746,11 +7743,6 @@ L:       platform-driver-x86@vger.kernel.org
 S:     Orphan
 F:     drivers/platform/x86/tc1100-wmi.c
 
-HP100: Driver for HP 10/100 Mbit/s Voice Grade Network Adapter Series
-M:     Jaroslav Kysela <perex@perex.cz>
-S:     Obsolete
-F:     drivers/staging/hp/hp100.*
-
 HPET:  High Precision Event Timers driver
 M:     Clemens Ladisch <clemens@ladisch.de>
 S:     Maintained
@@ -14102,12 +14094,10 @@ F:    drivers/media/radio/radio-tea5777.c
 
 RADOS BLOCK DEVICE (RBD)
 M:     Ilya Dryomov <idryomov@gmail.com>
-M:     Sage Weil <sage@redhat.com>
 R:     Dongsheng Yang <dongsheng.yang@easystack.cn>
 L:     ceph-devel@vger.kernel.org
 S:     Supported
 W:     http://ceph.com/
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git
 T:     git git://github.com/ceph/ceph-client.git
 F:     Documentation/ABI/testing/sysfs-bus-rbd
 F:     drivers/block/rbd.c
index 3512f7b..11fe9b1 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 5
 PATCHLEVEL = 7
 SUBLEVEL = 0
-EXTRAVERSION = -rc4
+EXTRAVERSION = -rc5
 NAME = Kleptomaniac Octopus
 
 # *DOCUMENTATION*
@@ -729,10 +729,6 @@ else ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
 KBUILD_CFLAGS += -Os
 endif
 
-ifdef CONFIG_CC_DISABLE_WARN_MAYBE_UNINITIALIZED
-KBUILD_CFLAGS   += -Wno-maybe-uninitialized
-endif
-
 # Tell gcc to never replace conditional load with a non-conditional one
 KBUILD_CFLAGS  += $(call cc-option,--param=allow-store-data-races=0)
 KBUILD_CFLAGS  += $(call cc-option,-fno-allow-store-data-races)
@@ -881,6 +877,17 @@ KBUILD_CFLAGS += -Wno-pointer-sign
 # disable stringop warnings in gcc 8+
 KBUILD_CFLAGS += $(call cc-disable-warning, stringop-truncation)
 
+# We'll want to enable this eventually, but it's not going away for 5.7 at least
+KBUILD_CFLAGS += $(call cc-disable-warning, zero-length-bounds)
+KBUILD_CFLAGS += $(call cc-disable-warning, array-bounds)
+KBUILD_CFLAGS += $(call cc-disable-warning, stringop-overflow)
+
+# Another good warning that we'll want to enable eventually
+KBUILD_CFLAGS += $(call cc-disable-warning, restrict)
+
+# Enabled with W=2, disabled by default as noisy
+KBUILD_CFLAGS += $(call cc-disable-warning, maybe-uninitialized)
+
 # disable invalid "can't wrap" optimizations for signed / pointers
 KBUILD_CFLAGS  += $(call cc-option,-fno-strict-overflow)
 
index e133da3..a915188 100644 (file)
@@ -165,8 +165,13 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
        preempt_enable();
 #endif
 
-       if (!ret)
-               *oval = oldval;
+       /*
+        * Store unconditionally. If ret != 0 the extra store is the least
+        * of the worries but GCC cannot figure out that __futex_atomic_op()
+        * is either setting ret to -EFAULT or storing the old value in
+        * oldval which results in a uninitialized warning at the call site.
+        */
+       *oval = oldval;
 
        return ret;
 }
index 23ebe51..50a279d 100644 (file)
@@ -200,6 +200,13 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
        }
 
        memcpy((u32 *)regs + off, valp, KVM_REG_SIZE(reg->id));
+
+       if (*vcpu_cpsr(vcpu) & PSR_MODE32_BIT) {
+               int i;
+
+               for (i = 0; i < 16; i++)
+                       *vcpu_reg32(vcpu, i) = (u32)*vcpu_reg32(vcpu, i);
+       }
 out:
        return err;
 }
index d22d053..90186cf 100644 (file)
@@ -18,6 +18,7 @@
 
 #define CPU_GP_REG_OFFSET(x)   (CPU_GP_REGS + x)
 #define CPU_XREG_OFFSET(x)     CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x)
+#define CPU_SP_EL0_OFFSET      (CPU_XREG_OFFSET(30) + 8)
 
        .text
        .pushsection    .hyp.text, "ax"
        ldp     x29, lr,  [\ctxt, #CPU_XREG_OFFSET(29)]
 .endm
 
+.macro save_sp_el0 ctxt, tmp
+       mrs     \tmp,   sp_el0
+       str     \tmp,   [\ctxt, #CPU_SP_EL0_OFFSET]
+.endm
+
+.macro restore_sp_el0 ctxt, tmp
+       ldr     \tmp,     [\ctxt, #CPU_SP_EL0_OFFSET]
+       msr     sp_el0, \tmp
+.endm
+
 /*
  * u64 __guest_enter(struct kvm_vcpu *vcpu,
  *                  struct kvm_cpu_context *host_ctxt);
@@ -60,6 +71,9 @@ SYM_FUNC_START(__guest_enter)
        // Store the host regs
        save_callee_saved_regs x1
 
+       // Save the host's sp_el0
+       save_sp_el0     x1, x2
+
        // Now the host state is stored if we have a pending RAS SError it must
        // affect the host. If any asynchronous exception is pending we defer
        // the guest entry. The DSB isn't necessary before v8.2 as any SError
@@ -83,6 +97,9 @@ alternative_else_nop_endif
        // when this feature is enabled for kernel code.
        ptrauth_switch_to_guest x29, x0, x1, x2
 
+       // Restore the guest's sp_el0
+       restore_sp_el0 x29, x0
+
        // Restore guest regs x0-x17
        ldp     x0, x1,   [x29, #CPU_XREG_OFFSET(0)]
        ldp     x2, x3,   [x29, #CPU_XREG_OFFSET(2)]
@@ -130,6 +147,9 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL)
        // Store the guest regs x18-x29, lr
        save_callee_saved_regs x1
 
+       // Store the guest's sp_el0
+       save_sp_el0     x1, x2
+
        get_host_ctxt   x2, x3
 
        // Macro ptrauth_switch_to_guest format:
@@ -139,6 +159,9 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL)
        // when this feature is enabled for kernel code.
        ptrauth_switch_to_host x1, x2, x3, x4, x5
 
+       // Restore the hosts's sp_el0
+       restore_sp_el0 x2, x3
+
        // Now restore the host regs
        restore_callee_saved_regs x2
 
index c2a13ab..9c5cfb0 100644 (file)
@@ -198,7 +198,6 @@ SYM_CODE_END(__hyp_panic)
 .macro invalid_vector  label, target = __hyp_panic
        .align  2
 SYM_CODE_START(\label)
-\label:
        b \target
 SYM_CODE_END(\label)
 .endm
index 75b1925..6d2df9f 100644 (file)
@@ -15,8 +15,9 @@
 /*
  * Non-VHE: Both host and guest must save everything.
  *
- * VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and pstate,
- * which are handled as part of the el2 return state) on every switch.
+ * VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and
+ * pstate, which are handled as part of the el2 return state) on every
+ * switch (sp_el0 is being dealt with in the assembly code).
  * tpidr_el0 and tpidrro_el0 only need to be switched when going
  * to host userspace or a different VCPU.  EL1 registers only need to be
  * switched when potentially going to run a different VCPU.  The latter two
 static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
 {
        ctxt->sys_regs[MDSCR_EL1]       = read_sysreg(mdscr_el1);
-
-       /*
-        * The host arm64 Linux uses sp_el0 to point to 'current' and it must
-        * therefore be saved/restored on every entry/exit to/from the guest.
-        */
-       ctxt->gp_regs.regs.sp           = read_sysreg(sp_el0);
 }
 
 static void __hyp_text __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
@@ -99,12 +94,6 @@ NOKPROBE_SYMBOL(sysreg_save_guest_state_vhe);
 static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctxt)
 {
        write_sysreg(ctxt->sys_regs[MDSCR_EL1],   mdscr_el1);
-
-       /*
-        * The host arm64 Linux uses sp_el0 to point to 'current' and it must
-        * therefore be saved/restored on every entry/exit to/from the guest.
-        */
-       write_sysreg(ctxt->gp_regs.regs.sp,       sp_el0);
 }
 
 static void __hyp_text __sysreg_restore_user_state(struct kvm_cpu_context *ctxt)
index bbeb6a5..0be3355 100644 (file)
@@ -230,6 +230,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
                ptep = (pte_t *)pudp;
        } else if (sz == (CONT_PTE_SIZE)) {
                pmdp = pmd_alloc(mm, pudp, addr);
+               if (!pmdp)
+                       return NULL;
 
                WARN_ON(addr & (sz - 1));
                /*
index e15166b..ad2f172 100644 (file)
@@ -521,6 +521,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_IOEVENTFD:
        case KVM_CAP_DEVICE_CTRL:
        case KVM_CAP_IMMEDIATE_EXIT:
+       case KVM_CAP_SET_GUEST_DEBUG:
                r = 1;
                break;
        case KVM_CAP_PPC_GUEST_DEBUG_SSTEP:
index 8e18d2c..cec462e 100644 (file)
 #define CAUSE_IRQ_FLAG         (_AC(1, UL) << (__riscv_xlen - 1))
 
 /* Interrupt causes (minus the high bit) */
-#define IRQ_U_SOFT             0
 #define IRQ_S_SOFT             1
 #define IRQ_M_SOFT             3
-#define IRQ_U_TIMER            4
 #define IRQ_S_TIMER            5
 #define IRQ_M_TIMER            7
-#define IRQ_U_EXT              8
 #define IRQ_S_EXT              9
 #define IRQ_M_EXT              11
 
index 1bb0cd0..5ce5046 100644 (file)
@@ -8,6 +8,7 @@
 #ifndef _ASM_RISCV_HWCAP_H
 #define _ASM_RISCV_HWCAP_H
 
+#include <linux/bits.h>
 #include <uapi/asm/hwcap.h>
 
 #ifndef __ASSEMBLY__
@@ -22,6 +23,27 @@ enum {
 };
 
 extern unsigned long elf_hwcap;
+
+#define RISCV_ISA_EXT_a                ('a' - 'a')
+#define RISCV_ISA_EXT_c                ('c' - 'a')
+#define RISCV_ISA_EXT_d                ('d' - 'a')
+#define RISCV_ISA_EXT_f                ('f' - 'a')
+#define RISCV_ISA_EXT_h                ('h' - 'a')
+#define RISCV_ISA_EXT_i                ('i' - 'a')
+#define RISCV_ISA_EXT_m                ('m' - 'a')
+#define RISCV_ISA_EXT_s                ('s' - 'a')
+#define RISCV_ISA_EXT_u                ('u' - 'a')
+
+#define RISCV_ISA_EXT_MAX      64
+
+unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap);
+
+#define riscv_isa_extension_mask(ext) BIT_MASK(RISCV_ISA_EXT_##ext)
+
+bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit);
+#define riscv_isa_extension_available(isa_bitmap, ext) \
+       __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext)
+
 #endif
 
 #endif /* _ASM_RISCV_HWCAP_H */
index c38df47..4c5bae7 100644 (file)
@@ -22,14 +22,6 @@ static inline int set_memory_x(unsigned long addr, int numpages) { return 0; }
 static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
 #endif
 
-#ifdef CONFIG_STRICT_KERNEL_RWX
-void set_kernel_text_ro(void);
-void set_kernel_text_rw(void);
-#else
-static inline void set_kernel_text_ro(void) { }
-static inline void set_kernel_text_rw(void) { }
-#endif
-
 int set_direct_map_invalid_noflush(struct page *page);
 int set_direct_map_default_noflush(struct page *page);
 
index c4c33bf..0ec2235 100644 (file)
@@ -15,8 +15,8 @@
 
 const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init;
 
-void *__cpu_up_stack_pointer[NR_CPUS];
-void *__cpu_up_task_pointer[NR_CPUS];
+void *__cpu_up_stack_pointer[NR_CPUS] __section(.data);
+void *__cpu_up_task_pointer[NR_CPUS] __section(.data);
 
 extern const struct cpu_operations cpu_ops_sbi;
 extern const struct cpu_operations cpu_ops_spinwait;
index a5ad000..ac202f4 100644 (file)
@@ -6,6 +6,7 @@
  * Copyright (C) 2017 SiFive
  */
 
+#include <linux/bitmap.h>
 #include <linux/of.h>
 #include <asm/processor.h>
 #include <asm/hwcap.h>
 #include <asm/switch_to.h>
 
 unsigned long elf_hwcap __read_mostly;
+
+/* Host ISA bitmap */
+static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly;
+
 #ifdef CONFIG_FPU
 bool has_fpu __read_mostly;
 #endif
 
+/**
+ * riscv_isa_extension_base() - Get base extension word
+ *
+ * @isa_bitmap: ISA bitmap to use
+ * Return: base extension word as unsigned long value
+ *
+ * NOTE: If isa_bitmap is NULL then Host ISA bitmap will be used.
+ */
+unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap)
+{
+       if (!isa_bitmap)
+               return riscv_isa[0];
+       return isa_bitmap[0];
+}
+EXPORT_SYMBOL_GPL(riscv_isa_extension_base);
+
+/**
+ * __riscv_isa_extension_available() - Check whether given extension
+ * is available or not
+ *
+ * @isa_bitmap: ISA bitmap to use
+ * @bit: bit position of the desired extension
+ * Return: true or false
+ *
+ * NOTE: If isa_bitmap is NULL then Host ISA bitmap will be used.
+ */
+bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit)
+{
+       const unsigned long *bmap = (isa_bitmap) ? isa_bitmap : riscv_isa;
+
+       if (bit >= RISCV_ISA_EXT_MAX)
+               return false;
+
+       return test_bit(bit, bmap) ? true : false;
+}
+EXPORT_SYMBOL_GPL(__riscv_isa_extension_available);
+
 void riscv_fill_hwcap(void)
 {
        struct device_node *node;
        const char *isa;
-       size_t i;
+       char print_str[BITS_PER_LONG + 1];
+       size_t i, j, isa_len;
        static unsigned long isa2hwcap[256] = {0};
 
        isa2hwcap['i'] = isa2hwcap['I'] = COMPAT_HWCAP_ISA_I;
@@ -33,8 +76,11 @@ void riscv_fill_hwcap(void)
 
        elf_hwcap = 0;
 
+       bitmap_zero(riscv_isa, RISCV_ISA_EXT_MAX);
+
        for_each_of_cpu_node(node) {
                unsigned long this_hwcap = 0;
+               unsigned long this_isa = 0;
 
                if (riscv_of_processor_hartid(node) < 0)
                        continue;
@@ -44,8 +90,24 @@ void riscv_fill_hwcap(void)
                        continue;
                }
 
-               for (i = 0; i < strlen(isa); ++i)
+               i = 0;
+               isa_len = strlen(isa);
+#if IS_ENABLED(CONFIG_32BIT)
+               if (!strncmp(isa, "rv32", 4))
+                       i += 4;
+#elif IS_ENABLED(CONFIG_64BIT)
+               if (!strncmp(isa, "rv64", 4))
+                       i += 4;
+#endif
+               for (; i < isa_len; ++i) {
                        this_hwcap |= isa2hwcap[(unsigned char)(isa[i])];
+                       /*
+                        * TODO: X, Y and Z extension parsing for Host ISA
+                        * bitmap will be added in-future.
+                        */
+                       if ('a' <= isa[i] && isa[i] < 'x')
+                               this_isa |= (1UL << (isa[i] - 'a'));
+               }
 
                /*
                 * All "okay" hart should have same isa. Set HWCAP based on
@@ -56,6 +118,11 @@ void riscv_fill_hwcap(void)
                        elf_hwcap &= this_hwcap;
                else
                        elf_hwcap = this_hwcap;
+
+               if (riscv_isa[0])
+                       riscv_isa[0] &= this_isa;
+               else
+                       riscv_isa[0] = this_isa;
        }
 
        /* We don't support systems with F but without D, so mask those out
@@ -65,7 +132,17 @@ void riscv_fill_hwcap(void)
                elf_hwcap &= ~COMPAT_HWCAP_ISA_F;
        }
 
-       pr_info("elf_hwcap is 0x%lx\n", elf_hwcap);
+       memset(print_str, 0, sizeof(print_str));
+       for (i = 0, j = 0; i < BITS_PER_LONG; i++)
+               if (riscv_isa[0] & BIT_MASK(i))
+                       print_str[j++] = (char)('a' + i);
+       pr_info("riscv: ISA extensions %s\n", print_str);
+
+       memset(print_str, 0, sizeof(print_str));
+       for (i = 0, j = 0; i < BITS_PER_LONG; i++)
+               if (elf_hwcap & BIT_MASK(i))
+                       print_str[j++] = (char)('a' + i);
+       pr_info("riscv: ELF capabilities %s\n", print_str);
 
 #ifdef CONFIG_FPU
        if (elf_hwcap & (COMPAT_HWCAP_ISA_F | COMPAT_HWCAP_ISA_D))
index e0a6293..a65a8fa 100644 (file)
@@ -10,6 +10,7 @@
 
 #include <linux/cpu.h>
 #include <linux/interrupt.h>
+#include <linux/module.h>
 #include <linux/profile.h>
 #include <linux/smp.h>
 #include <linux/sched.h>
@@ -63,6 +64,7 @@ void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out)
        for_each_cpu(cpu, in)
                cpumask_set_cpu(cpuid_to_hartid_map(cpu), out);
 }
+EXPORT_SYMBOL_GPL(riscv_cpuid_to_hartid_mask);
 
 bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
 {
index a4ee3a0..4c8b2a4 100644 (file)
@@ -12,7 +12,7 @@ vdso-syms += getcpu
 vdso-syms += flush_icache
 
 # Files to link into the vdso
-obj-vdso = $(patsubst %, %.o, $(vdso-syms))
+obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o
 
 # Build rules
 targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.lds vdso-dummy.o
diff --git a/arch/riscv/kernel/vdso/note.S b/arch/riscv/kernel/vdso/note.S
new file mode 100644 (file)
index 0000000..2a956c9
--- /dev/null
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
+ * Here we can supply some information useful to userland.
+ */
+
+#include <linux/elfnote.h>
+#include <linux/version.h>
+
+ELFNOTE_START(Linux, 0, "a")
+       .long LINUX_VERSION_CODE
+ELFNOTE_END
index b55be44..27a3341 100644 (file)
@@ -150,7 +150,8 @@ void __init setup_bootmem(void)
        memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start);
 
        set_max_mapnr(PFN_DOWN(mem_size));
-       max_low_pfn = PFN_DOWN(memblock_end_of_DRAM());
+       max_pfn = PFN_DOWN(memblock_end_of_DRAM());
+       max_low_pfn = max_pfn;
 
 #ifdef CONFIG_BLK_DEV_INITRD
        setup_initrd();
@@ -501,22 +502,6 @@ static inline void setup_vm_final(void)
 #endif /* CONFIG_MMU */
 
 #ifdef CONFIG_STRICT_KERNEL_RWX
-void set_kernel_text_rw(void)
-{
-       unsigned long text_start = (unsigned long)_text;
-       unsigned long text_end = (unsigned long)_etext;
-
-       set_memory_rw(text_start, (text_end - text_start) >> PAGE_SHIFT);
-}
-
-void set_kernel_text_ro(void)
-{
-       unsigned long text_start = (unsigned long)_text;
-       unsigned long text_end = (unsigned long)_etext;
-
-       set_memory_ro(text_start, (text_end - text_start) >> PAGE_SHIFT);
-}
-
 void mark_rodata_ro(void)
 {
        unsigned long text_start = (unsigned long)_text;
index 5dcf9ff..d05bb04 100644 (file)
@@ -545,6 +545,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_S390_AIS:
        case KVM_CAP_S390_AIS_MIGRATION:
        case KVM_CAP_S390_VCPU_RESETS:
+       case KVM_CAP_SET_GUEST_DEBUG:
                r = 1;
                break;
        case KVM_CAP_S390_HPAGE_1M:
index 69a824f..8938936 100644 (file)
@@ -626,10 +626,12 @@ static int handle_pqap(struct kvm_vcpu *vcpu)
         * available for the guest are AQIC and TAPQ with the t bit set
         * since we do not set IC.3 (FIII) we currently will only intercept
         * the AQIC function code.
+        * Note: running nested under z/VM can result in intercepts for other
+        * function codes, e.g. PQAP(QCI). We do not support this and bail out.
         */
        reg0 = vcpu->run->s.regs.gprs[0];
        fc = (reg0 >> 24) & 0xff;
-       if (WARN_ON_ONCE(fc != 0x03))
+       if (fc != 0x03)
                return -EOPNOTSUPP;
 
        /* PQAP instruction is allowed for guest kernel only */
index 4782600..fff8861 100644 (file)
@@ -54,7 +54,7 @@ extern struct sparc_phys_banks sp_banks[SPARC_PHYS_BANKS+1];
  */
 typedef struct { unsigned long pte; } pte_t;
 typedef struct { unsigned long iopte; } iopte_t;
-typedef struct { unsigned long pmdv[16]; } pmd_t;
+typedef struct { unsigned long pmd; } pmd_t;
 typedef struct { unsigned long pgd; } pgd_t;
 typedef struct { unsigned long ctxd; } ctxd_t;
 typedef struct { unsigned long pgprot; } pgprot_t;
@@ -62,7 +62,7 @@ typedef struct { unsigned long iopgprot; } iopgprot_t;
 
 #define pte_val(x)     ((x).pte)
 #define iopte_val(x)   ((x).iopte)
-#define pmd_val(x)      ((x).pmdv[0])
+#define pmd_val(x)      ((x).pmd)
 #define pgd_val(x)     ((x).pgd)
 #define ctxd_val(x)    ((x).ctxd)
 #define pgprot_val(x)  ((x).pgprot)
@@ -82,7 +82,7 @@ typedef struct { unsigned long iopgprot; } iopgprot_t;
  */
 typedef unsigned long pte_t;
 typedef unsigned long iopte_t;
-typedef struct { unsigned long pmdv[16]; } pmd_t;
+typedef unsigned long pmd_t;
 typedef unsigned long pgd_t;
 typedef unsigned long ctxd_t;
 typedef unsigned long pgprot_t;
@@ -90,14 +90,14 @@ typedef unsigned long iopgprot_t;
 
 #define pte_val(x)     (x)
 #define iopte_val(x)   (x)
-#define pmd_val(x)      ((x).pmdv[0])
+#define pmd_val(x)      (x)
 #define pgd_val(x)     (x)
 #define ctxd_val(x)    (x)
 #define pgprot_val(x)  (x)
 #define iopgprot_val(x)        (x)
 
 #define __pte(x)       (x)
-#define __pmd(x)       ((pmd_t) { { (x) }, })
+#define __pmd(x)       (x)
 #define __iopte(x)     (x)
 #define __pgd(x)       (x)
 #define __ctxd(x)      (x)
@@ -106,7 +106,7 @@ typedef unsigned long iopgprot_t;
 
 #endif
 
-typedef struct page *pgtable_t;
+typedef pte_t *pgtable_t;
 
 #define TASK_UNMAPPED_BASE     0x50000000
 
index eae0c92..b772384 100644 (file)
@@ -50,23 +50,24 @@ static inline void free_pmd_fast(pmd_t * pmd)
 #define pmd_free(mm, pmd)              free_pmd_fast(pmd)
 #define __pmd_free_tlb(tlb, pmd, addr) pmd_free((tlb)->mm, pmd)
 
-void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, struct page *ptep);
-#define pmd_pgtable(pmd) pmd_page(pmd)
+#define pmd_populate(mm, pmd, pte)     pmd_set(pmd, pte)
+#define pmd_pgtable(pmd)               (pgtable_t)__pmd_page(pmd)
 
 void pmd_set(pmd_t *pmdp, pte_t *ptep);
-#define pmd_populate_kernel(MM, PMD, PTE) pmd_set(PMD, PTE)
+#define pmd_populate_kernel            pmd_populate
 
 pgtable_t pte_alloc_one(struct mm_struct *mm);
 
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
-       return srmmu_get_nocache(PTE_SIZE, PTE_SIZE);
+       return srmmu_get_nocache(SRMMU_PTE_TABLE_SIZE,
+                                SRMMU_PTE_TABLE_SIZE);
 }
 
 
 static inline void free_pte_fast(pte_t *pte)
 {
-       srmmu_free_nocache(pte, PTE_SIZE);
+       srmmu_free_nocache(pte, SRMMU_PTE_TABLE_SIZE);
 }
 
 #define pte_free_kernel(mm, pte)       free_pte_fast(pte)
index 0de659a..c5625b2 100644 (file)
 
 #include <linux/const.h>
 
+#define PMD_SHIFT              18
+#define PMD_SIZE               (1UL << PMD_SHIFT)
+#define PMD_MASK               (~(PMD_SIZE-1))
+#define PMD_ALIGN(__addr)      (((__addr) + ~PMD_MASK) & PMD_MASK)
+
+#define PGDIR_SHIFT            24
+#define PGDIR_SIZE             (1UL << PGDIR_SHIFT)
+#define PGDIR_MASK             (~(PGDIR_SIZE-1))
+#define PGDIR_ALIGN(__addr)    (((__addr) + ~PGDIR_MASK) & PGDIR_MASK)
+
 #ifndef __ASSEMBLY__
 #include <asm-generic/pgtable-nopud.h>
 
@@ -34,17 +44,10 @@ unsigned long __init bootmem_init(unsigned long *pages_avail);
 #define pmd_ERROR(e)   __builtin_trap()
 #define pgd_ERROR(e)   __builtin_trap()
 
-#define PMD_SHIFT              22
-#define PMD_SIZE               (1UL << PMD_SHIFT)
-#define PMD_MASK               (~(PMD_SIZE-1))
-#define PMD_ALIGN(__addr)      (((__addr) + ~PMD_MASK) & PMD_MASK)
-#define PGDIR_SHIFT            SRMMU_PGDIR_SHIFT
-#define PGDIR_SIZE             SRMMU_PGDIR_SIZE
-#define PGDIR_MASK             SRMMU_PGDIR_MASK
-#define PTRS_PER_PTE           1024
-#define PTRS_PER_PMD           SRMMU_PTRS_PER_PMD
-#define PTRS_PER_PGD           SRMMU_PTRS_PER_PGD
-#define USER_PTRS_PER_PGD      PAGE_OFFSET / SRMMU_PGDIR_SIZE
+#define PTRS_PER_PTE           64
+#define PTRS_PER_PMD           64
+#define PTRS_PER_PGD           256
+#define USER_PTRS_PER_PGD      PAGE_OFFSET / PGDIR_SIZE
 #define FIRST_USER_ADDRESS     0UL
 #define PTE_SIZE               (PTRS_PER_PTE*4)
 
@@ -132,6 +135,17 @@ static inline struct page *pmd_page(pmd_t pmd)
        return pfn_to_page((pmd_val(pmd) & SRMMU_PTD_PMASK) >> (PAGE_SHIFT-4));
 }
 
+static inline unsigned long __pmd_page(pmd_t pmd)
+{
+       unsigned long v;
+
+       if (srmmu_device_memory(pmd_val(pmd)))
+               BUG();
+
+       v = pmd_val(pmd) & SRMMU_PTD_PMASK;
+       return (unsigned long)__nocache_va(v << 4);
+}
+
 static inline unsigned long pud_page_vaddr(pud_t pud)
 {
        if (srmmu_device_memory(pud_val(pud))) {
@@ -179,9 +193,7 @@ static inline int pmd_none(pmd_t pmd)
 
 static inline void pmd_clear(pmd_t *pmdp)
 {
-       int i;
-       for (i = 0; i < PTRS_PER_PTE/SRMMU_REAL_PTRS_PER_PTE; i++)
-               set_pte((pte_t *)&pmdp->pmdv[i], __pte(0));
+       set_pte((pte_t *)&pmd_val(*pmdp), __pte(0));
 }
 
 static inline int pud_none(pud_t pud)
index 32a5088..7708d01 100644 (file)
 /* Number of contexts is implementation-dependent; 64k is the most we support */
 #define SRMMU_MAX_CONTEXTS     65536
 
-/* PMD_SHIFT determines the size of the area a second-level page table entry can map */
-#define SRMMU_REAL_PMD_SHIFT           18
-#define SRMMU_REAL_PMD_SIZE            (1UL << SRMMU_REAL_PMD_SHIFT)
-#define SRMMU_REAL_PMD_MASK            (~(SRMMU_REAL_PMD_SIZE-1))
-#define SRMMU_REAL_PMD_ALIGN(__addr)   (((__addr)+SRMMU_REAL_PMD_SIZE-1)&SRMMU_REAL_PMD_MASK)
-
-/* PGDIR_SHIFT determines what a third-level page table entry can map */
-#define SRMMU_PGDIR_SHIFT       24
-#define SRMMU_PGDIR_SIZE        (1UL << SRMMU_PGDIR_SHIFT)
-#define SRMMU_PGDIR_MASK        (~(SRMMU_PGDIR_SIZE-1))
-#define SRMMU_PGDIR_ALIGN(addr) (((addr)+SRMMU_PGDIR_SIZE-1)&SRMMU_PGDIR_MASK)
-
-#define SRMMU_REAL_PTRS_PER_PTE        64
-#define SRMMU_REAL_PTRS_PER_PMD        64
-#define SRMMU_PTRS_PER_PGD     256
-
-#define SRMMU_REAL_PTE_TABLE_SIZE      (SRMMU_REAL_PTRS_PER_PTE*4)
-#define SRMMU_PMD_TABLE_SIZE           (SRMMU_REAL_PTRS_PER_PMD*4)
-#define SRMMU_PGD_TABLE_SIZE           (SRMMU_PTRS_PER_PGD*4)
-
-/*
- * To support pagetables in highmem, Linux introduces APIs which
- * return struct page* and generally manipulate page tables when
- * they are not mapped into kernel space. Our hardware page tables
- * are smaller than pages. We lump hardware tabes into big, page sized
- * software tables.
- *
- * PMD_SHIFT determines the size of the area a second-level page table entry
- * can map, and our pmd_t is 16 times larger than normal.  The values which
- * were once defined here are now generic for 4c and srmmu, so they're
- * found in pgtable.h.
- */
-#define SRMMU_PTRS_PER_PMD     4
+#define SRMMU_PTE_TABLE_SIZE           (PTRS_PER_PTE*4)
+#define SRMMU_PMD_TABLE_SIZE           (PTRS_PER_PMD*4)
+#define SRMMU_PGD_TABLE_SIZE           (PTRS_PER_PGD*4)
 
 /* Definition of the values in the ET field of PTD's and PTE's */
 #define SRMMU_ET_MASK         0x3
index 0bbefd1..08ffc60 100644 (file)
@@ -10,6 +10,7 @@
 
 #include <asm/asi.h>
 #include <asm/mxcc.h>
+#include <asm/pgtable.h>
 #include <asm/pgtsrmmu.h>
 
 /* Bits in the SRMMU control register for GNU/Viking modules.
@@ -227,7 +228,7 @@ static inline unsigned long viking_hwprobe(unsigned long vaddr)
                             : "=r" (val)
                             : "r" (vaddr | 0x200), "i" (ASI_M_FLUSH_PROBE));
        if ((val & SRMMU_ET_MASK) == SRMMU_ET_PTE) {
-               vaddr &= ~SRMMU_PGDIR_MASK;
+               vaddr &= ~PGDIR_MASK;
                vaddr >>= PAGE_SHIFT;
                return val | (vaddr << 8);
        }
@@ -237,7 +238,7 @@ static inline unsigned long viking_hwprobe(unsigned long vaddr)
                             : "=r" (val)
                             : "r" (vaddr | 0x100), "i" (ASI_M_FLUSH_PROBE));
        if ((val & SRMMU_ET_MASK) == SRMMU_ET_PTE) {
-               vaddr &= ~SRMMU_REAL_PMD_MASK;
+               vaddr &= ~PMD_MASK;
                vaddr >>= PAGE_SHIFT;
                return val | (vaddr << 8);
        }
index e55f2c0..be30c8d 100644 (file)
@@ -24,7 +24,7 @@
 #include <asm/winmacro.h>
 #include <asm/thread_info.h>   /* TI_UWINMASK */
 #include <asm/errno.h>
-#include <asm/pgtsrmmu.h>      /* SRMMU_PGDIR_SHIFT */
+#include <asm/pgtable.h>       /* PGDIR_SHIFT */
 #include <asm/export.h>
 
        .data
@@ -273,7 +273,7 @@ not_a_sun4:
                lda     [%o1] ASI_M_BYPASS, %o2         ! This is the 0x0 16MB pgd
 
                /* Calculate to KERNBASE entry. */
-               add     %o1, KERNBASE >> (SRMMU_PGDIR_SHIFT - 2), %o3
+               add     %o1, KERNBASE >> (PGDIR_SHIFT - 2), %o3
 
                /* Poke the entry into the calculated address. */
                sta     %o2, [%o3] ASI_M_BYPASS
@@ -317,7 +317,7 @@ srmmu_not_viking:
                sll     %g1, 0x8, %g1                   ! make phys addr for l1 tbl
 
                lda     [%g1] ASI_M_BYPASS, %g2         ! get level1 entry for 0x0
-               add     %g1, KERNBASE >> (SRMMU_PGDIR_SHIFT - 2), %g3
+               add     %g1, KERNBASE >> (PGDIR_SHIFT - 2), %g3
                sta     %g2, [%g3] ASI_M_BYPASS         ! place at KERNBASE entry
                b       go_to_highmem
                 nop                                    ! wheee....
@@ -341,7 +341,7 @@ leon_remap:
                sll     %g1, 0x8, %g1                   ! make phys addr for l1 tbl
 
                lda     [%g1] ASI_M_BYPASS, %g2         ! get level1 entry for 0x0
-               add     %g1, KERNBASE >> (SRMMU_PGDIR_SHIFT - 2), %g3
+               add     %g1, KERNBASE >> (PGDIR_SHIFT - 2), %g3
                sta     %g2, [%g3] ASI_M_BYPASS         ! place at KERNBASE entry
                b       go_to_highmem
                 nop                                    ! wheee....
index 16b50af..646dd58 100644 (file)
@@ -46,82 +46,79 @@ enum sparc_regset {
        REGSET_FP,
 };
 
+static int regwindow32_get(struct task_struct *target,
+                          const struct pt_regs *regs,
+                          u32 *uregs)
+{
+       unsigned long reg_window = regs->u_regs[UREG_I6];
+       int size = 16 * sizeof(u32);
+
+       if (target == current) {
+               if (copy_from_user(uregs, (void __user *)reg_window, size))
+                       return -EFAULT;
+       } else {
+               if (access_process_vm(target, reg_window, uregs, size,
+                                     FOLL_FORCE) != size)
+                       return -EFAULT;
+       }
+       return 0;
+}
+
+static int regwindow32_set(struct task_struct *target,
+                          const struct pt_regs *regs,
+                          u32 *uregs)
+{
+       unsigned long reg_window = regs->u_regs[UREG_I6];
+       int size = 16 * sizeof(u32);
+
+       if (target == current) {
+               if (copy_to_user((void __user *)reg_window, uregs, size))
+                       return -EFAULT;
+       } else {
+               if (access_process_vm(target, reg_window, uregs, size,
+                                     FOLL_FORCE | FOLL_WRITE) != size)
+                       return -EFAULT;
+       }
+       return 0;
+}
+
 static int genregs32_get(struct task_struct *target,
                         const struct user_regset *regset,
                         unsigned int pos, unsigned int count,
                         void *kbuf, void __user *ubuf)
 {
        const struct pt_regs *regs = target->thread.kregs;
-       unsigned long __user *reg_window;
-       unsigned long *k = kbuf;
-       unsigned long __user *u = ubuf;
-       unsigned long reg;
+       u32 uregs[16];
+       int ret;
 
        if (target == current)
                flush_user_windows();
 
-       pos /= sizeof(reg);
-       count /= sizeof(reg);
-
-       if (kbuf) {
-               for (; count > 0 && pos < 16; count--)
-                       *k++ = regs->u_regs[pos++];
-
-               reg_window = (unsigned long __user *) regs->u_regs[UREG_I6];
-               reg_window -= 16;
-               for (; count > 0 && pos < 32; count--) {
-                       if (get_user(*k++, &reg_window[pos++]))
-                               return -EFAULT;
-               }
-       } else {
-               for (; count > 0 && pos < 16; count--) {
-                       if (put_user(regs->u_regs[pos++], u++))
-                               return -EFAULT;
-               }
-
-               reg_window = (unsigned long __user *) regs->u_regs[UREG_I6];
-               reg_window -= 16;
-               for (; count > 0 && pos < 32; count--) {
-                       if (get_user(reg, &reg_window[pos++]) ||
-                           put_user(reg, u++))
-                               return -EFAULT;
-               }
-       }
-       while (count > 0) {
-               switch (pos) {
-               case 32: /* PSR */
-                       reg = regs->psr;
-                       break;
-               case 33: /* PC */
-                       reg = regs->pc;
-                       break;
-               case 34: /* NPC */
-                       reg = regs->npc;
-                       break;
-               case 35: /* Y */
-                       reg = regs->y;
-                       break;
-               case 36: /* WIM */
-               case 37: /* TBR */
-                       reg = 0;
-                       break;
-               default:
-                       goto finish;
-               }
+       ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+                                 regs->u_regs,
+                                 0, 16 * sizeof(u32));
+       if (ret || !count)
+               return ret;
 
-               if (kbuf)
-                       *k++ = reg;
-               else if (put_user(reg, u++))
+       if (pos < 32 * sizeof(u32)) {
+               if (regwindow32_get(target, regs, uregs))
                        return -EFAULT;
-               pos++;
-               count--;
+               ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+                                         uregs,
+                                         16 * sizeof(u32), 32 * sizeof(u32));
+               if (ret || !count)
+                       return ret;
        }
-finish:
-       pos *= sizeof(reg);
-       count *= sizeof(reg);
 
-       return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
-                                       38 * sizeof(reg), -1);
+       uregs[0] = regs->psr;
+       uregs[1] = regs->pc;
+       uregs[2] = regs->npc;
+       uregs[3] = regs->y;
+       uregs[4] = 0;   /* WIM */
+       uregs[5] = 0;   /* TBR */
+       return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+                                 uregs,
+                                 32 * sizeof(u32), 38 * sizeof(u32));
 }
 
 static int genregs32_set(struct task_struct *target,
@@ -130,82 +127,58 @@ static int genregs32_set(struct task_struct *target,
                         const void *kbuf, const void __user *ubuf)
 {
        struct pt_regs *regs = target->thread.kregs;
-       unsigned long __user *reg_window;
-       const unsigned long *k = kbuf;
-       const unsigned long __user *u = ubuf;
-       unsigned long reg;
+       u32 uregs[16];
+       u32 psr;
+       int ret;
 
        if (target == current)
                flush_user_windows();
 
-       pos /= sizeof(reg);
-       count /= sizeof(reg);
-
-       if (kbuf) {
-               for (; count > 0 && pos < 16; count--)
-                       regs->u_regs[pos++] = *k++;
-
-               reg_window = (unsigned long __user *) regs->u_regs[UREG_I6];
-               reg_window -= 16;
-               for (; count > 0 && pos < 32; count--) {
-                       if (put_user(*k++, &reg_window[pos++]))
-                               return -EFAULT;
-               }
-       } else {
-               for (; count > 0 && pos < 16; count--) {
-                       if (get_user(reg, u++))
-                               return -EFAULT;
-                       regs->u_regs[pos++] = reg;
-               }
-
-               reg_window = (unsigned long __user *) regs->u_regs[UREG_I6];
-               reg_window -= 16;
-               for (; count > 0 && pos < 32; count--) {
-                       if (get_user(reg, u++) ||
-                           put_user(reg, &reg_window[pos++]))
-                               return -EFAULT;
-               }
-       }
-       while (count > 0) {
-               unsigned long psr;
+       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                regs->u_regs,
+                                0, 16 * sizeof(u32));
+       if (ret || !count)
+               return ret;
 
-               if (kbuf)
-                       reg = *k++;
-               else if (get_user(reg, u++))
+       if (pos < 32 * sizeof(u32)) {
+               if (regwindow32_get(target, regs, uregs))
                        return -EFAULT;
-
-               switch (pos) {
-               case 32: /* PSR */
-                       psr = regs->psr;
-                       psr &= ~(PSR_ICC | PSR_SYSCALL);
-                       psr |= (reg & (PSR_ICC | PSR_SYSCALL));
-                       regs->psr = psr;
-                       break;
-               case 33: /* PC */
-                       regs->pc = reg;
-                       break;
-               case 34: /* NPC */
-                       regs->npc = reg;
-                       break;
-               case 35: /* Y */
-                       regs->y = reg;
-                       break;
-               case 36: /* WIM */
-               case 37: /* TBR */
-                       break;
-               default:
-                       goto finish;
-               }
-
-               pos++;
-               count--;
+               ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                        uregs,
+                                        16 * sizeof(u32), 32 * sizeof(u32));
+               if (ret)
+                       return ret;
+               if (regwindow32_set(target, regs, uregs))
+                       return -EFAULT;
+               if (!count)
+                       return 0;
        }
-finish:
-       pos *= sizeof(reg);
-       count *= sizeof(reg);
-
+       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                &psr,
+                                32 * sizeof(u32), 33 * sizeof(u32));
+       if (ret)
+               return ret;
+       regs->psr = (regs->psr & ~(PSR_ICC | PSR_SYSCALL)) |
+                   (psr & (PSR_ICC | PSR_SYSCALL));
+       if (!count)
+               return 0;
+       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                &regs->pc,
+                                33 * sizeof(u32), 34 * sizeof(u32));
+       if (ret || !count)
+               return ret;
+       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                &regs->npc,
+                                34 * sizeof(u32), 35 * sizeof(u32));
+       if (ret || !count)
+               return ret;
+       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                &regs->y,
+                                35 * sizeof(u32), 36 * sizeof(u32));
+       if (ret || !count)
+               return ret;
        return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
-                                        38 * sizeof(reg), -1);
+                                        36 * sizeof(u32), 38 * sizeof(u32));
 }
 
 static int fpregs32_get(struct task_struct *target,
index c9d41a9..3f5930b 100644 (file)
@@ -572,19 +572,13 @@ static int genregs32_get(struct task_struct *target,
                        for (; count > 0 && pos < 32; count--) {
                                if (access_process_vm(target,
                                                      (unsigned long)
-                                                     &reg_window[pos],
+                                                     &reg_window[pos++],
                                                      &reg, sizeof(reg),
                                                      FOLL_FORCE)
                                    != sizeof(reg))
                                        return -EFAULT;
-                               if (access_process_vm(target,
-                                                     (unsigned long) u,
-                                                     &reg, sizeof(reg),
-                                                     FOLL_FORCE | FOLL_WRITE)
-                                   != sizeof(reg))
+                               if (put_user(reg, u++))
                                        return -EFAULT;
-                               pos++;
-                               u++;
                        }
                }
        }
@@ -684,12 +678,7 @@ static int genregs32_set(struct task_struct *target,
                        }
                } else {
                        for (; count > 0 && pos < 32; count--) {
-                               if (access_process_vm(target,
-                                                     (unsigned long)
-                                                     u,
-                                                     &reg, sizeof(reg),
-                                                     FOLL_FORCE)
-                                   != sizeof(reg))
+                               if (get_user(reg, u++))
                                        return -EFAULT;
                                if (access_process_vm(target,
                                                      (unsigned long)
index b5da3bf..f84a02a 100644 (file)
@@ -22,7 +22,6 @@
 #include <linux/msg.h>
 #include <linux/shm.h>
 #include <linux/uio.h>
-#include <linux/nfs_fs.h>
 #include <linux/quota.h>
 #include <linux/poll.h>
 #include <linux/personality.h>
index 66885a8..6c2521e 100644 (file)
@@ -10,6 +10,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/asi.h>
 #include <asm/page.h>
+#include <asm/pgtable.h>
 #include <asm/pgtsrmmu.h>
 #include <linux/init.h>
 
@@ -293,7 +294,7 @@ hypersparc_flush_tlb_range:
        cmp     %o3, -1
        be      hypersparc_flush_tlb_range_out
 #endif
-        sethi  %hi(~((1 << SRMMU_PGDIR_SHIFT) - 1)), %o4
+        sethi  %hi(~((1 << PGDIR_SHIFT) - 1)), %o4
        sta     %o3, [%g1] ASI_M_MMUREGS
        and     %o1, %o4, %o1
        add     %o1, 0x200, %o1
index 906eda1..3cb3dff 100644 (file)
@@ -193,6 +193,7 @@ unsigned long __init bootmem_init(unsigned long *pages_avail)
        /* Reserve the kernel text/data/bss. */
        size = (start_pfn << PAGE_SHIFT) - phys_base;
        memblock_reserve(phys_base, size);
+       memblock_add(phys_base, size);
 
        size = memblock_phys_mem_size() - memblock_reserved_size();
        *pages_avail = (size >> PAGE_SHIFT) - high_pages;
index b7c94de..116d19a 100644 (file)
@@ -136,26 +136,8 @@ static void msi_set_sync(void)
 
 void pmd_set(pmd_t *pmdp, pte_t *ptep)
 {
-       unsigned long ptp;      /* Physical address, shifted right by 4 */
-       int i;
-
-       ptp = __nocache_pa(ptep) >> 4;
-       for (i = 0; i < PTRS_PER_PTE/SRMMU_REAL_PTRS_PER_PTE; i++) {
-               set_pte((pte_t *)&pmdp->pmdv[i], __pte(SRMMU_ET_PTD | ptp));
-               ptp += (SRMMU_REAL_PTRS_PER_PTE * sizeof(pte_t) >> 4);
-       }
-}
-
-void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, struct page *ptep)
-{
-       unsigned long ptp;      /* Physical address, shifted right by 4 */
-       int i;
-
-       ptp = page_to_pfn(ptep) << (PAGE_SHIFT-4);      /* watch for overflow */
-       for (i = 0; i < PTRS_PER_PTE/SRMMU_REAL_PTRS_PER_PTE; i++) {
-               set_pte((pte_t *)&pmdp->pmdv[i], __pte(SRMMU_ET_PTD | ptp));
-               ptp += (SRMMU_REAL_PTRS_PER_PTE * sizeof(pte_t) >> 4);
-       }
+       unsigned long ptp = __nocache_pa(ptep) >> 4;
+       set_pte((pte_t *)&pmd_val(*pmdp), __pte(SRMMU_ET_PTD | ptp));
 }
 
 /* Find an entry in the third-level page table.. */
@@ -163,7 +145,7 @@ pte_t *pte_offset_kernel(pmd_t *dir, unsigned long address)
 {
        void *pte;
 
-       pte = __nocache_va((dir->pmdv[0] & SRMMU_PTD_PMASK) << 4);
+       pte = __nocache_va((pmd_val(*dir) & SRMMU_PTD_PMASK) << 4);
        return (pte_t *) pte +
            ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1));
 }
@@ -175,18 +157,18 @@ pte_t *pte_offset_kernel(pmd_t *dir, unsigned long address)
  */
 static void *__srmmu_get_nocache(int size, int align)
 {
-       int offset;
+       int offset, minsz = 1 << SRMMU_NOCACHE_BITMAP_SHIFT;
        unsigned long addr;
 
-       if (size < SRMMU_NOCACHE_BITMAP_SHIFT) {
+       if (size < minsz) {
                printk(KERN_ERR "Size 0x%x too small for nocache request\n",
                       size);
-               size = SRMMU_NOCACHE_BITMAP_SHIFT;
+               size = minsz;
        }
-       if (size & (SRMMU_NOCACHE_BITMAP_SHIFT - 1)) {
-               printk(KERN_ERR "Size 0x%x unaligned int nocache request\n",
+       if (size & (minsz - 1)) {
+               printk(KERN_ERR "Size 0x%x unaligned in nocache request\n",
                       size);
-               size += SRMMU_NOCACHE_BITMAP_SHIFT - 1;
+               size += minsz - 1;
        }
        BUG_ON(align > SRMMU_NOCACHE_ALIGN_MAX);
 
@@ -376,31 +358,33 @@ pgd_t *get_pgd_fast(void)
  */
 pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
-       unsigned long pte;
+       pte_t *ptep;
        struct page *page;
 
-       if ((pte = (unsigned long)pte_alloc_one_kernel(mm)) == 0)
-               return NULL;
-       page = pfn_to_page(__nocache_pa(pte) >> PAGE_SHIFT);
-       if (!pgtable_pte_page_ctor(page)) {
-               __free_page(page);
+       if ((ptep = pte_alloc_one_kernel(mm)) == 0)
                return NULL;
+       page = pfn_to_page(__nocache_pa((unsigned long)ptep) >> PAGE_SHIFT);
+       spin_lock(&mm->page_table_lock);
+       if (page_ref_inc_return(page) == 2 && !pgtable_pte_page_ctor(page)) {
+               page_ref_dec(page);
+               ptep = NULL;
        }
-       return page;
+       spin_unlock(&mm->page_table_lock);
+
+       return ptep;
 }
 
-void pte_free(struct mm_struct *mm, pgtable_t pte)
+void pte_free(struct mm_struct *mm, pgtable_t ptep)
 {
-       unsigned long p;
+       struct page *page;
 
-       pgtable_pte_page_dtor(pte);
-       p = (unsigned long)page_address(pte);   /* Cached address (for test) */
-       if (p == 0)
-               BUG();
-       p = page_to_pfn(pte) << PAGE_SHIFT;     /* Physical address */
+       page = pfn_to_page(__nocache_pa((unsigned long)ptep) >> PAGE_SHIFT);
+       spin_lock(&mm->page_table_lock);
+       if (page_ref_dec_return(page) == 1)
+               pgtable_pte_page_dtor(page);
+       spin_unlock(&mm->page_table_lock);
 
-       /* free non cached virtual address*/
-       srmmu_free_nocache(__nocache_va(p), PTE_SIZE);
+       srmmu_free_nocache(ptep, SRMMU_PTE_TABLE_SIZE);
 }
 
 /* context handling - a dynamically sized pool is used */
@@ -822,13 +806,13 @@ static void __init srmmu_inherit_prom_mappings(unsigned long start,
                what = 0;
                addr = start - PAGE_SIZE;
 
-               if (!(start & ~(SRMMU_REAL_PMD_MASK))) {
-                       if (srmmu_probe(addr + SRMMU_REAL_PMD_SIZE) == probed)
+               if (!(start & ~(PMD_MASK))) {
+                       if (srmmu_probe(addr + PMD_SIZE) == probed)
                                what = 1;
                }
 
-               if (!(start & ~(SRMMU_PGDIR_MASK))) {
-                       if (srmmu_probe(addr + SRMMU_PGDIR_SIZE) == probed)
+               if (!(start & ~(PGDIR_MASK))) {
+                       if (srmmu_probe(addr + PGDIR_SIZE) == probed)
                                what = 2;
                }
 
@@ -837,7 +821,7 @@ static void __init srmmu_inherit_prom_mappings(unsigned long start,
                pudp = pud_offset(p4dp, start);
                if (what == 2) {
                        *(pgd_t *)__nocache_fix(pgdp) = __pgd(probed);
-                       start += SRMMU_PGDIR_SIZE;
+                       start += PGDIR_SIZE;
                        continue;
                }
                if (pud_none(*(pud_t *)__nocache_fix(pudp))) {
@@ -849,6 +833,11 @@ static void __init srmmu_inherit_prom_mappings(unsigned long start,
                        pud_set(__nocache_fix(pudp), pmdp);
                }
                pmdp = pmd_offset(__nocache_fix(pgdp), start);
+               if (what == 1) {
+                       *(pmd_t *)__nocache_fix(pmdp) = __pmd(probed);
+                       start += PMD_SIZE;
+                       continue;
+               }
                if (srmmu_pmd_none(*(pmd_t *)__nocache_fix(pmdp))) {
                        ptep = __srmmu_get_nocache(PTE_SIZE, PTE_SIZE);
                        if (ptep == NULL)
@@ -856,19 +845,6 @@ static void __init srmmu_inherit_prom_mappings(unsigned long start,
                        memset(__nocache_fix(ptep), 0, PTE_SIZE);
                        pmd_set(__nocache_fix(pmdp), ptep);
                }
-               if (what == 1) {
-                       /* We bend the rule where all 16 PTPs in a pmd_t point
-                        * inside the same PTE page, and we leak a perfectly
-                        * good hardware PTE piece. Alternatives seem worse.
-                        */
-                       unsigned int x; /* Index of HW PMD in soft cluster */
-                       unsigned long *val;
-                       x = (start >> PMD_SHIFT) & 15;
-                       val = &pmdp->pmdv[x];
-                       *(unsigned long *)__nocache_fix(val) = probed;
-                       start += SRMMU_REAL_PMD_SIZE;
-                       continue;
-               }
                ptep = pte_offset_kernel(__nocache_fix(pmdp), start);
                *(pte_t *)__nocache_fix(ptep) = __pte(probed);
                start += PAGE_SIZE;
@@ -890,9 +866,9 @@ static void __init do_large_mapping(unsigned long vaddr, unsigned long phys_base
 /* Map sp_bank entry SP_ENTRY, starting at virtual address VBASE. */
 static unsigned long __init map_spbank(unsigned long vbase, int sp_entry)
 {
-       unsigned long pstart = (sp_banks[sp_entry].base_addr & SRMMU_PGDIR_MASK);
-       unsigned long vstart = (vbase & SRMMU_PGDIR_MASK);
-       unsigned long vend = SRMMU_PGDIR_ALIGN(vbase + sp_banks[sp_entry].num_bytes);
+       unsigned long pstart = (sp_banks[sp_entry].base_addr & PGDIR_MASK);
+       unsigned long vstart = (vbase & PGDIR_MASK);
+       unsigned long vend = PGDIR_ALIGN(vbase + sp_banks[sp_entry].num_bytes);
        /* Map "low" memory only */
        const unsigned long min_vaddr = PAGE_OFFSET;
        const unsigned long max_vaddr = PAGE_OFFSET + SRMMU_MAXMEM;
@@ -905,7 +881,7 @@ static unsigned long __init map_spbank(unsigned long vbase, int sp_entry)
 
        while (vstart < vend) {
                do_large_mapping(vstart, pstart);
-               vstart += SRMMU_PGDIR_SIZE; pstart += SRMMU_PGDIR_SIZE;
+               vstart += PGDIR_SIZE; pstart += PGDIR_SIZE;
        }
        return vstart;
 }
index adaef6e..48f062d 100644 (file)
@@ -13,6 +13,7 @@
 #include <asm/asi.h>
 #include <asm/mxcc.h>
 #include <asm/page.h>
+#include <asm/pgtable.h>
 #include <asm/pgtsrmmu.h>
 #include <asm/viking.h>
 
@@ -157,7 +158,7 @@ viking_flush_tlb_range:
        cmp     %o3, -1
        be      2f
 #endif
-       sethi   %hi(~((1 << SRMMU_PGDIR_SHIFT) - 1)), %o4
+       sethi   %hi(~((1 << PGDIR_SHIFT) - 1)), %o4
        sta     %o3, [%g1] ASI_M_MMUREGS
        and     %o1, %o4, %o1
        add     %o1, 0x200, %o1
@@ -243,7 +244,7 @@ sun4dsmp_flush_tlb_range:
        ld      [%o0 + VMA_VM_MM], %o0
        ld      [%o0 + AOFF_mm_context], %o3
        lda     [%g1] ASI_M_MMUREGS, %g5
-       sethi   %hi(~((1 << SRMMU_PGDIR_SHIFT) - 1)), %o4
+       sethi   %hi(~((1 << PGDIR_SHIFT) - 1)), %o4
        sta     %o3, [%g1] ASI_M_MMUREGS
        and     %o1, %o4, %o1
        add     %o1, 0x200, %o1
index 0789e13..1c7f13b 100644 (file)
@@ -98,13 +98,6 @@ For 32-bit we have the following conventions - kernel is built with
 #define SIZEOF_PTREGS  21*8
 
 .macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0
-       /*
-        * Push registers and sanitize registers of values that a
-        * speculation attack might otherwise want to exploit. The
-        * lower registers are likely clobbered well before they
-        * could be put to use in a speculative execution gadget.
-        * Interleave XOR with PUSH for better uop scheduling:
-        */
        .if \save_ret
        pushq   %rsi            /* pt_regs->si */
        movq    8(%rsp), %rsi   /* temporarily store the return address in %rsi */
@@ -114,34 +107,43 @@ For 32-bit we have the following conventions - kernel is built with
        pushq   %rsi            /* pt_regs->si */
        .endif
        pushq   \rdx            /* pt_regs->dx */
-       xorl    %edx, %edx      /* nospec   dx */
        pushq   %rcx            /* pt_regs->cx */
-       xorl    %ecx, %ecx      /* nospec   cx */
        pushq   \rax            /* pt_regs->ax */
        pushq   %r8             /* pt_regs->r8 */
-       xorl    %r8d, %r8d      /* nospec   r8 */
        pushq   %r9             /* pt_regs->r9 */
-       xorl    %r9d, %r9d      /* nospec   r9 */
        pushq   %r10            /* pt_regs->r10 */
-       xorl    %r10d, %r10d    /* nospec   r10 */
        pushq   %r11            /* pt_regs->r11 */
-       xorl    %r11d, %r11d    /* nospec   r11*/
        pushq   %rbx            /* pt_regs->rbx */
-       xorl    %ebx, %ebx      /* nospec   rbx*/
        pushq   %rbp            /* pt_regs->rbp */
-       xorl    %ebp, %ebp      /* nospec   rbp*/
        pushq   %r12            /* pt_regs->r12 */
-       xorl    %r12d, %r12d    /* nospec   r12*/
        pushq   %r13            /* pt_regs->r13 */
-       xorl    %r13d, %r13d    /* nospec   r13*/
        pushq   %r14            /* pt_regs->r14 */
-       xorl    %r14d, %r14d    /* nospec   r14*/
        pushq   %r15            /* pt_regs->r15 */
-       xorl    %r15d, %r15d    /* nospec   r15*/
        UNWIND_HINT_REGS
+
        .if \save_ret
        pushq   %rsi            /* return address on top of stack */
        .endif
+
+       /*
+        * Sanitize registers of values that a speculation attack might
+        * otherwise want to exploit. The lower registers are likely clobbered
+        * well before they could be put to use in a speculative execution
+        * gadget.
+        */
+       xorl    %edx,  %edx     /* nospec dx  */
+       xorl    %ecx,  %ecx     /* nospec cx  */
+       xorl    %r8d,  %r8d     /* nospec r8  */
+       xorl    %r9d,  %r9d     /* nospec r9  */
+       xorl    %r10d, %r10d    /* nospec r10 */
+       xorl    %r11d, %r11d    /* nospec r11 */
+       xorl    %ebx,  %ebx     /* nospec rbx */
+       xorl    %ebp,  %ebp     /* nospec rbp */
+       xorl    %r12d, %r12d    /* nospec r12 */
+       xorl    %r13d, %r13d    /* nospec r13 */
+       xorl    %r14d, %r14d    /* nospec r14 */
+       xorl    %r15d, %r15d    /* nospec r15 */
+
 .endm
 
 .macro POP_REGS pop_rdi=1 skip_r11rcx=0
index 0e9504f..3063aa9 100644 (file)
@@ -249,7 +249,6 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
         */
 syscall_return_via_sysret:
        /* rcx and r11 are already restored (see code above) */
-       UNWIND_HINT_EMPTY
        POP_REGS pop_rdi=0 skip_r11rcx=1
 
        /*
@@ -258,6 +257,7 @@ syscall_return_via_sysret:
         */
        movq    %rsp, %rdi
        movq    PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
+       UNWIND_HINT_EMPTY
 
        pushq   RSP-RDI(%rdi)   /* RSP */
        pushq   (%rdi)          /* RDI */
@@ -279,8 +279,7 @@ SYM_CODE_END(entry_SYSCALL_64)
  * %rdi: prev task
  * %rsi: next task
  */
-SYM_CODE_START(__switch_to_asm)
-       UNWIND_HINT_FUNC
+SYM_FUNC_START(__switch_to_asm)
        /*
         * Save callee-saved registers
         * This must match the order in inactive_task_frame
@@ -321,7 +320,7 @@ SYM_CODE_START(__switch_to_asm)
        popq    %rbp
 
        jmp     __switch_to
-SYM_CODE_END(__switch_to_asm)
+SYM_FUNC_END(__switch_to_asm)
 
 /*
  * A newly forked process directly context switches into this address.
@@ -512,7 +511,7 @@ SYM_CODE_END(spurious_entries_start)
  * +----------------------------------------------------+
  */
 SYM_CODE_START(interrupt_entry)
-       UNWIND_HINT_FUNC
+       UNWIND_HINT_IRET_REGS offset=16
        ASM_CLAC
        cld
 
@@ -544,9 +543,9 @@ SYM_CODE_START(interrupt_entry)
        pushq   5*8(%rdi)               /* regs->eflags */
        pushq   4*8(%rdi)               /* regs->cs */
        pushq   3*8(%rdi)               /* regs->ip */
+       UNWIND_HINT_IRET_REGS
        pushq   2*8(%rdi)               /* regs->orig_ax */
        pushq   8(%rdi)                 /* return address */
-       UNWIND_HINT_FUNC
 
        movq    (%rdi), %rdi
        jmp     2f
@@ -637,6 +636,7 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
         */
        movq    %rsp, %rdi
        movq    PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
+       UNWIND_HINT_EMPTY
 
        /* Copy the IRET frame to the trampoline stack. */
        pushq   6*8(%rdi)       /* SS */
@@ -1739,7 +1739,7 @@ SYM_CODE_START(rewind_stack_do_exit)
 
        movq    PER_CPU_VAR(cpu_current_top_of_stack), %rax
        leaq    -PTREGS_SIZE(%rax), %rsp
-       UNWIND_HINT_FUNC sp_offset=PTREGS_SIZE
+       UNWIND_HINT_REGS
 
        call    do_exit
 SYM_CODE_END(rewind_stack_do_exit)
index 85be2f5..70b96ca 100644 (file)
@@ -61,11 +61,12 @@ static inline bool arch_syscall_match_sym_name(const char *sym, const char *name
 {
        /*
         * Compare the symbol name with the system call name. Skip the
-        * "__x64_sys", "__ia32_sys" or simple "sys" prefix.
+        * "__x64_sys", "__ia32_sys", "__do_sys" or simple "sys" prefix.
         */
        return !strcmp(sym + 3, name + 3) ||
                (!strncmp(sym, "__x64_", 6) && !strcmp(sym + 9, name + 3)) ||
-               (!strncmp(sym, "__ia32_", 7) && !strcmp(sym + 10, name + 3));
+               (!strncmp(sym, "__ia32_", 7) && !strcmp(sym + 10, name + 3)) ||
+               (!strncmp(sym, "__do_sys", 8) && !strcmp(sym + 8, name + 3));
 }
 
 #ifndef COMPILE_OFFSETS
index 42a2d0d..0dea9f1 100644 (file)
@@ -1663,8 +1663,8 @@ void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
 static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq)
 {
        /* We can only post Fixed and LowPrio IRQs */
-       return (irq->delivery_mode == dest_Fixed ||
-               irq->delivery_mode == dest_LowestPrio);
+       return (irq->delivery_mode == APIC_DM_FIXED ||
+               irq->delivery_mode == APIC_DM_LOWEST);
 }
 
 static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
index 499578f..70fc159 100644 (file)
@@ -19,7 +19,7 @@ struct unwind_state {
 #if defined(CONFIG_UNWINDER_ORC)
        bool signal, full_regs;
        unsigned long sp, bp, ip;
-       struct pt_regs *regs;
+       struct pt_regs *regs, *prev_regs;
 #elif defined(CONFIG_UNWINDER_FRAME_POINTER)
        bool got_irq;
        unsigned long *bp, *orig_sp, ip;
index 81b9c63..e53dda2 100644 (file)
@@ -352,8 +352,6 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
                 * According to Intel, MFENCE can do the serialization here.
                 */
                asm volatile("mfence" : : : "memory");
-
-               printk_once(KERN_DEBUG "TSC deadline timer enabled\n");
                return;
        }
 
@@ -546,7 +544,7 @@ static struct clock_event_device lapic_clockevent = {
 };
 static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
 
-static u32 hsx_deadline_rev(void)
+static __init u32 hsx_deadline_rev(void)
 {
        switch (boot_cpu_data.x86_stepping) {
        case 0x02: return 0x3a; /* EP */
@@ -556,7 +554,7 @@ static u32 hsx_deadline_rev(void)
        return ~0U;
 }
 
-static u32 bdx_deadline_rev(void)
+static __init u32 bdx_deadline_rev(void)
 {
        switch (boot_cpu_data.x86_stepping) {
        case 0x02: return 0x00000011;
@@ -568,7 +566,7 @@ static u32 bdx_deadline_rev(void)
        return ~0U;
 }
 
-static u32 skx_deadline_rev(void)
+static __init u32 skx_deadline_rev(void)
 {
        switch (boot_cpu_data.x86_stepping) {
        case 0x03: return 0x01000136;
@@ -581,7 +579,7 @@ static u32 skx_deadline_rev(void)
        return ~0U;
 }
 
-static const struct x86_cpu_id deadline_match[] = {
+static const struct x86_cpu_id deadline_match[] __initconst = {
        X86_MATCH_INTEL_FAM6_MODEL( HASWELL_X,          &hsx_deadline_rev),
        X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_X,        0x0b000020),
        X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_D,        &bdx_deadline_rev),
@@ -603,18 +601,19 @@ static const struct x86_cpu_id deadline_match[] = {
        {},
 };
 
-static void apic_check_deadline_errata(void)
+static __init bool apic_validate_deadline_timer(void)
 {
        const struct x86_cpu_id *m;
        u32 rev;
 
-       if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER) ||
-           boot_cpu_has(X86_FEATURE_HYPERVISOR))
-               return;
+       if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
+               return false;
+       if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
+               return true;
 
        m = x86_match_cpu(deadline_match);
        if (!m)
-               return;
+               return true;
 
        /*
         * Function pointers will have the MSB set due to address layout,
@@ -626,11 +625,12 @@ static void apic_check_deadline_errata(void)
                rev = (u32)m->driver_data;
 
        if (boot_cpu_data.microcode >= rev)
-               return;
+               return true;
 
        setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
        pr_err(FW_BUG "TSC_DEADLINE disabled due to Errata; "
               "please update microcode to version: 0x%x (or later)\n", rev);
+       return false;
 }
 
 /*
@@ -2092,7 +2092,8 @@ void __init init_apic_mappings(void)
 {
        unsigned int new_apicid;
 
-       apic_check_deadline_errata();
+       if (apic_validate_deadline_timer())
+               pr_debug("TSC deadline timer available\n");
 
        if (x2apic_mode) {
                boot_cpu_physical_apicid = read_apic_id();
index 87b9789..460ae7f 100644 (file)
@@ -183,7 +183,8 @@ recursion_check:
         */
        if (visit_mask) {
                if (*visit_mask & (1UL << info->type)) {
-                       printk_deferred_once(KERN_WARNING "WARNING: stack recursion on stack type %d\n", info->type);
+                       if (task == current)
+                               printk_deferred_once(KERN_WARNING "WARNING: stack recursion on stack type %d\n", info->type);
                        goto unknown;
                }
                *visit_mask |= 1UL << info->type;
index a224b5a..5422611 100644 (file)
@@ -344,6 +344,9 @@ bad_address:
        if (IS_ENABLED(CONFIG_X86_32))
                goto the_end;
 
+       if (state->task != current)
+               goto the_end;
+
        if (state->regs) {
                printk_deferred_once(KERN_WARNING
                        "WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n",
index e9cc182..5b0bd85 100644 (file)
@@ -8,19 +8,21 @@
 #include <asm/orc_lookup.h>
 
 #define orc_warn(fmt, ...) \
-       printk_deferred_once(KERN_WARNING pr_fmt("WARNING: " fmt), ##__VA_ARGS__)
+       printk_deferred_once(KERN_WARNING "WARNING: " fmt, ##__VA_ARGS__)
+
+#define orc_warn_current(args...)                                      \
+({                                                                     \
+       if (state->task == current)                                     \
+               orc_warn(args);                                         \
+})
 
 extern int __start_orc_unwind_ip[];
 extern int __stop_orc_unwind_ip[];
 extern struct orc_entry __start_orc_unwind[];
 extern struct orc_entry __stop_orc_unwind[];
 
-static DEFINE_MUTEX(sort_mutex);
-int *cur_orc_ip_table = __start_orc_unwind_ip;
-struct orc_entry *cur_orc_table = __start_orc_unwind;
-
-unsigned int lookup_num_blocks;
-bool orc_init;
+static bool orc_init __ro_after_init;
+static unsigned int lookup_num_blocks __ro_after_init;
 
 static inline unsigned long orc_ip(const int *ip)
 {
@@ -142,9 +144,6 @@ static struct orc_entry *orc_find(unsigned long ip)
 {
        static struct orc_entry *orc;
 
-       if (!orc_init)
-               return NULL;
-
        if (ip == 0)
                return &null_orc_entry;
 
@@ -189,6 +188,10 @@ static struct orc_entry *orc_find(unsigned long ip)
 
 #ifdef CONFIG_MODULES
 
+static DEFINE_MUTEX(sort_mutex);
+static int *cur_orc_ip_table = __start_orc_unwind_ip;
+static struct orc_entry *cur_orc_table = __start_orc_unwind;
+
 static void orc_sort_swap(void *_a, void *_b, int size)
 {
        struct orc_entry *orc_a, *orc_b;
@@ -381,9 +384,38 @@ static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr
        return true;
 }
 
+/*
+ * If state->regs is non-NULL, and points to a full pt_regs, just get the reg
+ * value from state->regs.
+ *
+ * Otherwise, if state->regs just points to IRET regs, and the previous frame
+ * had full regs, it's safe to get the value from the previous regs.  This can
+ * happen when early/late IRQ entry code gets interrupted by an NMI.
+ */
+static bool get_reg(struct unwind_state *state, unsigned int reg_off,
+                   unsigned long *val)
+{
+       unsigned int reg = reg_off/8;
+
+       if (!state->regs)
+               return false;
+
+       if (state->full_regs) {
+               *val = ((unsigned long *)state->regs)[reg];
+               return true;
+       }
+
+       if (state->prev_regs) {
+               *val = ((unsigned long *)state->prev_regs)[reg];
+               return true;
+       }
+
+       return false;
+}
+
 bool unwind_next_frame(struct unwind_state *state)
 {
-       unsigned long ip_p, sp, orig_ip = state->ip, prev_sp = state->sp;
+       unsigned long ip_p, sp, tmp, orig_ip = state->ip, prev_sp = state->sp;
        enum stack_type prev_type = state->stack_info.type;
        struct orc_entry *orc;
        bool indirect = false;
@@ -445,43 +477,39 @@ bool unwind_next_frame(struct unwind_state *state)
                break;
 
        case ORC_REG_R10:
-               if (!state->regs || !state->full_regs) {
-                       orc_warn("missing regs for base reg R10 at ip %pB\n",
-                                (void *)state->ip);
+               if (!get_reg(state, offsetof(struct pt_regs, r10), &sp)) {
+                       orc_warn_current("missing R10 value at %pB\n",
+                                        (void *)state->ip);
                        goto err;
                }
-               sp = state->regs->r10;
                break;
 
        case ORC_REG_R13:
-               if (!state->regs || !state->full_regs) {
-                       orc_warn("missing regs for base reg R13 at ip %pB\n",
-                                (void *)state->ip);
+               if (!get_reg(state, offsetof(struct pt_regs, r13), &sp)) {
+                       orc_warn_current("missing R13 value at %pB\n",
+                                        (void *)state->ip);
                        goto err;
                }
-               sp = state->regs->r13;
                break;
 
        case ORC_REG_DI:
-               if (!state->regs || !state->full_regs) {
-                       orc_warn("missing regs for base reg DI at ip %pB\n",
-                                (void *)state->ip);
+               if (!get_reg(state, offsetof(struct pt_regs, di), &sp)) {
+                       orc_warn_current("missing RDI value at %pB\n",
+                                        (void *)state->ip);
                        goto err;
                }
-               sp = state->regs->di;
                break;
 
        case ORC_REG_DX:
-               if (!state->regs || !state->full_regs) {
-                       orc_warn("missing regs for base reg DX at ip %pB\n",
-                                (void *)state->ip);
+               if (!get_reg(state, offsetof(struct pt_regs, dx), &sp)) {
+                       orc_warn_current("missing DX value at %pB\n",
+                                        (void *)state->ip);
                        goto err;
                }
-               sp = state->regs->dx;
                break;
 
        default:
-               orc_warn("unknown SP base reg %d for ip %pB\n",
+               orc_warn("unknown SP base reg %d at %pB\n",
                         orc->sp_reg, (void *)state->ip);
                goto err;
        }
@@ -504,44 +532,48 @@ bool unwind_next_frame(struct unwind_state *state)
 
                state->sp = sp;
                state->regs = NULL;
+               state->prev_regs = NULL;
                state->signal = false;
                break;
 
        case ORC_TYPE_REGS:
                if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) {
-                       orc_warn("can't dereference registers at %p for ip %pB\n",
-                                (void *)sp, (void *)orig_ip);
+                       orc_warn_current("can't access registers at %pB\n",
+                                        (void *)orig_ip);
                        goto err;
                }
 
                state->regs = (struct pt_regs *)sp;
+               state->prev_regs = NULL;
                state->full_regs = true;
                state->signal = true;
                break;
 
        case ORC_TYPE_REGS_IRET:
                if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) {
-                       orc_warn("can't dereference iret registers at %p for ip %pB\n",
-                                (void *)sp, (void *)orig_ip);
+                       orc_warn_current("can't access iret registers at %pB\n",
+                                        (void *)orig_ip);
                        goto err;
                }
 
+               if (state->full_regs)
+                       state->prev_regs = state->regs;
                state->regs = (void *)sp - IRET_FRAME_OFFSET;
                state->full_regs = false;
                state->signal = true;
                break;
 
        default:
-               orc_warn("unknown .orc_unwind entry type %d for ip %pB\n",
+               orc_warn("unknown .orc_unwind entry type %d at %pB\n",
                         orc->type, (void *)orig_ip);
-               break;
+               goto err;
        }
 
        /* Find BP: */
        switch (orc->bp_reg) {
        case ORC_REG_UNDEFINED:
-               if (state->regs && state->full_regs)
-                       state->bp = state->regs->bp;
+               if (get_reg(state, offsetof(struct pt_regs, bp), &tmp))
+                       state->bp = tmp;
                break;
 
        case ORC_REG_PREV_SP:
@@ -564,8 +596,8 @@ bool unwind_next_frame(struct unwind_state *state)
        if (state->stack_info.type == prev_type &&
            on_stack(&state->stack_info, (void *)state->sp, sizeof(long)) &&
            state->sp <= prev_sp) {
-               orc_warn("stack going in the wrong direction? ip=%pB\n",
-                        (void *)orig_ip);
+               orc_warn_current("stack going in the wrong direction? at %pB\n",
+                                (void *)orig_ip);
                goto err;
        }
 
@@ -585,6 +617,9 @@ EXPORT_SYMBOL_GPL(unwind_next_frame);
 void __unwind_start(struct unwind_state *state, struct task_struct *task,
                    struct pt_regs *regs, unsigned long *first_frame)
 {
+       if (!orc_init)
+               goto done;
+
        memset(state, 0, sizeof(*state));
        state->task = task;
 
@@ -651,7 +686,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
        /* Otherwise, skip ahead to the user-specified starting frame: */
        while (!unwind_done(state) &&
               (!on_stack(&state->stack_info, first_frame, sizeof(long)) ||
-                       state->sp <= (unsigned long)first_frame))
+                       state->sp < (unsigned long)first_frame))
                unwind_next_frame(state);
 
        return;
index 750ff0b..d057376 100644 (file)
@@ -225,12 +225,12 @@ static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq,
        }
 
        /*
-        * AMD SVM AVIC accelerate EOI write and do not trap,
-        * in-kernel IOAPIC will not be able to receive the EOI.
-        * In this case, we do lazy update of the pending EOI when
-        * trying to set IOAPIC irq.
+        * AMD SVM AVIC accelerate EOI write iff the interrupt is edge
+        * triggered, in which case the in-kernel IOAPIC will not be able
+        * to receive the EOI.  In this case, we do a lazy update of the
+        * pending EOI when trying to set IOAPIC irq.
         */
-       if (kvm_apicv_activated(ioapic->kvm))
+       if (edge && kvm_apicv_activated(ioapic->kvm))
                ioapic_lazy_update_eoi(ioapic, irq);
 
        /*
index cf912b4..89f7f3a 100644 (file)
@@ -345,7 +345,7 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
                return NULL;
 
        /* Pin the user virtual address. */
-       npinned = get_user_pages_fast(uaddr, npages, FOLL_WRITE, pages);
+       npinned = get_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages);
        if (npinned != npages) {
                pr_err("SEV: Failure locking %lu pages.\n", npages);
                goto err;
index 2f379ba..38f6aee 100644 (file)
@@ -1752,6 +1752,8 @@ static int db_interception(struct vcpu_svm *svm)
        if (svm->vcpu.guest_debug &
            (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) {
                kvm_run->exit_reason = KVM_EXIT_DEBUG;
+               kvm_run->debug.arch.dr6 = svm->vmcb->save.dr6;
+               kvm_run->debug.arch.dr7 = svm->vmcb->save.dr7;
                kvm_run->debug.arch.pc =
                        svm->vmcb->save.cs.base + svm->vmcb->save.rip;
                kvm_run->debug.arch.exception = DB_VECTOR;
index fd78ffb..e44f33c 100644 (file)
@@ -5165,7 +5165,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
         */
                break;
        default:
-               BUG_ON(1);
+               BUG();
                break;
        }
 
index 87f3f24..51d1a82 100644 (file)
@@ -82,6 +82,9 @@ SYM_FUNC_START(vmx_vmexit)
        /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
        FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
 
+       /* Clear RFLAGS.CF and RFLAGS.ZF to preserve VM-Exit, i.e. !VM-Fail. */
+       or $1, %_ASM_AX
+
        pop %_ASM_AX
 .Lvmexit_skip_rsb:
 #endif
index c5835f9..d786c7d 100644 (file)
@@ -926,19 +926,6 @@ EXPORT_SYMBOL_GPL(kvm_set_xcr);
        __reserved_bits;                                \
 })
 
-static u64 kvm_host_cr4_reserved_bits(struct cpuinfo_x86 *c)
-{
-       u64 reserved_bits = __cr4_reserved_bits(cpu_has, c);
-
-       if (kvm_cpu_cap_has(X86_FEATURE_LA57))
-               reserved_bits &= ~X86_CR4_LA57;
-
-       if (kvm_cpu_cap_has(X86_FEATURE_UMIP))
-               reserved_bits &= ~X86_CR4_UMIP;
-
-       return reserved_bits;
-}
-
 static int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
        if (cr4 & cr4_reserved_bits)
@@ -3385,6 +3372,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_GET_MSR_FEATURES:
        case KVM_CAP_MSR_PLATFORM_INFO:
        case KVM_CAP_EXCEPTION_PAYLOAD:
+       case KVM_CAP_SET_GUEST_DEBUG:
                r = 1;
                break;
        case KVM_CAP_SYNC_REGS:
@@ -9675,7 +9663,9 @@ int kvm_arch_hardware_setup(void *opaque)
        if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
                supported_xss = 0;
 
-       cr4_reserved_bits = kvm_host_cr4_reserved_bits(&boot_cpu_data);
+#define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f)
+       cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_);
+#undef __kvm_cpu_cap_has
 
        if (kvm_has_tsc_control) {
                /*
@@ -9707,7 +9697,8 @@ int kvm_arch_check_processor_compat(void *opaque)
 
        WARN_ON(!irqs_disabled());
 
-       if (kvm_host_cr4_reserved_bits(c) != cr4_reserved_bits)
+       if (__cr4_reserved_bits(cpu_has, c) !=
+           __cr4_reserved_bits(cpu_has, &boot_cpu_data))
                return -EIO;
 
        return ops->check_processor_compatibility();
index 59eca6a..b8c55a2 100644 (file)
@@ -43,7 +43,8 @@ struct cpa_data {
        unsigned long   pfn;
        unsigned int    flags;
        unsigned int    force_split             : 1,
-                       force_static_prot       : 1;
+                       force_static_prot       : 1,
+                       force_flush_all         : 1;
        struct page     **pages;
 };
 
@@ -355,10 +356,10 @@ static void cpa_flush(struct cpa_data *data, int cache)
                return;
        }
 
-       if (cpa->numpages <= tlb_single_page_flush_ceiling)
-               on_each_cpu(__cpa_flush_tlb, cpa, 1);
-       else
+       if (cpa->force_flush_all || cpa->numpages > tlb_single_page_flush_ceiling)
                flush_tlb_all();
+       else
+               on_each_cpu(__cpa_flush_tlb, cpa, 1);
 
        if (!cache)
                return;
@@ -1598,6 +1599,8 @@ static int cpa_process_alias(struct cpa_data *cpa)
                alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
                alias_cpa.curpage = 0;
 
+               cpa->force_flush_all = 1;
+
                ret = __change_page_attr_set_clr(&alias_cpa, 0);
                if (ret)
                        return ret;
@@ -1618,6 +1621,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
                alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
                alias_cpa.curpage = 0;
 
+               cpa->force_flush_all = 1;
                /*
                 * The high mapping range is imprecise, so ignore the
                 * return value.
index 78ba57e..3d41171 100644 (file)
 #include <linux/ioprio.h>
 #include <linux/sbitmap.h>
 #include <linux/delay.h>
+#include <linux/backing-dev.h>
 
 #include "blk.h"
 #include "blk-mq.h"
@@ -4976,8 +4977,9 @@ bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
        ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio);
        switch (ioprio_class) {
        default:
-               dev_err(bfqq->bfqd->queue->backing_dev_info->dev,
-                       "bfq: bad prio class %d\n", ioprio_class);
+               pr_err("bdi %s: bfq: bad prio class %d\n",
+                               bdi_dev_name(bfqq->bfqd->queue->backing_dev_info),
+                               ioprio_class);
                /* fall through */
        case IOPRIO_CLASS_NONE:
                /*
index c5dc833..930212c 100644 (file)
@@ -496,7 +496,7 @@ const char *blkg_dev_name(struct blkcg_gq *blkg)
 {
        /* some drivers (floppy) instantiate a queue w/o disk registered */
        if (blkg->q->backing_dev_info->dev)
-               return dev_name(blkg->q->backing_dev_info->dev);
+               return bdi_dev_name(blkg->q->backing_dev_info);
        return NULL;
 }
 
index 3ab0c1c..7c1fe60 100644 (file)
@@ -466,7 +466,7 @@ struct ioc_gq {
         */
        atomic64_t                      vtime;
        atomic64_t                      done_vtime;
-       atomic64_t                      abs_vdebt;
+       u64                             abs_vdebt;
        u64                             last_vtime;
 
        /*
@@ -1142,7 +1142,7 @@ static void iocg_kick_waitq(struct ioc_gq *iocg, struct ioc_now *now)
        struct iocg_wake_ctx ctx = { .iocg = iocg };
        u64 margin_ns = (u64)(ioc->period_us *
                              WAITQ_TIMER_MARGIN_PCT / 100) * NSEC_PER_USEC;
-       u64 abs_vdebt, vdebt, vshortage, expires, oexpires;
+       u64 vdebt, vshortage, expires, oexpires;
        s64 vbudget;
        u32 hw_inuse;
 
@@ -1152,18 +1152,15 @@ static void iocg_kick_waitq(struct ioc_gq *iocg, struct ioc_now *now)
        vbudget = now->vnow - atomic64_read(&iocg->vtime);
 
        /* pay off debt */
-       abs_vdebt = atomic64_read(&iocg->abs_vdebt);
-       vdebt = abs_cost_to_cost(abs_vdebt, hw_inuse);
+       vdebt = abs_cost_to_cost(iocg->abs_vdebt, hw_inuse);
        if (vdebt && vbudget > 0) {
                u64 delta = min_t(u64, vbudget, vdebt);
                u64 abs_delta = min(cost_to_abs_cost(delta, hw_inuse),
-                                   abs_vdebt);
+                                   iocg->abs_vdebt);
 
                atomic64_add(delta, &iocg->vtime);
                atomic64_add(delta, &iocg->done_vtime);
-               atomic64_sub(abs_delta, &iocg->abs_vdebt);
-               if (WARN_ON_ONCE(atomic64_read(&iocg->abs_vdebt) < 0))
-                       atomic64_set(&iocg->abs_vdebt, 0);
+               iocg->abs_vdebt -= abs_delta;
        }
 
        /*
@@ -1219,12 +1216,18 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now, u64 cost)
        u64 expires, oexpires;
        u32 hw_inuse;
 
+       lockdep_assert_held(&iocg->waitq.lock);
+
        /* debt-adjust vtime */
        current_hweight(iocg, NULL, &hw_inuse);
-       vtime += abs_cost_to_cost(atomic64_read(&iocg->abs_vdebt), hw_inuse);
+       vtime += abs_cost_to_cost(iocg->abs_vdebt, hw_inuse);
 
-       /* clear or maintain depending on the overage */
-       if (time_before_eq64(vtime, now->vnow)) {
+       /*
+        * Clear or maintain depending on the overage. Non-zero vdebt is what
+        * guarantees that @iocg is online and future iocg_kick_delay() will
+        * clear use_delay. Don't leave it on when there's no vdebt.
+        */
+       if (!iocg->abs_vdebt || time_before_eq64(vtime, now->vnow)) {
                blkcg_clear_delay(blkg);
                return false;
        }
@@ -1258,9 +1261,12 @@ static enum hrtimer_restart iocg_delay_timer_fn(struct hrtimer *timer)
 {
        struct ioc_gq *iocg = container_of(timer, struct ioc_gq, delay_timer);
        struct ioc_now now;
+       unsigned long flags;
 
+       spin_lock_irqsave(&iocg->waitq.lock, flags);
        ioc_now(iocg->ioc, &now);
        iocg_kick_delay(iocg, &now, 0);
+       spin_unlock_irqrestore(&iocg->waitq.lock, flags);
 
        return HRTIMER_NORESTART;
 }
@@ -1368,14 +1374,13 @@ static void ioc_timer_fn(struct timer_list *timer)
         * should have woken up in the last period and expire idle iocgs.
         */
        list_for_each_entry_safe(iocg, tiocg, &ioc->active_iocgs, active_list) {
-               if (!waitqueue_active(&iocg->waitq) &&
-                   !atomic64_read(&iocg->abs_vdebt) && !iocg_is_idle(iocg))
+               if (!waitqueue_active(&iocg->waitq) && iocg->abs_vdebt &&
+                   !iocg_is_idle(iocg))
                        continue;
 
                spin_lock(&iocg->waitq.lock);
 
-               if (waitqueue_active(&iocg->waitq) ||
-                   atomic64_read(&iocg->abs_vdebt)) {
+               if (waitqueue_active(&iocg->waitq) || iocg->abs_vdebt) {
                        /* might be oversleeping vtime / hweight changes, kick */
                        iocg_kick_waitq(iocg, &now);
                        iocg_kick_delay(iocg, &now, 0);
@@ -1718,28 +1723,49 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
         * tests are racy but the races aren't systemic - we only miss once
         * in a while which is fine.
         */
-       if (!waitqueue_active(&iocg->waitq) &&
-           !atomic64_read(&iocg->abs_vdebt) &&
+       if (!waitqueue_active(&iocg->waitq) && !iocg->abs_vdebt &&
            time_before_eq64(vtime + cost, now.vnow)) {
                iocg_commit_bio(iocg, bio, cost);
                return;
        }
 
        /*
-        * We're over budget.  If @bio has to be issued regardless,
-        * remember the abs_cost instead of advancing vtime.
-        * iocg_kick_waitq() will pay off the debt before waking more IOs.
+        * We activated above but w/o any synchronization. Deactivation is
+        * synchronized with waitq.lock and we won't get deactivated as long
+        * as we're waiting or has debt, so we're good if we're activated
+        * here. In the unlikely case that we aren't, just issue the IO.
+        */
+       spin_lock_irq(&iocg->waitq.lock);
+
+       if (unlikely(list_empty(&iocg->active_list))) {
+               spin_unlock_irq(&iocg->waitq.lock);
+               iocg_commit_bio(iocg, bio, cost);
+               return;
+       }
+
+       /*
+        * We're over budget. If @bio has to be issued regardless, remember
+        * the abs_cost instead of advancing vtime. iocg_kick_waitq() will pay
+        * off the debt before waking more IOs.
+        *
         * This way, the debt is continuously paid off each period with the
-        * actual budget available to the cgroup.  If we just wound vtime,
-        * we would incorrectly use the current hw_inuse for the entire
-        * amount which, for example, can lead to the cgroup staying
-        * blocked for a long time even with substantially raised hw_inuse.
+        * actual budget available to the cgroup. If we just wound vtime, we
+        * would incorrectly use the current hw_inuse for the entire amount
+        * which, for example, can lead to the cgroup staying blocked for a
+        * long time even with substantially raised hw_inuse.
+        *
+        * An iocg with vdebt should stay online so that the timer can keep
+        * deducting its vdebt and [de]activate use_delay mechanism
+        * accordingly. We don't want to race against the timer trying to
+        * clear them and leave @iocg inactive w/ dangling use_delay heavily
+        * penalizing the cgroup and its descendants.
         */
        if (bio_issue_as_root_blkg(bio) || fatal_signal_pending(current)) {
-               atomic64_add(abs_cost, &iocg->abs_vdebt);
+               iocg->abs_vdebt += abs_cost;
                if (iocg_kick_delay(iocg, &now, cost))
                        blkcg_schedule_throttle(rqos->q,
                                        (bio->bi_opf & REQ_SWAP) == REQ_SWAP);
+               spin_unlock_irq(&iocg->waitq.lock);
                return;
        }
 
@@ -1756,20 +1782,6 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
         * All waiters are on iocg->waitq and the wait states are
         * synchronized using waitq.lock.
         */
-       spin_lock_irq(&iocg->waitq.lock);
-
-       /*
-        * We activated above but w/o any synchronization.  Deactivation is
-        * synchronized with waitq.lock and we won't get deactivated as
-        * long as we're waiting, so we're good if we're activated here.
-        * In the unlikely case that we are deactivated, just issue the IO.
-        */
-       if (unlikely(list_empty(&iocg->active_list))) {
-               spin_unlock_irq(&iocg->waitq.lock);
-               iocg_commit_bio(iocg, bio, cost);
-               return;
-       }
-
        init_waitqueue_func_entry(&wait.wait, iocg_wake_fn);
        wait.wait.private = current;
        wait.bio = bio;
@@ -1801,6 +1813,7 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq,
        struct ioc_now now;
        u32 hw_inuse;
        u64 abs_cost, cost;
+       unsigned long flags;
 
        /* bypass if disabled or for root cgroup */
        if (!ioc->enabled || !iocg->level)
@@ -1820,15 +1833,28 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq,
                iocg->cursor = bio_end;
 
        /*
-        * Charge if there's enough vtime budget and the existing request
-        * has cost assigned.  Otherwise, account it as debt.  See debt
-        * handling in ioc_rqos_throttle() for details.
+        * Charge if there's enough vtime budget and the existing request has
+        * cost assigned.
         */
        if (rq->bio && rq->bio->bi_iocost_cost &&
-           time_before_eq64(atomic64_read(&iocg->vtime) + cost, now.vnow))
+           time_before_eq64(atomic64_read(&iocg->vtime) + cost, now.vnow)) {
                iocg_commit_bio(iocg, bio, cost);
-       else
-               atomic64_add(abs_cost, &iocg->abs_vdebt);
+               return;
+       }
+
+       /*
+        * Otherwise, account it as debt if @iocg is online, which it should
+        * be for the vast majority of cases. See debt handling in
+        * ioc_rqos_throttle() for details.
+        */
+       spin_lock_irqsave(&iocg->waitq.lock, flags);
+       if (likely(!list_empty(&iocg->active_list))) {
+               iocg->abs_vdebt += abs_cost;
+               iocg_kick_delay(iocg, &now, cost);
+       } else {
+               iocg_commit_bio(iocg, bio, cost);
+       }
+       spin_unlock_irqrestore(&iocg->waitq.lock, flags);
 }
 
 static void ioc_rqos_done_bio(struct rq_qos *rqos, struct bio *bio)
@@ -1998,7 +2024,6 @@ static void ioc_pd_init(struct blkg_policy_data *pd)
        iocg->ioc = ioc;
        atomic64_set(&iocg->vtime, now.vnow);
        atomic64_set(&iocg->done_vtime, now.vnow);
-       atomic64_set(&iocg->abs_vdebt, 0);
        atomic64_set(&iocg->active_period, atomic64_read(&ioc->cur_period));
        INIT_LIST_HEAD(&iocg->active_list);
        iocg->hweight_active = HWEIGHT_WHOLE;
index 376d7ed..3c734b8 100644 (file)
@@ -287,7 +287,7 @@ static void exit_tfm(struct crypto_skcipher *tfm)
        crypto_free_skcipher(ctx->child);
 }
 
-static void free(struct skcipher_instance *inst)
+static void free_inst(struct skcipher_instance *inst)
 {
        crypto_drop_skcipher(skcipher_instance_ctx(inst));
        kfree(inst);
@@ -400,12 +400,12 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb)
        inst->alg.encrypt = encrypt;
        inst->alg.decrypt = decrypt;
 
-       inst->free = free;
+       inst->free = free_inst;
 
        err = skcipher_register_instance(tmpl, inst);
        if (err) {
 err_free_inst:
-               free(inst);
+               free_inst(inst);
        }
        return err;
 }
index dbdd8af..6d8cea9 100644 (file)
@@ -322,7 +322,7 @@ static void exit_tfm(struct crypto_skcipher *tfm)
        crypto_free_cipher(ctx->tweak);
 }
 
-static void free(struct skcipher_instance *inst)
+static void free_inst(struct skcipher_instance *inst)
 {
        crypto_drop_skcipher(skcipher_instance_ctx(inst));
        kfree(inst);
@@ -434,12 +434,12 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb)
        inst->alg.encrypt = encrypt;
        inst->alg.decrypt = decrypt;
 
-       inst->free = free;
+       inst->free = free_inst;
 
        err = skcipher_register_instance(tmpl, inst);
        if (err) {
 err_free_inst:
-               free(inst);
+               free_inst(inst);
        }
        return err;
 }
index fe15236..8558b62 100644 (file)
@@ -645,6 +645,7 @@ static void amba_device_initialize(struct amba_device *dev, const char *name)
        dev->dev.release = amba_device_release;
        dev->dev.bus = &amba_bustype;
        dev->dev.dma_mask = &dev->dev.coherent_dma_mask;
+       dev->dev.dma_parms = &dev->dma_parms;
        dev->res.name = dev_name(&dev->dev);
 }
 
index e977041..dcfbe72 100644 (file)
@@ -256,7 +256,8 @@ static int try_to_bring_up_master(struct master *master,
        ret = master->ops->bind(master->dev);
        if (ret < 0) {
                devres_release_group(master->dev, NULL);
-               dev_info(master->dev, "master bind failed: %d\n", ret);
+               if (ret != -EPROBE_DEFER)
+                       dev_info(master->dev, "master bind failed: %d\n", ret);
                return ret;
        }
 
@@ -611,8 +612,9 @@ static int component_bind(struct component *component, struct master *master,
                devres_release_group(component->dev, NULL);
                devres_release_group(master->dev, NULL);
 
-               dev_err(master->dev, "failed to bind %s (ops %ps): %d\n",
-                       dev_name(component->dev), component->ops, ret);
+               if (ret != -EPROBE_DEFER)
+                       dev_err(master->dev, "failed to bind %s (ops %ps): %d\n",
+                               dev_name(component->dev), component->ops, ret);
        }
 
        return ret;
index 139cdf7..073045c 100644 (file)
@@ -2370,6 +2370,11 @@ u32 fw_devlink_get_flags(void)
        return fw_devlink_flags;
 }
 
+static bool fw_devlink_is_permissive(void)
+{
+       return fw_devlink_flags == DL_FLAG_SYNC_STATE_ONLY;
+}
+
 /**
  * device_add - add device to device hierarchy.
  * @dev: device.
@@ -2524,7 +2529,7 @@ int device_add(struct device *dev)
        if (fw_devlink_flags && is_fwnode_dev &&
            fwnode_has_op(dev->fwnode, add_links)) {
                fw_ret = fwnode_call_int_op(dev->fwnode, add_links, dev);
-               if (fw_ret == -ENODEV)
+               if (fw_ret == -ENODEV && !fw_devlink_is_permissive())
                        device_link_wait_for_mandatory_supplier(dev);
                else if (fw_ret)
                        device_link_wait_for_optional_supplier(dev);
index 06ec0e8..94037be 100644 (file)
@@ -224,17 +224,9 @@ static int deferred_devs_show(struct seq_file *s, void *data)
 }
 DEFINE_SHOW_ATTRIBUTE(deferred_devs);
 
-#ifdef CONFIG_MODULES
-/*
- * In the case of modules, set the default probe timeout to
- * 30 seconds to give userland some time to load needed modules
- */
-int driver_deferred_probe_timeout = 30;
-#else
-/* In the case of !modules, no probe timeout needed */
-int driver_deferred_probe_timeout = -1;
-#endif
+int driver_deferred_probe_timeout;
 EXPORT_SYMBOL_GPL(driver_deferred_probe_timeout);
+static DECLARE_WAIT_QUEUE_HEAD(probe_timeout_waitqueue);
 
 static int __init deferred_probe_timeout_setup(char *str)
 {
@@ -266,8 +258,8 @@ int driver_deferred_probe_check_state(struct device *dev)
                return -ENODEV;
        }
 
-       if (!driver_deferred_probe_timeout) {
-               dev_WARN(dev, "deferred probe timeout, ignoring dependency");
+       if (!driver_deferred_probe_timeout && initcalls_done) {
+               dev_warn(dev, "deferred probe timeout, ignoring dependency");
                return -ETIMEDOUT;
        }
 
@@ -284,6 +276,7 @@ static void deferred_probe_timeout_work_func(struct work_struct *work)
 
        list_for_each_entry_safe(private, p, &deferred_probe_pending_list, deferred_probe)
                dev_info(private->device, "deferred probe pending");
+       wake_up(&probe_timeout_waitqueue);
 }
 static DECLARE_DELAYED_WORK(deferred_probe_timeout_work, deferred_probe_timeout_work_func);
 
@@ -658,6 +651,9 @@ int driver_probe_done(void)
  */
 void wait_for_device_probe(void)
 {
+       /* wait for probe timeout */
+       wait_event(probe_timeout_waitqueue, !driver_deferred_probe_timeout);
+
        /* wait for the deferred probe workqueue to finish */
        flush_work(&deferred_probe_work);
 
index 5255550..b27d0f6 100644 (file)
@@ -380,6 +380,8 @@ struct platform_object {
  */
 static void setup_pdev_dma_masks(struct platform_device *pdev)
 {
+       pdev->dev.dma_parms = &pdev->dma_parms;
+
        if (!pdev->dev.coherent_dma_mask)
                pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
        if (!pdev->dev.dma_mask) {
index b38359c..eb2ab05 100644 (file)
@@ -812,10 +812,9 @@ int mhi_register_controller(struct mhi_controller *mhi_cntrl,
        if (!mhi_cntrl)
                return -EINVAL;
 
-       if (!mhi_cntrl->runtime_get || !mhi_cntrl->runtime_put)
-               return -EINVAL;
-
-       if (!mhi_cntrl->status_cb || !mhi_cntrl->link_status)
+       if (!mhi_cntrl->runtime_get || !mhi_cntrl->runtime_put ||
+           !mhi_cntrl->status_cb || !mhi_cntrl->read_reg ||
+           !mhi_cntrl->write_reg)
                return -EINVAL;
 
        ret = parse_config(mhi_cntrl, config);
index 5deadfa..095d95b 100644 (file)
@@ -11,9 +11,6 @@
 
 extern struct bus_type mhi_bus_type;
 
-/* MHI MMIO register mapping */
-#define PCI_INVALID_READ(val) (val == U32_MAX)
-
 #define MHIREGLEN (0x0)
 #define MHIREGLEN_MHIREGLEN_MASK (0xFFFFFFFF)
 #define MHIREGLEN_MHIREGLEN_SHIFT (0)
index eb4256b..97e06cc 100644 (file)
 int __must_check mhi_read_reg(struct mhi_controller *mhi_cntrl,
                              void __iomem *base, u32 offset, u32 *out)
 {
-       u32 tmp = readl(base + offset);
-
-       /* If there is any unexpected value, query the link status */
-       if (PCI_INVALID_READ(tmp) &&
-           mhi_cntrl->link_status(mhi_cntrl))
-               return -EIO;
-
-       *out = tmp;
-
-       return 0;
+       return mhi_cntrl->read_reg(mhi_cntrl, base + offset, out);
 }
 
 int __must_check mhi_read_reg_field(struct mhi_controller *mhi_cntrl,
@@ -49,7 +40,7 @@ int __must_check mhi_read_reg_field(struct mhi_controller *mhi_cntrl,
 void mhi_write_reg(struct mhi_controller *mhi_cntrl, void __iomem *base,
                   u32 offset, u32 val)
 {
-       writel(val, base + offset);
+       mhi_cntrl->write_reg(mhi_cntrl, base + offset, val);
 }
 
 void mhi_write_reg_field(struct mhi_controller *mhi_cntrl, void __iomem *base,
@@ -294,7 +285,7 @@ void mhi_create_devices(struct mhi_controller *mhi_cntrl)
                    !(mhi_chan->ee_mask & BIT(mhi_cntrl->ee)))
                        continue;
                mhi_dev = mhi_alloc_device(mhi_cntrl);
-               if (!mhi_dev)
+               if (IS_ERR(mhi_dev))
                        return;
 
                mhi_dev->dev_type = MHI_DEVICE_XFER;
@@ -336,7 +327,8 @@ void mhi_create_devices(struct mhi_controller *mhi_cntrl)
 
                /* Channel name is same for both UL and DL */
                mhi_dev->chan_name = mhi_chan->name;
-               dev_set_name(&mhi_dev->dev, "%04x_%s", mhi_chan->chan,
+               dev_set_name(&mhi_dev->dev, "%s_%s",
+                            dev_name(mhi_cntrl->cntrl_dev),
                             mhi_dev->chan_name);
 
                /* Init wakeup source if available */
index 52690cb..dc83d65 100644 (file)
@@ -902,7 +902,11 @@ int mhi_sync_power_up(struct mhi_controller *mhi_cntrl)
                           MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state),
                           msecs_to_jiffies(mhi_cntrl->timeout_ms));
 
-       return (MHI_IN_MISSION_MODE(mhi_cntrl->ee)) ? 0 : -EIO;
+       ret = (MHI_IN_MISSION_MODE(mhi_cntrl->ee)) ? 0 : -ETIMEDOUT;
+       if (ret)
+               mhi_power_down(mhi_cntrl, false);
+
+       return ret;
 }
 EXPORT_SYMBOL(mhi_sync_power_up);
 
index e92b352..43d9e24 100644 (file)
@@ -673,41 +673,14 @@ int chcr_ktls_cpl_set_tcb_rpl(struct adapter *adap, unsigned char *input)
        return 0;
 }
 
-/*
- * chcr_write_cpl_set_tcb_ulp: update tcb values.
- * TCB is responsible to create tcp headers, so all the related values
- * should be correctly updated.
- * @tx_info - driver specific tls info.
- * @q - tx queue on which packet is going out.
- * @tid - TCB identifier.
- * @pos - current index where should we start writing.
- * @word - TCB word.
- * @mask - TCB word related mask.
- * @val - TCB word related value.
- * @reply - set 1 if looking for TP response.
- * return - next position to write.
- */
-static void *chcr_write_cpl_set_tcb_ulp(struct chcr_ktls_info *tx_info,
-                                       struct sge_eth_txq *q, u32 tid,
-                                       void *pos, u16 word, u64 mask,
+static void *__chcr_write_cpl_set_tcb_ulp(struct chcr_ktls_info *tx_info,
+                                       u32 tid, void *pos, u16 word, u64 mask,
                                        u64 val, u32 reply)
 {
        struct cpl_set_tcb_field_core *cpl;
        struct ulptx_idata *idata;
        struct ulp_txpkt *txpkt;
-       void *save_pos = NULL;
-       u8 buf[48] = {0};
-       int left;
 
-       left = (void *)q->q.stat - pos;
-       if (unlikely(left < CHCR_SET_TCB_FIELD_LEN)) {
-               if (!left) {
-                       pos = q->q.desc;
-               } else {
-                       save_pos = pos;
-                       pos = buf;
-               }
-       }
        /* ULP_TXPKT */
        txpkt = pos;
        txpkt->cmd_dest = htonl(ULPTX_CMD_V(ULP_TX_PKT) | ULP_TXPKT_DEST_V(0));
@@ -732,18 +705,54 @@ static void *chcr_write_cpl_set_tcb_ulp(struct chcr_ktls_info *tx_info,
        idata = (struct ulptx_idata *)(cpl + 1);
        idata->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_NOOP));
        idata->len = htonl(0);
+       pos = idata + 1;
 
-       if (save_pos) {
-               pos = chcr_copy_to_txd(buf, &q->q, save_pos,
-                                      CHCR_SET_TCB_FIELD_LEN);
-       } else {
-               /* check again if we are at the end of the queue */
-               if (left == CHCR_SET_TCB_FIELD_LEN)
+       return pos;
+}
+
+
+/*
+ * chcr_write_cpl_set_tcb_ulp: update tcb values.
+ * TCB is responsible to create tcp headers, so all the related values
+ * should be correctly updated.
+ * @tx_info - driver specific tls info.
+ * @q - tx queue on which packet is going out.
+ * @tid - TCB identifier.
+ * @pos - current index where should we start writing.
+ * @word - TCB word.
+ * @mask - TCB word related mask.
+ * @val - TCB word related value.
+ * @reply - set 1 if looking for TP response.
+ * return - next position to write.
+ */
+static void *chcr_write_cpl_set_tcb_ulp(struct chcr_ktls_info *tx_info,
+                                       struct sge_eth_txq *q, u32 tid,
+                                       void *pos, u16 word, u64 mask,
+                                       u64 val, u32 reply)
+{
+       int left = (void *)q->q.stat - pos;
+
+       if (unlikely(left < CHCR_SET_TCB_FIELD_LEN)) {
+               if (!left) {
                        pos = q->q.desc;
-               else
-                       pos = idata + 1;
+               } else {
+                       u8 buf[48] = {0};
+
+                       __chcr_write_cpl_set_tcb_ulp(tx_info, tid, buf, word,
+                                                    mask, val, reply);
+
+                       return chcr_copy_to_txd(buf, &q->q, pos,
+                                               CHCR_SET_TCB_FIELD_LEN);
+               }
        }
 
+       pos = __chcr_write_cpl_set_tcb_ulp(tx_info, tid, pos, word,
+                                          mask, val, reply);
+
+       /* check again if we are at the end of the queue */
+       if (left == CHCR_SET_TCB_FIELD_LEN)
+               pos = q->q.desc;
+
        return pos;
 }
 
index 31f9f0e..55b031d 100644 (file)
@@ -16,7 +16,7 @@
 int efi_tpm_final_log_size;
 EXPORT_SYMBOL(efi_tpm_final_log_size);
 
-static int tpm2_calc_event_log_size(void *data, int count, void *size_info)
+static int __init tpm2_calc_event_log_size(void *data, int count, void *size_info)
 {
        struct tcg_pcr_event2_head *header;
        int event_size, size = 0;
index 5638b4e..4269ea9 100644 (file)
@@ -531,7 +531,7 @@ static int pca953x_gpio_set_config(struct gpio_chip *gc, unsigned int offset,
 {
        struct pca953x_chip *chip = gpiochip_get_data(gc);
 
-       switch (config) {
+       switch (pinconf_to_config_param(config)) {
        case PIN_CONFIG_BIAS_PULL_UP:
        case PIN_CONFIG_BIAS_PULL_DOWN:
                return pca953x_gpio_set_pull_up_down(chip, offset, config);
index acb99ef..8656815 100644 (file)
@@ -368,6 +368,7 @@ static void tegra_gpio_irq_shutdown(struct irq_data *d)
        struct tegra_gpio_info *tgi = bank->tgi;
        unsigned int gpio = d->hwirq;
 
+       tegra_gpio_irq_mask(d);
        gpiochip_unlock_as_irq(&tgi->gc, gpio);
 }
 
index 40f2d7f..182136d 100644 (file)
@@ -1158,8 +1158,19 @@ static void gpio_desc_to_lineinfo(struct gpio_desc *desc,
                                  struct gpioline_info *info)
 {
        struct gpio_chip *gc = desc->gdev->chip;
+       bool ok_for_pinctrl;
        unsigned long flags;
 
+       /*
+        * This function takes a mutex so we must check this before taking
+        * the spinlock.
+        *
+        * FIXME: find a non-racy way to retrieve this information. Maybe a
+        * lock common to both frameworks?
+        */
+       ok_for_pinctrl =
+               pinctrl_gpio_can_use_line(gc->base + info->line_offset);
+
        spin_lock_irqsave(&gpio_lock, flags);
 
        if (desc->name) {
@@ -1186,7 +1197,7 @@ static void gpio_desc_to_lineinfo(struct gpio_desc *desc,
            test_bit(FLAG_USED_AS_IRQ, &desc->flags) ||
            test_bit(FLAG_EXPORT, &desc->flags) ||
            test_bit(FLAG_SYSFS, &desc->flags) ||
-           !pinctrl_gpio_can_use_line(gc->base + info->line_offset))
+           !ok_for_pinctrl)
                info->flags |= GPIOLINE_FLAG_KERNEL;
        if (test_bit(FLAG_IS_OUT, &desc->flags))
                info->flags |= GPIOLINE_FLAG_IS_OUT;
@@ -1227,6 +1238,7 @@ static long gpio_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
        void __user *ip = (void __user *)arg;
        struct gpio_desc *desc;
        __u32 offset;
+       int hwgpio;
 
        /* We fail any subsequent ioctl():s when the chip is gone */
        if (!gc)
@@ -1259,13 +1271,19 @@ static long gpio_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                if (IS_ERR(desc))
                        return PTR_ERR(desc);
 
+               hwgpio = gpio_chip_hwgpio(desc);
+
+               if (cmd == GPIO_GET_LINEINFO_WATCH_IOCTL &&
+                   test_bit(hwgpio, priv->watched_lines))
+                       return -EBUSY;
+
                gpio_desc_to_lineinfo(desc, &lineinfo);
 
                if (copy_to_user(ip, &lineinfo, sizeof(lineinfo)))
                        return -EFAULT;
 
                if (cmd == GPIO_GET_LINEINFO_WATCH_IOCTL)
-                       set_bit(gpio_chip_hwgpio(desc), priv->watched_lines);
+                       set_bit(hwgpio, priv->watched_lines);
 
                return 0;
        } else if (cmd == GPIO_GET_LINEHANDLE_IOCTL) {
@@ -1280,7 +1298,12 @@ static long gpio_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                if (IS_ERR(desc))
                        return PTR_ERR(desc);
 
-               clear_bit(gpio_chip_hwgpio(desc), priv->watched_lines);
+               hwgpio = gpio_chip_hwgpio(desc);
+
+               if (!test_bit(hwgpio, priv->watched_lines))
+                       return -EBUSY;
+
+               clear_bit(hwgpio, priv->watched_lines);
                return 0;
        }
        return -EINVAL;
@@ -5289,8 +5312,9 @@ static int __init gpiolib_dev_init(void)
        gpiolib_initialized = true;
        gpiochip_setup_devs();
 
-       if (IS_ENABLED(CONFIG_OF_DYNAMIC))
-               WARN_ON(of_reconfig_notifier_register(&gpio_of_notifier));
+#if IS_ENABLED(CONFIG_OF_DYNAMIC) && IS_ENABLED(CONFIG_OF_GPIO)
+       WARN_ON(of_reconfig_notifier_register(&gpio_of_notifier));
+#endif /* CONFIG_OF_DYNAMIC && CONFIG_OF_GPIO */
 
        return ret;
 }
index f84f9e3..affde2d 100644 (file)
@@ -3372,15 +3372,12 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
                }
        }
 
-       amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
-       amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
-
-       amdgpu_amdkfd_suspend(adev, !fbcon);
-
        amdgpu_ras_suspend(adev);
 
        r = amdgpu_device_ip_suspend_phase1(adev);
 
+       amdgpu_amdkfd_suspend(adev, !fbcon);
+
        /* evict vram memory */
        amdgpu_bo_evict_vram(adev);
 
index 94c29b7..9c83c13 100644 (file)
@@ -2008,17 +2008,22 @@ void amdgpu_dm_update_connector_after_detect(
                dc_sink_retain(aconnector->dc_sink);
                if (sink->dc_edid.length == 0) {
                        aconnector->edid = NULL;
-                       drm_dp_cec_unset_edid(&aconnector->dm_dp_aux.aux);
+                       if (aconnector->dc_link->aux_mode) {
+                               drm_dp_cec_unset_edid(
+                                       &aconnector->dm_dp_aux.aux);
+                       }
                } else {
                        aconnector->edid =
-                               (struct edid *) sink->dc_edid.raw_edid;
-
+                               (struct edid *)sink->dc_edid.raw_edid;
 
                        drm_connector_update_edid_property(connector,
-                                       aconnector->edid);
-                       drm_dp_cec_set_edid(&aconnector->dm_dp_aux.aux,
-                                           aconnector->edid);
+                                                          aconnector->edid);
+
+                       if (aconnector->dc_link->aux_mode)
+                               drm_dp_cec_set_edid(&aconnector->dm_dp_aux.aux,
+                                                   aconnector->edid);
                }
+
                amdgpu_dm_update_freesync_caps(connector, aconnector->edid);
                update_connector_ext_caps(aconnector);
        } else {
index 8489f1e..47431ca 100644 (file)
@@ -834,11 +834,10 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
 static void wait_for_no_pipes_pending(struct dc *dc, struct dc_state *context)
 {
        int i;
-       int count = 0;
-       struct pipe_ctx *pipe;
        PERF_TRACE();
        for (i = 0; i < MAX_PIPES; i++) {
-               pipe = &context->res_ctx.pipe_ctx[i];
+               int count = 0;
+               struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 
                if (!pipe->plane_state)
                        continue;
index c3535bd..e4348e3 100644 (file)
@@ -3068,25 +3068,32 @@ validate_out:
        return out;
 }
 
-
-bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context,
-               bool fast_validate)
+/*
+ * This must be noinline to ensure anything that deals with FP registers
+ * is contained within this call; previously our compiling with hard-float
+ * would result in fp instructions being emitted outside of the boundaries
+ * of the DC_FP_START/END macros, which makes sense as the compiler has no
+ * idea about what is wrapped and what is not
+ *
+ * This is largely just a workaround to avoid breakage introduced with 5.6,
+ * ideally all fp-using code should be moved into its own file, only that
+ * should be compiled with hard-float, and all code exported from there
+ * should be strictly wrapped with DC_FP_START/END
+ */
+static noinline bool dcn20_validate_bandwidth_fp(struct dc *dc,
+               struct dc_state *context, bool fast_validate)
 {
        bool voltage_supported = false;
        bool full_pstate_supported = false;
        bool dummy_pstate_supported = false;
        double p_state_latency_us;
 
-       DC_FP_START();
        p_state_latency_us = context->bw_ctx.dml.soc.dram_clock_change_latency_us;
        context->bw_ctx.dml.soc.disable_dram_clock_change_vactive_support =
                dc->debug.disable_dram_clock_change_vactive_support;
 
        if (fast_validate) {
-               voltage_supported = dcn20_validate_bandwidth_internal(dc, context, true);
-
-               DC_FP_END();
-               return voltage_supported;
+               return dcn20_validate_bandwidth_internal(dc, context, true);
        }
 
        // Best case, we support full UCLK switch latency
@@ -3115,7 +3122,15 @@ bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context,
 
 restore_dml_state:
        context->bw_ctx.dml.soc.dram_clock_change_latency_us = p_state_latency_us;
+       return voltage_supported;
+}
 
+bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context,
+               bool fast_validate)
+{
+       bool voltage_supported = false;
+       DC_FP_START();
+       voltage_supported = dcn20_validate_bandwidth_fp(dc, context, fast_validate);
        DC_FP_END();
        return voltage_supported;
 }
index a38baa7..b8ec08e 100644 (file)
@@ -1200,7 +1200,7 @@ static void dml_rq_dlg_get_dlg_params(
        min_hratio_fact_l = 1.0;
        min_hratio_fact_c = 1.0;
 
-       if (htaps_l <= 1)
+       if (hratio_l <= 1)
                min_hratio_fact_l = 2.0;
        else if (htaps_l <= 6) {
                if ((hratio_l * 2.0) > 4.0)
@@ -1216,7 +1216,7 @@ static void dml_rq_dlg_get_dlg_params(
 
        hscale_pixel_rate_l = min_hratio_fact_l * dppclk_freq_in_mhz;
 
-       if (htaps_c <= 1)
+       if (hratio_c <= 1)
                min_hratio_fact_c = 2.0;
        else if (htaps_c <= 6) {
                if ((hratio_c * 2.0) > 4.0)
@@ -1522,8 +1522,8 @@ static void dml_rq_dlg_get_dlg_params(
 
        disp_dlg_regs->refcyc_per_vm_group_vblank   = get_refcyc_per_vm_group_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz;
        disp_dlg_regs->refcyc_per_vm_group_flip     = get_refcyc_per_vm_group_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz;
-       disp_dlg_regs->refcyc_per_vm_req_vblank     = get_refcyc_per_vm_req_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz;
-       disp_dlg_regs->refcyc_per_vm_req_flip       = get_refcyc_per_vm_req_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz;
+       disp_dlg_regs->refcyc_per_vm_req_vblank     = get_refcyc_per_vm_req_vblank(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz * dml_pow(2, 10);
+       disp_dlg_regs->refcyc_per_vm_req_flip       = get_refcyc_per_vm_req_flip(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz * dml_pow(2, 10);
 
        // Clamp to max for now
        if (disp_dlg_regs->refcyc_per_vm_group_vblank >= (unsigned int)dml_pow(2, 23))
index c34eba1..6d7bca5 100644 (file)
 #define ASSERT(expr) ASSERT_CRITICAL(expr)
 
 #else
-#define ASSERT(expr) WARN_ON(!(expr))
+#define ASSERT(expr) WARN_ON_ONCE(!(expr))
 #endif
 
 #define BREAK_TO_DEBUGGER() ASSERT(0)
index 7f386ad..910108c 100644 (file)
@@ -241,8 +241,12 @@ static int drm_hdcp_request_srm(struct drm_device *drm_dev,
 
        ret = request_firmware_direct(&fw, (const char *)fw_name,
                                      drm_dev->dev);
-       if (ret < 0)
+       if (ret < 0) {
+               *revoked_ksv_cnt = 0;
+               *revoked_ksv_list = NULL;
+               ret = 0;
                goto exit;
+       }
 
        if (fw->size && fw->data)
                ret = drm_hdcp_srm_update(fw->data, fw->size, revoked_ksv_list,
@@ -287,6 +291,8 @@ int drm_hdcp_check_ksvs_revoked(struct drm_device *drm_dev, u8 *ksvs,
 
        ret = drm_hdcp_request_srm(drm_dev, &revoked_ksv_list,
                                   &revoked_ksv_cnt);
+       if (ret)
+               return ret;
 
        /* revoked_ksv_cnt will be zero when above function failed */
        for (i = 0; i < revoked_ksv_cnt; i++)
index 9dfe7cb..1754c05 100644 (file)
@@ -843,6 +843,7 @@ static const struct of_device_id ingenic_drm_of_match[] = {
        { .compatible = "ingenic,jz4770-lcd", .data = &jz4770_soc_info },
        { /* sentinel */ },
 };
+MODULE_DEVICE_TABLE(of, ingenic_drm_of_match);
 
 static struct platform_driver ingenic_drm_driver = {
        .driver = {
index 0599397..3eb89f1 100644 (file)
@@ -717,7 +717,7 @@ static void sun6i_dsi_encoder_enable(struct drm_encoder *encoder)
        struct drm_display_mode *mode = &encoder->crtc->state->adjusted_mode;
        struct sun6i_dsi *dsi = encoder_to_sun6i_dsi(encoder);
        struct mipi_dsi_device *device = dsi->device;
-       union phy_configure_opts opts = { };
+       union phy_configure_opts opts = { };
        struct phy_configure_opts_mipi_dphy *cfg = &opts.mipi_dphy;
        u16 delay;
        int err;
index c1824bd..7879ff5 100644 (file)
@@ -221,6 +221,7 @@ struct virtio_gpu_fpriv {
 /* virtio_ioctl.c */
 #define DRM_VIRTIO_NUM_IOCTLS 10
 extern struct drm_ioctl_desc virtio_gpu_ioctls[DRM_VIRTIO_NUM_IOCTLS];
+void virtio_gpu_create_context(struct drm_device *dev, struct drm_file *file);
 
 /* virtio_kms.c */
 int virtio_gpu_init(struct drm_device *dev);
index 0d6152c..f0d5a89 100644 (file)
@@ -39,6 +39,9 @@ int virtio_gpu_gem_create(struct drm_file *file,
        int ret;
        u32 handle;
 
+       if (vgdev->has_virgl_3d)
+               virtio_gpu_create_context(dev, file);
+
        ret = virtio_gpu_object_create(vgdev, params, &obj, NULL);
        if (ret < 0)
                return ret;
index 3f60bf2..512daff 100644 (file)
@@ -34,8 +34,7 @@
 
 #include "virtgpu_drv.h"
 
-static void virtio_gpu_create_context(struct drm_device *dev,
-                                     struct drm_file *file)
+void virtio_gpu_create_context(struct drm_device *dev, struct drm_file *file)
 {
        struct virtio_gpu_device *vgdev = dev->dev_private;
        struct virtio_gpu_fpriv *vfpriv = file->driver_priv;
index a03c6d6..96fb9ff 100644 (file)
@@ -78,7 +78,7 @@ static struct qcom_icc_node *sdm845_osm_l3_nodes[] = {
        [SLAVE_OSM_L3] = &sdm845_osm_l3,
 };
 
-const static struct qcom_icc_desc sdm845_icc_osm_l3 = {
+static const struct qcom_icc_desc sdm845_icc_osm_l3 = {
        .nodes = sdm845_osm_l3_nodes,
        .num_nodes = ARRAY_SIZE(sdm845_osm_l3_nodes),
 };
@@ -91,7 +91,7 @@ static struct qcom_icc_node *sc7180_osm_l3_nodes[] = {
        [SLAVE_OSM_L3] = &sc7180_osm_l3,
 };
 
-const static struct qcom_icc_desc sc7180_icc_osm_l3 = {
+static const struct qcom_icc_desc sc7180_icc_osm_l3 = {
        .nodes = sc7180_osm_l3_nodes,
        .num_nodes = ARRAY_SIZE(sc7180_osm_l3_nodes),
 };
index b013b80..f6c7b96 100644 (file)
@@ -192,7 +192,7 @@ static struct qcom_icc_node *aggre1_noc_nodes[] = {
        [SLAVE_ANOC_PCIE_A1NOC_SNOC] = &qns_pcie_a1noc_snoc,
 };
 
-const static struct qcom_icc_desc sdm845_aggre1_noc = {
+static const struct qcom_icc_desc sdm845_aggre1_noc = {
        .nodes = aggre1_noc_nodes,
        .num_nodes = ARRAY_SIZE(aggre1_noc_nodes),
        .bcms = aggre1_noc_bcms,
@@ -220,7 +220,7 @@ static struct qcom_icc_node *aggre2_noc_nodes[] = {
        [SLAVE_SERVICE_A2NOC] = &srvc_aggre2_noc,
 };
 
-const static struct qcom_icc_desc sdm845_aggre2_noc = {
+static const struct qcom_icc_desc sdm845_aggre2_noc = {
        .nodes = aggre2_noc_nodes,
        .num_nodes = ARRAY_SIZE(aggre2_noc_nodes),
        .bcms = aggre2_noc_bcms,
@@ -281,7 +281,7 @@ static struct qcom_icc_node *config_noc_nodes[] = {
        [SLAVE_SERVICE_CNOC] = &srvc_cnoc,
 };
 
-const static struct qcom_icc_desc sdm845_config_noc = {
+static const struct qcom_icc_desc sdm845_config_noc = {
        .nodes = config_noc_nodes,
        .num_nodes = ARRAY_SIZE(config_noc_nodes),
        .bcms = config_noc_bcms,
@@ -297,7 +297,7 @@ static struct qcom_icc_node *dc_noc_nodes[] = {
        [SLAVE_MEM_NOC_CFG] = &qhs_memnoc,
 };
 
-const static struct qcom_icc_desc sdm845_dc_noc = {
+static const struct qcom_icc_desc sdm845_dc_noc = {
        .nodes = dc_noc_nodes,
        .num_nodes = ARRAY_SIZE(dc_noc_nodes),
        .bcms = dc_noc_bcms,
@@ -315,7 +315,7 @@ static struct qcom_icc_node *gladiator_noc_nodes[] = {
        [SLAVE_SERVICE_GNOC] = &srvc_gnoc,
 };
 
-const static struct qcom_icc_desc sdm845_gladiator_noc = {
+static const struct qcom_icc_desc sdm845_gladiator_noc = {
        .nodes = gladiator_noc_nodes,
        .num_nodes = ARRAY_SIZE(gladiator_noc_nodes),
        .bcms = gladiator_noc_bcms,
@@ -350,7 +350,7 @@ static struct qcom_icc_node *mem_noc_nodes[] = {
        [SLAVE_EBI1] = &ebi,
 };
 
-const static struct qcom_icc_desc sdm845_mem_noc = {
+static const struct qcom_icc_desc sdm845_mem_noc = {
        .nodes = mem_noc_nodes,
        .num_nodes = ARRAY_SIZE(mem_noc_nodes),
        .bcms = mem_noc_bcms,
@@ -384,7 +384,7 @@ static struct qcom_icc_node *mmss_noc_nodes[] = {
        [SLAVE_CAMNOC_UNCOMP] = &qns_camnoc_uncomp,
 };
 
-const static struct qcom_icc_desc sdm845_mmss_noc = {
+static const struct qcom_icc_desc sdm845_mmss_noc = {
        .nodes = mmss_noc_nodes,
        .num_nodes = ARRAY_SIZE(mmss_noc_nodes),
        .bcms = mmss_noc_bcms,
@@ -430,7 +430,7 @@ static struct qcom_icc_node *system_noc_nodes[] = {
        [SLAVE_TCU] = &xs_sys_tcu_cfg,
 };
 
-const static struct qcom_icc_desc sdm845_system_noc = {
+static const struct qcom_icc_desc sdm845_system_noc = {
        .nodes = system_noc_nodes,
        .num_nodes = ARRAY_SIZE(system_noc_nodes),
        .bcms = system_noc_bcms,
index 20cce36..1dc3718 100644 (file)
@@ -101,6 +101,8 @@ struct kmem_cache *amd_iommu_irq_cache;
 static void update_domain(struct protection_domain *domain);
 static int protection_domain_init(struct protection_domain *domain);
 static void detach_device(struct device *dev);
+static void update_and_flush_device_table(struct protection_domain *domain,
+                                         struct domain_pgtable *pgtable);
 
 /****************************************************************************
  *
@@ -151,6 +153,26 @@ static struct protection_domain *to_pdomain(struct iommu_domain *dom)
        return container_of(dom, struct protection_domain, domain);
 }
 
+static void amd_iommu_domain_get_pgtable(struct protection_domain *domain,
+                                        struct domain_pgtable *pgtable)
+{
+       u64 pt_root = atomic64_read(&domain->pt_root);
+
+       pgtable->root = (u64 *)(pt_root & PAGE_MASK);
+       pgtable->mode = pt_root & 7; /* lowest 3 bits encode pgtable mode */
+}
+
+static u64 amd_iommu_domain_encode_pgtable(u64 *root, int mode)
+{
+       u64 pt_root;
+
+       /* lowest 3 bits encode pgtable mode */
+       pt_root = mode & 7;
+       pt_root |= (u64)root;
+
+       return pt_root;
+}
+
 static struct iommu_dev_data *alloc_dev_data(u16 devid)
 {
        struct iommu_dev_data *dev_data;
@@ -1397,13 +1419,18 @@ static struct page *free_sub_pt(unsigned long root, int mode,
 
 static void free_pagetable(struct protection_domain *domain)
 {
-       unsigned long root = (unsigned long)domain->pt_root;
+       struct domain_pgtable pgtable;
        struct page *freelist = NULL;
+       unsigned long root;
 
-       BUG_ON(domain->mode < PAGE_MODE_NONE ||
-              domain->mode > PAGE_MODE_6_LEVEL);
+       amd_iommu_domain_get_pgtable(domain, &pgtable);
+       atomic64_set(&domain->pt_root, 0);
 
-       freelist = free_sub_pt(root, domain->mode, freelist);
+       BUG_ON(pgtable.mode < PAGE_MODE_NONE ||
+              pgtable.mode > PAGE_MODE_6_LEVEL);
+
+       root = (unsigned long)pgtable.root;
+       freelist = free_sub_pt(root, pgtable.mode, freelist);
 
        free_page_list(freelist);
 }
@@ -1417,24 +1444,39 @@ static bool increase_address_space(struct protection_domain *domain,
                                   unsigned long address,
                                   gfp_t gfp)
 {
+       struct domain_pgtable pgtable;
        unsigned long flags;
-       bool ret = false;
-       u64 *pte;
+       bool ret = true;
+       u64 *pte, root;
 
        spin_lock_irqsave(&domain->lock, flags);
 
-       if (address <= PM_LEVEL_SIZE(domain->mode) ||
-           WARN_ON_ONCE(domain->mode == PAGE_MODE_6_LEVEL))
+       amd_iommu_domain_get_pgtable(domain, &pgtable);
+
+       if (address <= PM_LEVEL_SIZE(pgtable.mode))
+               goto out;
+
+       ret = false;
+       if (WARN_ON_ONCE(pgtable.mode == PAGE_MODE_6_LEVEL))
                goto out;
 
        pte = (void *)get_zeroed_page(gfp);
        if (!pte)
                goto out;
 
-       *pte             = PM_LEVEL_PDE(domain->mode,
-                                       iommu_virt_to_phys(domain->pt_root));
-       domain->pt_root  = pte;
-       domain->mode    += 1;
+       *pte = PM_LEVEL_PDE(pgtable.mode, iommu_virt_to_phys(pgtable.root));
+
+       pgtable.root  = pte;
+       pgtable.mode += 1;
+       update_and_flush_device_table(domain, &pgtable);
+       domain_flush_complete(domain);
+
+       /*
+        * Device Table needs to be updated and flushed before the new root can
+        * be published.
+        */
+       root = amd_iommu_domain_encode_pgtable(pte, pgtable.mode);
+       atomic64_set(&domain->pt_root, root);
 
        ret = true;
 
@@ -1451,16 +1493,29 @@ static u64 *alloc_pte(struct protection_domain *domain,
                      gfp_t gfp,
                      bool *updated)
 {
+       struct domain_pgtable pgtable;
        int level, end_lvl;
        u64 *pte, *page;
 
        BUG_ON(!is_power_of_2(page_size));
 
-       while (address > PM_LEVEL_SIZE(domain->mode))
-               *updated = increase_address_space(domain, address, gfp) || *updated;
+       amd_iommu_domain_get_pgtable(domain, &pgtable);
+
+       while (address > PM_LEVEL_SIZE(pgtable.mode)) {
+               /*
+                * Return an error if there is no memory to update the
+                * page-table.
+                */
+               if (!increase_address_space(domain, address, gfp))
+                       return NULL;
+
+               /* Read new values to check if update was successful */
+               amd_iommu_domain_get_pgtable(domain, &pgtable);
+       }
+
 
-       level   = domain->mode - 1;
-       pte     = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
+       level   = pgtable.mode - 1;
+       pte     = &pgtable.root[PM_LEVEL_INDEX(level, address)];
        address = PAGE_SIZE_ALIGN(address, page_size);
        end_lvl = PAGE_SIZE_LEVEL(page_size);
 
@@ -1536,16 +1591,19 @@ static u64 *fetch_pte(struct protection_domain *domain,
                      unsigned long address,
                      unsigned long *page_size)
 {
+       struct domain_pgtable pgtable;
        int level;
        u64 *pte;
 
        *page_size = 0;
 
-       if (address > PM_LEVEL_SIZE(domain->mode))
+       amd_iommu_domain_get_pgtable(domain, &pgtable);
+
+       if (address > PM_LEVEL_SIZE(pgtable.mode))
                return NULL;
 
-       level      =  domain->mode - 1;
-       pte        = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
+       level      =  pgtable.mode - 1;
+       pte        = &pgtable.root[PM_LEVEL_INDEX(level, address)];
        *page_size =  PTE_LEVEL_PAGE_SIZE(level);
 
        while (level > 0) {
@@ -1660,7 +1718,13 @@ out:
                unsigned long flags;
 
                spin_lock_irqsave(&dom->lock, flags);
-               update_domain(dom);
+               /*
+                * Flush domain TLB(s) and wait for completion. Any Device-Table
+                * Updates and flushing already happened in
+                * increase_address_space().
+                */
+               domain_flush_tlb_pde(dom);
+               domain_flush_complete(dom);
                spin_unlock_irqrestore(&dom->lock, flags);
        }
 
@@ -1806,6 +1870,7 @@ static void dma_ops_domain_free(struct protection_domain *domain)
 static struct protection_domain *dma_ops_domain_alloc(void)
 {
        struct protection_domain *domain;
+       u64 *pt_root, root;
 
        domain = kzalloc(sizeof(struct protection_domain), GFP_KERNEL);
        if (!domain)
@@ -1814,12 +1879,14 @@ static struct protection_domain *dma_ops_domain_alloc(void)
        if (protection_domain_init(domain))
                goto free_domain;
 
-       domain->mode = PAGE_MODE_3_LEVEL;
-       domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL);
-       domain->flags = PD_DMA_OPS_MASK;
-       if (!domain->pt_root)
+       pt_root = (void *)get_zeroed_page(GFP_KERNEL);
+       if (!pt_root)
                goto free_domain;
 
+       root = amd_iommu_domain_encode_pgtable(pt_root, PAGE_MODE_3_LEVEL);
+       atomic64_set(&domain->pt_root, root);
+       domain->flags = PD_DMA_OPS_MASK;
+
        if (iommu_get_dma_cookie(&domain->domain) == -ENOMEM)
                goto free_domain;
 
@@ -1841,16 +1908,17 @@ static bool dma_ops_domain(struct protection_domain *domain)
 }
 
 static void set_dte_entry(u16 devid, struct protection_domain *domain,
+                         struct domain_pgtable *pgtable,
                          bool ats, bool ppr)
 {
        u64 pte_root = 0;
        u64 flags = 0;
        u32 old_domid;
 
-       if (domain->mode != PAGE_MODE_NONE)
-               pte_root = iommu_virt_to_phys(domain->pt_root);
+       if (pgtable->mode != PAGE_MODE_NONE)
+               pte_root = iommu_virt_to_phys(pgtable->root);
 
-       pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
+       pte_root |= (pgtable->mode & DEV_ENTRY_MODE_MASK)
                    << DEV_ENTRY_MODE_SHIFT;
        pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV;
 
@@ -1923,6 +1991,7 @@ static void clear_dte_entry(u16 devid)
 static void do_attach(struct iommu_dev_data *dev_data,
                      struct protection_domain *domain)
 {
+       struct domain_pgtable pgtable;
        struct amd_iommu *iommu;
        bool ats;
 
@@ -1938,7 +2007,9 @@ static void do_attach(struct iommu_dev_data *dev_data,
        domain->dev_cnt                 += 1;
 
        /* Update device table */
-       set_dte_entry(dev_data->devid, domain, ats, dev_data->iommu_v2);
+       amd_iommu_domain_get_pgtable(domain, &pgtable);
+       set_dte_entry(dev_data->devid, domain, &pgtable,
+                     ats, dev_data->iommu_v2);
        clone_aliases(dev_data->pdev);
 
        device_flush_dte(dev_data);
@@ -2249,23 +2320,36 @@ static int amd_iommu_domain_get_attr(struct iommu_domain *domain,
  *
  *****************************************************************************/
 
-static void update_device_table(struct protection_domain *domain)
+static void update_device_table(struct protection_domain *domain,
+                               struct domain_pgtable *pgtable)
 {
        struct iommu_dev_data *dev_data;
 
        list_for_each_entry(dev_data, &domain->dev_list, list) {
-               set_dte_entry(dev_data->devid, domain, dev_data->ats.enabled,
-                             dev_data->iommu_v2);
+               set_dte_entry(dev_data->devid, domain, pgtable,
+                             dev_data->ats.enabled, dev_data->iommu_v2);
                clone_aliases(dev_data->pdev);
        }
 }
 
+static void update_and_flush_device_table(struct protection_domain *domain,
+                                         struct domain_pgtable *pgtable)
+{
+       update_device_table(domain, pgtable);
+       domain_flush_devices(domain);
+}
+
 static void update_domain(struct protection_domain *domain)
 {
-       update_device_table(domain);
+       struct domain_pgtable pgtable;
 
-       domain_flush_devices(domain);
+       /* Update device table */
+       amd_iommu_domain_get_pgtable(domain, &pgtable);
+       update_and_flush_device_table(domain, &pgtable);
+
+       /* Flush domain TLB(s) and wait for completion */
        domain_flush_tlb_pde(domain);
+       domain_flush_complete(domain);
 }
 
 int __init amd_iommu_init_api(void)
@@ -2375,6 +2459,7 @@ out_err:
 static struct iommu_domain *amd_iommu_domain_alloc(unsigned type)
 {
        struct protection_domain *pdomain;
+       u64 *pt_root, root;
 
        switch (type) {
        case IOMMU_DOMAIN_UNMANAGED:
@@ -2382,13 +2467,15 @@ static struct iommu_domain *amd_iommu_domain_alloc(unsigned type)
                if (!pdomain)
                        return NULL;
 
-               pdomain->mode    = PAGE_MODE_3_LEVEL;
-               pdomain->pt_root = (void *)get_zeroed_page(GFP_KERNEL);
-               if (!pdomain->pt_root) {
+               pt_root = (void *)get_zeroed_page(GFP_KERNEL);
+               if (!pt_root) {
                        protection_domain_free(pdomain);
                        return NULL;
                }
 
+               root = amd_iommu_domain_encode_pgtable(pt_root, PAGE_MODE_3_LEVEL);
+               atomic64_set(&pdomain->pt_root, root);
+
                pdomain->domain.geometry.aperture_start = 0;
                pdomain->domain.geometry.aperture_end   = ~0ULL;
                pdomain->domain.geometry.force_aperture = true;
@@ -2406,7 +2493,7 @@ static struct iommu_domain *amd_iommu_domain_alloc(unsigned type)
                if (!pdomain)
                        return NULL;
 
-               pdomain->mode = PAGE_MODE_NONE;
+               atomic64_set(&pdomain->pt_root, PAGE_MODE_NONE);
                break;
        default:
                return NULL;
@@ -2418,6 +2505,7 @@ static struct iommu_domain *amd_iommu_domain_alloc(unsigned type)
 static void amd_iommu_domain_free(struct iommu_domain *dom)
 {
        struct protection_domain *domain;
+       struct domain_pgtable pgtable;
 
        domain = to_pdomain(dom);
 
@@ -2435,7 +2523,9 @@ static void amd_iommu_domain_free(struct iommu_domain *dom)
                dma_ops_domain_free(domain);
                break;
        default:
-               if (domain->mode != PAGE_MODE_NONE)
+               amd_iommu_domain_get_pgtable(domain, &pgtable);
+
+               if (pgtable.mode != PAGE_MODE_NONE)
                        free_pagetable(domain);
 
                if (domain->flags & PD_IOMMUV2_MASK)
@@ -2518,10 +2608,12 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
                         gfp_t gfp)
 {
        struct protection_domain *domain = to_pdomain(dom);
+       struct domain_pgtable pgtable;
        int prot = 0;
        int ret;
 
-       if (domain->mode == PAGE_MODE_NONE)
+       amd_iommu_domain_get_pgtable(domain, &pgtable);
+       if (pgtable.mode == PAGE_MODE_NONE)
                return -EINVAL;
 
        if (iommu_prot & IOMMU_READ)
@@ -2541,8 +2633,10 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
                              struct iommu_iotlb_gather *gather)
 {
        struct protection_domain *domain = to_pdomain(dom);
+       struct domain_pgtable pgtable;
 
-       if (domain->mode == PAGE_MODE_NONE)
+       amd_iommu_domain_get_pgtable(domain, &pgtable);
+       if (pgtable.mode == PAGE_MODE_NONE)
                return 0;
 
        return iommu_unmap_page(domain, iova, page_size);
@@ -2553,9 +2647,11 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
 {
        struct protection_domain *domain = to_pdomain(dom);
        unsigned long offset_mask, pte_pgsize;
+       struct domain_pgtable pgtable;
        u64 *pte, __pte;
 
-       if (domain->mode == PAGE_MODE_NONE)
+       amd_iommu_domain_get_pgtable(domain, &pgtable);
+       if (pgtable.mode == PAGE_MODE_NONE)
                return iova;
 
        pte = fetch_pte(domain, iova, &pte_pgsize);
@@ -2708,16 +2804,26 @@ EXPORT_SYMBOL(amd_iommu_unregister_ppr_notifier);
 void amd_iommu_domain_direct_map(struct iommu_domain *dom)
 {
        struct protection_domain *domain = to_pdomain(dom);
+       struct domain_pgtable pgtable;
        unsigned long flags;
+       u64 pt_root;
 
        spin_lock_irqsave(&domain->lock, flags);
 
+       /* First save pgtable configuration*/
+       amd_iommu_domain_get_pgtable(domain, &pgtable);
+
        /* Update data structure */
-       domain->mode    = PAGE_MODE_NONE;
+       pt_root = amd_iommu_domain_encode_pgtable(NULL, PAGE_MODE_NONE);
+       atomic64_set(&domain->pt_root, pt_root);
 
        /* Make changes visible to IOMMUs */
        update_domain(domain);
 
+       /* Restore old pgtable in domain->ptroot to free page-table */
+       pt_root = amd_iommu_domain_encode_pgtable(pgtable.root, pgtable.mode);
+       atomic64_set(&domain->pt_root, pt_root);
+
        /* Page-table is not visible to IOMMU anymore, so free it */
        free_pagetable(domain);
 
@@ -2908,9 +3014,11 @@ static u64 *__get_gcr3_pte(u64 *root, int level, int pasid, bool alloc)
 static int __set_gcr3(struct protection_domain *domain, int pasid,
                      unsigned long cr3)
 {
+       struct domain_pgtable pgtable;
        u64 *pte;
 
-       if (domain->mode != PAGE_MODE_NONE)
+       amd_iommu_domain_get_pgtable(domain, &pgtable);
+       if (pgtable.mode != PAGE_MODE_NONE)
                return -EINVAL;
 
        pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, true);
@@ -2924,9 +3032,11 @@ static int __set_gcr3(struct protection_domain *domain, int pasid,
 
 static int __clear_gcr3(struct protection_domain *domain, int pasid)
 {
+       struct domain_pgtable pgtable;
        u64 *pte;
 
-       if (domain->mode != PAGE_MODE_NONE)
+       amd_iommu_domain_get_pgtable(domain, &pgtable);
+       if (pgtable.mode != PAGE_MODE_NONE)
                return -EINVAL;
 
        pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, false);
index ca8c452..7a8fdec 100644 (file)
@@ -468,8 +468,7 @@ struct protection_domain {
                                       iommu core code */
        spinlock_t lock;        /* mostly used to lock the page table*/
        u16 id;                 /* the domain id written to the device table */
-       int mode;               /* paging mode (0-6 levels) */
-       u64 *pt_root;           /* page table root pointer */
+       atomic64_t pt_root;     /* pgtable root and pgtable mode */
        int glx;                /* Number of levels for GCR3 table */
        u64 *gcr3_tbl;          /* Guest CR3 table */
        unsigned long flags;    /* flags to find out type of domain */
@@ -477,6 +476,12 @@ struct protection_domain {
        unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */
 };
 
+/* For decocded pt_root */
+struct domain_pgtable {
+       int mode;
+       u64 *root;
+};
+
 /*
  * Structure where we save information about one hardware AMD IOMMU in the
  * system.
index d5cac4f..4e1d11a 100644 (file)
@@ -453,7 +453,7 @@ static int viommu_add_resv_mem(struct viommu_endpoint *vdev,
        if (!region)
                return -ENOMEM;
 
-       list_add(&vdev->resv_regions, &region->list);
+       list_add(&region->list, &vdev->resv_regions);
        return 0;
 }
 
index 668418d..f620442 100644 (file)
@@ -1465,6 +1465,13 @@ static const struct mei_cfg mei_me_pch12_cfg = {
        MEI_CFG_DMA_128,
 };
 
+/* LBG with quirk for SPS Firmware exclusion */
+static const struct mei_cfg mei_me_pch12_sps_cfg = {
+       MEI_CFG_PCH8_HFS,
+       MEI_CFG_FW_VER_SUPP,
+       MEI_CFG_FW_SPS,
+};
+
 /* Tiger Lake and newer devices */
 static const struct mei_cfg mei_me_pch15_cfg = {
        MEI_CFG_PCH8_HFS,
@@ -1487,6 +1494,7 @@ static const struct mei_cfg *const mei_cfg_list[] = {
        [MEI_ME_PCH8_CFG] = &mei_me_pch8_cfg,
        [MEI_ME_PCH8_SPS_CFG] = &mei_me_pch8_sps_cfg,
        [MEI_ME_PCH12_CFG] = &mei_me_pch12_cfg,
+       [MEI_ME_PCH12_SPS_CFG] = &mei_me_pch12_sps_cfg,
        [MEI_ME_PCH15_CFG] = &mei_me_pch15_cfg,
 };
 
index 4a8d4dc..b6b94e2 100644 (file)
@@ -80,6 +80,9 @@ struct mei_me_hw {
  *                         servers platforms with quirk for
  *                         SPS firmware exclusion.
  * @MEI_ME_PCH12_CFG:      Platform Controller Hub Gen12 and newer
+ * @MEI_ME_PCH12_SPS_CFG:  Platform Controller Hub Gen12 and newer
+ *                         servers platforms with quirk for
+ *                         SPS firmware exclusion.
  * @MEI_ME_PCH15_CFG:      Platform Controller Hub Gen15 and newer
  * @MEI_ME_NUM_CFG:        Upper Sentinel.
  */
@@ -93,6 +96,7 @@ enum mei_cfg_idx {
        MEI_ME_PCH8_CFG,
        MEI_ME_PCH8_SPS_CFG,
        MEI_ME_PCH12_CFG,
+       MEI_ME_PCH12_SPS_CFG,
        MEI_ME_PCH15_CFG,
        MEI_ME_NUM_CFG,
 };
index 0c390fe..a1ed375 100644 (file)
@@ -70,7 +70,7 @@ static const struct pci_device_id mei_me_pci_tbl[] = {
        {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_2, MEI_ME_PCH8_CFG)},
        {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H, MEI_ME_PCH8_SPS_CFG)},
        {MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H_2, MEI_ME_PCH8_SPS_CFG)},
-       {MEI_PCI_DEVICE(MEI_DEV_ID_LBG, MEI_ME_PCH12_CFG)},
+       {MEI_PCI_DEVICE(MEI_DEV_ID_LBG, MEI_ME_PCH12_SPS_CFG)},
 
        {MEI_PCI_DEVICE(MEI_DEV_ID_BXT_M, MEI_ME_PCH8_CFG)},
        {MEI_PCI_DEVICE(MEI_DEV_ID_APL_I, MEI_ME_PCH8_CFG)},
index 06426fc..f781c46 100644 (file)
@@ -1483,7 +1483,7 @@ static void __exit most_exit(void)
        ida_destroy(&mdev_id);
 }
 
-module_init(most_init);
+subsys_initcall(most_init);
 module_exit(most_exit);
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Christian Gromm <christian.gromm@microchip.com>");
index 6435020..51185e4 100644 (file)
@@ -24,8 +24,8 @@ config NET_DSA_MV88E6XXX_PTP
        bool "PTP support for Marvell 88E6xxx"
        default n
        depends on NET_DSA_MV88E6XXX_GLOBAL2
+       depends on PTP_1588_CLOCK
        imply NETWORK_PHY_TIMESTAMPING
-       imply PTP_1588_CLOCK
        help
          Say Y to enable PTP hardware timestamping on Marvell 88E6xxx switch
          chips that support it.
index dd8a566..2b4a723 100644 (file)
@@ -3962,7 +3962,6 @@ static const struct mv88e6xxx_ops mv88e6190_ops = {
        .serdes_get_stats = mv88e6390_serdes_get_stats,
        .serdes_get_regs_len = mv88e6390_serdes_get_regs_len,
        .serdes_get_regs = mv88e6390_serdes_get_regs,
-       .phylink_validate = mv88e6390_phylink_validate,
        .gpio_ops = &mv88e6352_gpio_ops,
        .phylink_validate = mv88e6390_phylink_validate,
 };
@@ -4021,7 +4020,6 @@ static const struct mv88e6xxx_ops mv88e6190x_ops = {
        .serdes_get_stats = mv88e6390_serdes_get_stats,
        .serdes_get_regs_len = mv88e6390_serdes_get_regs_len,
        .serdes_get_regs = mv88e6390_serdes_get_regs,
-       .phylink_validate = mv88e6390_phylink_validate,
        .gpio_ops = &mv88e6352_gpio_ops,
        .phylink_validate = mv88e6390x_phylink_validate,
 };
@@ -4079,7 +4077,6 @@ static const struct mv88e6xxx_ops mv88e6191_ops = {
        .serdes_get_stats = mv88e6390_serdes_get_stats,
        .serdes_get_regs_len = mv88e6390_serdes_get_regs_len,
        .serdes_get_regs = mv88e6390_serdes_get_regs,
-       .phylink_validate = mv88e6390_phylink_validate,
        .avb_ops = &mv88e6390_avb_ops,
        .ptp_ops = &mv88e6352_ptp_ops,
        .phylink_validate = mv88e6390_phylink_validate,
@@ -4235,7 +4232,6 @@ static const struct mv88e6xxx_ops mv88e6290_ops = {
        .serdes_get_stats = mv88e6390_serdes_get_stats,
        .serdes_get_regs_len = mv88e6390_serdes_get_regs_len,
        .serdes_get_regs = mv88e6390_serdes_get_regs,
-       .phylink_validate = mv88e6390_phylink_validate,
        .gpio_ops = &mv88e6352_gpio_ops,
        .avb_ops = &mv88e6390_avb_ops,
        .ptp_ops = &mv88e6352_ptp_ops,
index d0a3764..e2c6bf0 100644 (file)
@@ -400,6 +400,7 @@ static int felix_init_structs(struct felix *felix, int num_phys_ports)
        ocelot->stats_layout    = felix->info->stats_layout;
        ocelot->num_stats       = felix->info->num_stats;
        ocelot->shared_queue_sz = felix->info->shared_queue_sz;
+       ocelot->num_mact_rows   = felix->info->num_mact_rows;
        ocelot->vcap_is2_keys   = felix->info->vcap_is2_keys;
        ocelot->vcap_is2_actions= felix->info->vcap_is2_actions;
        ocelot->vcap            = felix->info->vcap;
index 82d46f2..9af1065 100644 (file)
@@ -15,6 +15,7 @@ struct felix_info {
        const u32 *const                *map;
        const struct ocelot_ops         *ops;
        int                             shared_queue_sz;
+       int                             num_mact_rows;
        const struct ocelot_stat_layout *stats_layout;
        unsigned int                    num_stats;
        int                             num_ports;
index b4078f3..8bf395f 100644 (file)
@@ -1220,6 +1220,7 @@ struct felix_info felix_info_vsc9959 = {
        .vcap_is2_actions       = vsc9959_vcap_is2_actions,
        .vcap                   = vsc9959_vcap_props,
        .shared_queue_sz        = 128 * 1024,
+       .num_mact_rows          = 2048,
        .num_ports              = 6,
        .switch_pci_bar         = 4,
        .imdio_pci_bar          = 0,
index 0fe1ae1..68c3086 100644 (file)
@@ -20,6 +20,7 @@ tristate "NXP SJA1105 Ethernet switch family support"
 config NET_DSA_SJA1105_PTP
        bool "Support for the PTP clock on the NXP SJA1105 Ethernet switch"
        depends on NET_DSA_SJA1105
+       depends on PTP_1588_CLOCK
        help
          This enables support for timestamping and PTP clock manipulations in
          the SJA1105 DSA driver.
index a22f8e3..bc0e47c 100644 (file)
 
 /* PTPSYNCTS has no interrupt or update mechanism, because the intended
  * hardware use case is for the timestamp to be collected synchronously,
- * immediately after the CAS_MASTER SJA1105 switch has triggered a CASSYNC
- * pulse on the PTP_CLK pin. When used as a generic extts source, it needs
- * polling and a comparison with the old value. The polling interval is just
- * the Nyquist rate of a canonical PPS input (e.g. from a GPS module).
- * Anything of higher frequency than 1 Hz will be lost, since there is no
- * timestamp FIFO.
+ * immediately after the CAS_MASTER SJA1105 switch has performed a CASSYNC
+ * one-shot toggle (no return to level) on the PTP_CLK pin. When used as a
+ * generic extts source, the PTPSYNCTS register needs polling and a comparison
+ * with the old value. The polling interval is configured as the Nyquist rate
+ * of a signal with 50% duty cycle and 1Hz frequency, which is sadly all that
+ * this hardware can do (but may be enough for some setups). Anything of higher
+ * frequency than 1 Hz will be lost, since there is no timestamp FIFO.
  */
-#define SJA1105_EXTTS_INTERVAL         (HZ / 2)
+#define SJA1105_EXTTS_INTERVAL         (HZ / 4)
 
 /*            This range is actually +/- SJA1105_MAX_ADJ_PPB
  *            divided by 1000 (ppb -> ppm) and with a 16-bit
@@ -754,7 +755,16 @@ static int sja1105_extts_enable(struct sja1105_private *priv,
                return -EOPNOTSUPP;
 
        /* Reject requests with unsupported flags */
-       if (extts->flags)
+       if (extts->flags & ~(PTP_ENABLE_FEATURE |
+                            PTP_RISING_EDGE |
+                            PTP_FALLING_EDGE |
+                            PTP_STRICT_FLAGS))
+               return -EOPNOTSUPP;
+
+       /* We can only enable time stamping on both edges, sadly. */
+       if ((extts->flags & PTP_STRICT_FLAGS) &&
+           (extts->flags & PTP_ENABLE_FEATURE) &&
+           (extts->flags & PTP_EXTTS_EDGES) != PTP_EXTTS_EDGES)
                return -EOPNOTSUPP;
 
        rc = sja1105_change_ptp_clk_pin_func(priv, PTP_PF_EXTTS);
index 97dfd0c..9e1860d 100644 (file)
@@ -69,7 +69,7 @@
  * 16kB.
  */
 #if PAGE_SIZE > SZ_16K
-#define ENA_PAGE_SIZE SZ_16K
+#define ENA_PAGE_SIZE (_AC(SZ_16K, UL))
 #else
 #define ENA_PAGE_SIZE PAGE_SIZE
 #endif
index 2edf137..8a70ffe 100644 (file)
@@ -57,7 +57,7 @@ static const struct aq_board_revision_s hw_atl_boards[] = {
        { AQ_DEVICE_ID_D108,    AQ_HWREV_2,     &hw_atl_ops_b0, &hw_atl_b0_caps_aqc108, },
        { AQ_DEVICE_ID_D109,    AQ_HWREV_2,     &hw_atl_ops_b0, &hw_atl_b0_caps_aqc109, },
 
-       { AQ_DEVICE_ID_AQC100,  AQ_HWREV_ANY,   &hw_atl_ops_b1, &hw_atl_b0_caps_aqc107, },
+       { AQ_DEVICE_ID_AQC100,  AQ_HWREV_ANY,   &hw_atl_ops_b1, &hw_atl_b0_caps_aqc100, },
        { AQ_DEVICE_ID_AQC107,  AQ_HWREV_ANY,   &hw_atl_ops_b1, &hw_atl_b0_caps_aqc107, },
        { AQ_DEVICE_ID_AQC108,  AQ_HWREV_ANY,   &hw_atl_ops_b1, &hw_atl_b0_caps_aqc108, },
        { AQ_DEVICE_ID_AQC109,  AQ_HWREV_ANY,   &hw_atl_ops_b1, &hw_atl_b0_caps_aqc109, },
index a5d1a6c..6795b6d 100644 (file)
@@ -172,6 +172,7 @@ static int bgmac_probe(struct platform_device *pdev)
 {
        struct device_node *np = pdev->dev.of_node;
        struct bgmac *bgmac;
+       struct resource *regs;
        const u8 *mac_addr;
 
        bgmac = bgmac_alloc(&pdev->dev);
@@ -206,16 +207,21 @@ static int bgmac_probe(struct platform_device *pdev)
        if (IS_ERR(bgmac->plat.base))
                return PTR_ERR(bgmac->plat.base);
 
-       bgmac->plat.idm_base =
-               devm_platform_ioremap_resource_byname(pdev, "idm_base");
-       if (IS_ERR(bgmac->plat.idm_base))
-               return PTR_ERR(bgmac->plat.idm_base);
-       bgmac->feature_flags &= ~BGMAC_FEAT_IDM_MASK;
+       regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "idm_base");
+       if (regs) {
+               bgmac->plat.idm_base = devm_ioremap_resource(&pdev->dev, regs);
+               if (IS_ERR(bgmac->plat.idm_base))
+                       return PTR_ERR(bgmac->plat.idm_base);
+               bgmac->feature_flags &= ~BGMAC_FEAT_IDM_MASK;
+       }
 
-       bgmac->plat.nicpm_base =
-               devm_platform_ioremap_resource_byname(pdev, "nicpm_base");
-       if (IS_ERR(bgmac->plat.nicpm_base))
-               return PTR_ERR(bgmac->plat.nicpm_base);
+       regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nicpm_base");
+       if (regs) {
+               bgmac->plat.nicpm_base = devm_ioremap_resource(&pdev->dev,
+                                                              regs);
+               if (IS_ERR(bgmac->plat.nicpm_base))
+                       return PTR_ERR(bgmac->plat.nicpm_base);
+       }
 
        bgmac->read = platform_bgmac_read;
        bgmac->write = platform_bgmac_write;
index fead64f..d1a8371 100644 (file)
@@ -6642,7 +6642,7 @@ static int bnxt_alloc_ctx_pg_tbls(struct bnxt *bp,
        int rc;
 
        if (!mem_size)
-               return 0;
+               return -EINVAL;
 
        ctx_pg->nr_pages = DIV_ROUND_UP(mem_size, BNXT_PAGE_SIZE);
        if (ctx_pg->nr_pages > MAX_CTX_TOTAL_PAGES) {
@@ -9780,6 +9780,7 @@ static netdev_features_t bnxt_fix_features(struct net_device *dev,
                                           netdev_features_t features)
 {
        struct bnxt *bp = netdev_priv(dev);
+       netdev_features_t vlan_features;
 
        if ((features & NETIF_F_NTUPLE) && !bnxt_rfs_capable(bp))
                features &= ~NETIF_F_NTUPLE;
@@ -9796,12 +9797,14 @@ static netdev_features_t bnxt_fix_features(struct net_device *dev,
        /* Both CTAG and STAG VLAN accelaration on the RX side have to be
         * turned on or off together.
         */
-       if ((features & (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_STAG_RX)) !=
-           (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_STAG_RX)) {
+       vlan_features = features & (NETIF_F_HW_VLAN_CTAG_RX |
+                                   NETIF_F_HW_VLAN_STAG_RX);
+       if (vlan_features != (NETIF_F_HW_VLAN_CTAG_RX |
+                             NETIF_F_HW_VLAN_STAG_RX)) {
                if (dev->features & NETIF_F_HW_VLAN_CTAG_RX)
                        features &= ~(NETIF_F_HW_VLAN_CTAG_RX |
                                      NETIF_F_HW_VLAN_STAG_RX);
-               else
+               else if (vlan_features)
                        features |= NETIF_F_HW_VLAN_CTAG_RX |
                                    NETIF_F_HW_VLAN_STAG_RX;
        }
@@ -12212,12 +12215,15 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev)
                bnxt_ulp_start(bp, err);
        }
 
-       if (result != PCI_ERS_RESULT_RECOVERED && netif_running(netdev))
-               dev_close(netdev);
+       if (result != PCI_ERS_RESULT_RECOVERED) {
+               if (netif_running(netdev))
+                       dev_close(netdev);
+               pci_disable_device(pdev);
+       }
 
        rtnl_unlock();
 
-       return PCI_ERS_RESULT_RECOVERED;
+       return result;
 }
 
 /**
index f2caa27..f6a3250 100644 (file)
@@ -1066,7 +1066,6 @@ struct bnxt_vf_info {
 #define BNXT_VF_LINK_FORCED    0x4
 #define BNXT_VF_LINK_UP                0x8
 #define BNXT_VF_TRUST          0x10
-       u32     func_flags; /* func cfg flags */
        u32     min_tx_rate;
        u32     max_tx_rate;
        void    *hwrm_cmd_req_addr;
index 95f893f..d5c8bd4 100644 (file)
@@ -43,7 +43,7 @@ static inline void bnxt_link_bp_to_dl(struct bnxt *bp, struct devlink *dl)
 #define BNXT_NVM_CFG_VER_BITS          24
 #define BNXT_NVM_CFG_VER_BYTES         4
 
-#define BNXT_MSIX_VEC_MAX      1280
+#define BNXT_MSIX_VEC_MAX      512
 #define BNXT_MSIX_VEC_MIN_MAX  128
 
 enum bnxt_nvm_dir_type {
index 6ea3df6..cea2f99 100644 (file)
@@ -85,11 +85,10 @@ int bnxt_set_vf_spoofchk(struct net_device *dev, int vf_id, bool setting)
        if (old_setting == setting)
                return 0;
 
-       func_flags = vf->func_flags;
        if (setting)
-               func_flags |= FUNC_CFG_REQ_FLAGS_SRC_MAC_ADDR_CHECK_ENABLE;
+               func_flags = FUNC_CFG_REQ_FLAGS_SRC_MAC_ADDR_CHECK_ENABLE;
        else
-               func_flags |= FUNC_CFG_REQ_FLAGS_SRC_MAC_ADDR_CHECK_DISABLE;
+               func_flags = FUNC_CFG_REQ_FLAGS_SRC_MAC_ADDR_CHECK_DISABLE;
        /*TODO: if the driver supports VLAN filter on guest VLAN,
         * the spoof check should also include vlan anti-spoofing
         */
@@ -98,7 +97,6 @@ int bnxt_set_vf_spoofchk(struct net_device *dev, int vf_id, bool setting)
        req.flags = cpu_to_le32(func_flags);
        rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
        if (!rc) {
-               vf->func_flags = func_flags;
                if (setting)
                        vf->flags |= BNXT_VF_SPOOFCHK;
                else
@@ -228,7 +226,6 @@ int bnxt_set_vf_mac(struct net_device *dev, int vf_id, u8 *mac)
        memcpy(vf->mac_addr, mac, ETH_ALEN);
        bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1);
        req.fid = cpu_to_le16(vf->fw_fid);
-       req.flags = cpu_to_le32(vf->func_flags);
        req.enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_DFLT_MAC_ADDR);
        memcpy(req.dflt_mac_addr, mac, ETH_ALEN);
        return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
@@ -266,7 +263,6 @@ int bnxt_set_vf_vlan(struct net_device *dev, int vf_id, u16 vlan_id, u8 qos,
 
        bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1);
        req.fid = cpu_to_le16(vf->fw_fid);
-       req.flags = cpu_to_le32(vf->func_flags);
        req.dflt_vlan = cpu_to_le16(vlan_tag);
        req.enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_DFLT_VLAN);
        rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
@@ -305,7 +301,6 @@ int bnxt_set_vf_bw(struct net_device *dev, int vf_id, int min_tx_rate,
                return 0;
        bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1);
        req.fid = cpu_to_le16(vf->fw_fid);
-       req.flags = cpu_to_le32(vf->func_flags);
        req.enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_MAX_BW);
        req.max_bw = cpu_to_le32(max_tx_rate);
        req.enables |= cpu_to_le32(FUNC_CFG_REQ_ENABLES_MIN_BW);
@@ -477,7 +472,6 @@ static void __bnxt_set_vf_params(struct bnxt *bp, int vf_id)
        vf = &bp->pf.vf[vf_id];
        bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1);
        req.fid = cpu_to_le16(vf->fw_fid);
-       req.flags = cpu_to_le32(vf->func_flags);
 
        if (is_valid_ether_addr(vf->mac_addr)) {
                req.enables |= cpu_to_le32(FUNC_CFG_REQ_ENABLES_DFLT_MAC_ADDR);
index 53b50c2..2c4c12b 100644 (file)
@@ -35,8 +35,8 @@ config MACB
 config MACB_USE_HWSTAMP
        bool "Use IEEE 1588 hwstamp"
        depends on MACB
+       depends on PTP_1588_CLOCK
        default y
-       imply PTP_1588_CLOCK
        ---help---
          Enable IEEE 1588 Precision Time Protocol (PTP) support for MACB.
 
index a0e8c5b..36290a8 100644 (file)
@@ -334,8 +334,10 @@ static int macb_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
        int status;
 
        status = pm_runtime_get_sync(&bp->pdev->dev);
-       if (status < 0)
+       if (status < 0) {
+               pm_runtime_put_noidle(&bp->pdev->dev);
                goto mdio_pm_exit;
+       }
 
        status = macb_mdio_wait_for_idle(bp);
        if (status < 0)
@@ -386,8 +388,10 @@ static int macb_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
        int status;
 
        status = pm_runtime_get_sync(&bp->pdev->dev);
-       if (status < 0)
+       if (status < 0) {
+               pm_runtime_put_noidle(&bp->pdev->dev);
                goto mdio_pm_exit;
+       }
 
        status = macb_mdio_wait_for_idle(bp);
        if (status < 0)
@@ -3816,8 +3820,10 @@ static int at91ether_open(struct net_device *dev)
        int ret;
 
        ret = pm_runtime_get_sync(&lp->pdev->dev);
-       if (ret < 0)
+       if (ret < 0) {
+               pm_runtime_put_noidle(&lp->pdev->dev);
                return ret;
+       }
 
        /* Clear internal statistics */
        ctl = macb_readl(lp, NCR);
@@ -4172,15 +4178,9 @@ static int fu540_c000_clk_init(struct platform_device *pdev, struct clk **pclk,
 
 static int fu540_c000_init(struct platform_device *pdev)
 {
-       struct resource *res;
-
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-       if (!res)
-               return -ENODEV;
-
-       mgmt->reg = ioremap(res->start, resource_size(res));
-       if (!mgmt->reg)
-               return -ENOMEM;
+       mgmt->reg = devm_platform_ioremap_resource(pdev, 1);
+       if (IS_ERR(mgmt->reg))
+               return PTR_ERR(mgmt->reg);
 
        return macb_init(pdev);
 }
index 6a700d3..4520e7e 100644 (file)
@@ -54,7 +54,7 @@ config        THUNDER_NIC_RGX
 config CAVIUM_PTP
        tristate "Cavium PTP coprocessor as PTP clock"
        depends on 64BIT && PCI
-       imply PTP_1588_CLOCK
+       depends on PTP_1588_CLOCK
        ---help---
          This driver adds support for the Precision Time Protocol Clocks and
          Timestamping coprocessor (PTP) found on Cavium processors.
index f5dd34d..6516c45 100644 (file)
@@ -2207,6 +2207,9 @@ static void ethofld_hard_xmit(struct net_device *dev,
        if (unlikely(skip_eotx_wr)) {
                start = (u64 *)wr;
                eosw_txq->state = next_state;
+               eosw_txq->cred -= wrlen16;
+               eosw_txq->ncompl++;
+               eosw_txq->last_compl = 0;
                goto write_wr_headers;
        }
 
@@ -2365,6 +2368,34 @@ netdev_tx_t t4_start_xmit(struct sk_buff *skb, struct net_device *dev)
        return cxgb4_eth_xmit(skb, dev);
 }
 
+static void eosw_txq_flush_pending_skbs(struct sge_eosw_txq *eosw_txq)
+{
+       int pktcount = eosw_txq->pidx - eosw_txq->last_pidx;
+       int pidx = eosw_txq->pidx;
+       struct sk_buff *skb;
+
+       if (!pktcount)
+               return;
+
+       if (pktcount < 0)
+               pktcount += eosw_txq->ndesc;
+
+       while (pktcount--) {
+               pidx--;
+               if (pidx < 0)
+                       pidx += eosw_txq->ndesc;
+
+               skb = eosw_txq->desc[pidx].skb;
+               if (skb) {
+                       dev_consume_skb_any(skb);
+                       eosw_txq->desc[pidx].skb = NULL;
+                       eosw_txq->inuse--;
+               }
+       }
+
+       eosw_txq->pidx = eosw_txq->last_pidx + 1;
+}
+
 /**
  * cxgb4_ethofld_send_flowc - Send ETHOFLD flowc request to bind eotid to tc.
  * @dev - netdevice
@@ -2440,9 +2471,11 @@ int cxgb4_ethofld_send_flowc(struct net_device *dev, u32 eotid, u32 tc)
                                            FW_FLOWC_MNEM_EOSTATE_CLOSING :
                                            FW_FLOWC_MNEM_EOSTATE_ESTABLISHED);
 
-       eosw_txq->cred -= len16;
-       eosw_txq->ncompl++;
-       eosw_txq->last_compl = 0;
+       /* Free up any pending skbs to ensure there's room for
+        * termination FLOWC.
+        */
+       if (tc == FW_SCHED_CLS_NONE)
+               eosw_txq_flush_pending_skbs(eosw_txq);
 
        ret = eosw_txq_enqueue(eosw_txq, skb);
        if (ret) {
@@ -2695,6 +2728,7 @@ static void ofldtxq_stop(struct sge_uld_txq *q, struct fw_wr_hdr *wr)
  *     is ever running at a time ...
  */
 static void service_ofldq(struct sge_uld_txq *q)
+       __must_hold(&q->sendq.lock)
 {
        u64 *pos, *before, *end;
        int credits;
index ebc635f..15f37c5 100644 (file)
@@ -74,8 +74,8 @@ err_pci_mem_reg:
        pci_disable_device(pdev);
 err_pci_enable:
 err_mdiobus_alloc:
-       iounmap(port_regs);
 err_hw_alloc:
+       iounmap(port_regs);
 err_ioremap:
        return err;
 }
index 4bd3324..3de549c 100644 (file)
@@ -2189,7 +2189,8 @@ static void __ibmvnic_reset(struct work_struct *work)
                                rc = do_hard_reset(adapter, rwi, reset_state);
                                rtnl_unlock();
                        }
-               } else {
+               } else if (!(rwi->reset_reason == VNIC_RESET_FATAL &&
+                               adapter->from_passive_init)) {
                        rc = do_reset(adapter, rwi, reset_state);
                }
                kfree(rwi);
index 8972cdd..7352244 100644 (file)
@@ -1428,6 +1428,9 @@ int mvpp2_ethtool_cls_rule_del(struct mvpp2_port *port,
        struct mvpp2_ethtool_fs *efs;
        int ret;
 
+       if (info->fs.location >= MVPP2_N_RFS_ENTRIES_PER_FLOW)
+               return -EINVAL;
+
        efs = port->rfs_rules[info->fs.location];
        if (!efs)
                return -EINVAL;
index 1fa60e9..2b5dad2 100644 (file)
@@ -4329,6 +4329,8 @@ static int mvpp2_ethtool_get_rxfh_context(struct net_device *dev, u32 *indir,
 
        if (!mvpp22_rss_is_supported())
                return -EOPNOTSUPP;
+       if (rss_context >= MVPP22_N_RSS_TABLES)
+               return -EINVAL;
 
        if (hfunc)
                *hfunc = ETH_RSS_HASH_CRC32;
index 5716c3d..c72c4e1 100644 (file)
@@ -2550,6 +2550,7 @@ static int mlx4_allocate_default_counters(struct mlx4_dev *dev)
 
                if (!err || err == -ENOSPC) {
                        priv->def_counter[port] = idx;
+                       err = 0;
                } else if (err == -ENOENT) {
                        err = 0;
                        continue;
@@ -2600,7 +2601,8 @@ int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx, u8 usage)
                                   MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
                if (!err)
                        *idx = get_param_l(&out_param);
-
+               if (WARN_ON(err == -ENOSPC))
+                       err = -EINVAL;
                return err;
        }
        return __mlx4_counter_alloc(dev, idx);
index 34cba97..cede5bd 100644 (file)
@@ -888,7 +888,6 @@ static void cmd_work_handler(struct work_struct *work)
        }
 
        cmd->ent_arr[ent->idx] = ent;
-       set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state);
        lay = get_inst(cmd, ent->idx);
        ent->lay = lay;
        memset(lay, 0, sizeof(*lay));
@@ -910,6 +909,7 @@ static void cmd_work_handler(struct work_struct *work)
 
        if (ent->callback)
                schedule_delayed_work(&ent->cb_timeout_work, cb_timeout);
+       set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state);
 
        /* Skip sending command to fw if internal error */
        if (pci_channel_offline(dev->pdev) ||
@@ -922,6 +922,10 @@ static void cmd_work_handler(struct work_struct *work)
                MLX5_SET(mbox_out, ent->out, syndrome, drv_synd);
 
                mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
+               /* no doorbell, no need to keep the entry */
+               free_ent(cmd, ent->idx);
+               if (ent->callback)
+                       free_cmd(ent);
                return;
        }
 
index 55457f2..f372e94 100644 (file)
@@ -1773,19 +1773,14 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv)
 
 static int mlx5e_init_ul_rep_rx(struct mlx5e_priv *priv)
 {
-       int err = mlx5e_init_rep_rx(priv);
-
-       if (err)
-               return err;
-
        mlx5e_create_q_counters(priv);
-       return 0;
+       return mlx5e_init_rep_rx(priv);
 }
 
 static void mlx5e_cleanup_ul_rep_rx(struct mlx5e_priv *priv)
 {
-       mlx5e_destroy_q_counters(priv);
        mlx5e_cleanup_rep_rx(priv);
+       mlx5e_destroy_q_counters(priv);
 }
 
 static int mlx5e_init_uplink_rep_tx(struct mlx5e_rep_priv *rpriv)
index b2e38e0..5d9def1 100644 (file)
@@ -1550,9 +1550,9 @@ static int esw_create_restore_table(struct mlx5_eswitch *esw)
                                           MLX5_FLOW_NAMESPACE_KERNEL, 1,
                                           modact);
        if (IS_ERR(mod_hdr)) {
+               err = PTR_ERR(mod_hdr);
                esw_warn(dev, "Failed to create restore mod header, err: %d\n",
                         err);
-               err = PTR_ERR(mod_hdr);
                goto err_mod_hdr;
        }
 
@@ -2219,10 +2219,12 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw)
                total_vports = num_vfs + MLX5_SPECIAL_VPORTS(esw->dev);
 
        memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb));
+       mutex_init(&esw->fdb_table.offloads.vports.lock);
+       hash_init(esw->fdb_table.offloads.vports.table);
 
        err = esw_create_uplink_offloads_acl_tables(esw);
        if (err)
-               return err;
+               goto create_acl_err;
 
        err = esw_create_offloads_table(esw, total_vports);
        if (err)
@@ -2240,9 +2242,6 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw)
        if (err)
                goto create_fg_err;
 
-       mutex_init(&esw->fdb_table.offloads.vports.lock);
-       hash_init(esw->fdb_table.offloads.vports.table);
-
        return 0;
 
 create_fg_err:
@@ -2253,18 +2252,19 @@ create_restore_err:
        esw_destroy_offloads_table(esw);
 create_offloads_err:
        esw_destroy_uplink_offloads_acl_tables(esw);
-
+create_acl_err:
+       mutex_destroy(&esw->fdb_table.offloads.vports.lock);
        return err;
 }
 
 static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw)
 {
-       mutex_destroy(&esw->fdb_table.offloads.vports.lock);
        esw_destroy_vport_rx_group(esw);
        esw_destroy_offloads_fdb_tables(esw);
        esw_destroy_restore_table(esw);
        esw_destroy_offloads_table(esw);
        esw_destroy_uplink_offloads_acl_tables(esw);
+       mutex_destroy(&esw->fdb_table.offloads.vports.lock);
 }
 
 static void
@@ -2377,9 +2377,9 @@ int esw_offloads_enable(struct mlx5_eswitch *esw)
 err_vports:
        esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK);
 err_uplink:
-       esw_set_passing_vport_metadata(esw, false);
-err_steering_init:
        esw_offloads_steering_cleanup(esw);
+err_steering_init:
+       esw_set_passing_vport_metadata(esw, false);
 err_vport_metadata:
        mlx5_rdma_disable_roce(esw->dev);
        mutex_destroy(&esw->offloads.termtbl_mutex);
index c0ab9cf..18719ac 100644 (file)
@@ -695,6 +695,12 @@ static void dr_cq_event(struct mlx5_core_cq *mcq,
        pr_info("CQ event %u on CQ #%u\n", event, mcq->cqn);
 }
 
+static void dr_cq_complete(struct mlx5_core_cq *mcq,
+                          struct mlx5_eqe *eqe)
+{
+       pr_err("CQ completion CQ: #%u\n", mcq->cqn);
+}
+
 static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
                                      struct mlx5_uars_page *uar,
                                      size_t ncqe)
@@ -756,6 +762,7 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
        mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, pas);
 
        cq->mcq.event = dr_cq_event;
+       cq->mcq.comp  = dr_cq_complete;
 
        err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out));
        kvfree(in);
@@ -767,7 +774,12 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
        cq->mcq.set_ci_db = cq->wq_ctrl.db.db;
        cq->mcq.arm_db = cq->wq_ctrl.db.db + 1;
        *cq->mcq.set_ci_db = 0;
-       *cq->mcq.arm_db = 0;
+
+       /* set no-zero value, in order to avoid the HW to run db-recovery on
+        * CQ that used in polling mode.
+        */
+       *cq->mcq.arm_db = cpu_to_be32(2 << 28);
+
        cq->mcq.vector = 0;
        cq->mcq.irqn = irqn;
        cq->mcq.uar = uar;
index 430da69..a6e30e0 100644 (file)
@@ -986,8 +986,9 @@ mlxsw_sp_acl_tcam_vchunk_create(struct mlxsw_sp *mlxsw_sp,
                                unsigned int priority,
                                struct mlxsw_afk_element_usage *elusage)
 {
+       struct mlxsw_sp_acl_tcam_vchunk *vchunk, *vchunk2;
        struct mlxsw_sp_acl_tcam_vregion *vregion;
-       struct mlxsw_sp_acl_tcam_vchunk *vchunk;
+       struct list_head *pos;
        int err;
 
        if (priority == MLXSW_SP_ACL_TCAM_CATCHALL_PRIO)
@@ -1025,7 +1026,14 @@ mlxsw_sp_acl_tcam_vchunk_create(struct mlxsw_sp *mlxsw_sp,
        }
 
        mlxsw_sp_acl_tcam_rehash_ctx_vregion_changed(vregion);
-       list_add_tail(&vchunk->list, &vregion->vchunk_list);
+
+       /* Position the vchunk inside the list according to priority */
+       list_for_each(pos, &vregion->vchunk_list) {
+               vchunk2 = list_entry(pos, typeof(*vchunk2), list);
+               if (vchunk2->priority > priority)
+                       break;
+       }
+       list_add_tail(&vchunk->list, pos);
        mutex_unlock(&vregion->lock);
 
        return vchunk;
index 51117a5..890b078 100644 (file)
@@ -36,7 +36,8 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
                err = mlxsw_sp_acl_rulei_act_count(mlxsw_sp, rulei, extack);
                if (err)
                        return err;
-       } else if (act->hw_stats != FLOW_ACTION_HW_STATS_DISABLED) {
+       } else if (act->hw_stats != FLOW_ACTION_HW_STATS_DISABLED &&
+                  act->hw_stats != FLOW_ACTION_HW_STATS_DONT_CARE) {
                NL_SET_ERR_MSG_MOD(extack, "Unsupported action HW stats type");
                return -EOPNOTSUPP;
        }
index e165175..f70bb81 100644 (file)
@@ -564,7 +564,7 @@ static int moxart_remove(struct platform_device *pdev)
        struct net_device *ndev = platform_get_drvdata(pdev);
 
        unregister_netdev(ndev);
-       free_irq(ndev->irq, ndev);
+       devm_free_irq(&pdev->dev, ndev->irq, ndev);
        moxart_mac_free_memory(ndev);
        free_netdev(ndev);
 
index a8c48a4..02350c3 100644 (file)
@@ -1031,10 +1031,8 @@ int ocelot_fdb_dump(struct ocelot *ocelot, int port,
 {
        int i, j;
 
-       /* Loop through all the mac tables entries. There are 1024 rows of 4
-        * entries.
-        */
-       for (i = 0; i < 1024; i++) {
+       /* Loop through all the mac tables entries. */
+       for (i = 0; i < ocelot->num_mact_rows; i++) {
                for (j = 0; j < 4; j++) {
                        struct ocelot_mact_entry entry;
                        bool is_static;
@@ -1453,8 +1451,15 @@ static void ocelot_port_attr_stp_state_set(struct ocelot *ocelot, int port,
 
 void ocelot_set_ageing_time(struct ocelot *ocelot, unsigned int msecs)
 {
-       ocelot_write(ocelot, ANA_AUTOAGE_AGE_PERIOD(msecs / 2),
-                    ANA_AUTOAGE);
+       unsigned int age_period = ANA_AUTOAGE_AGE_PERIOD(msecs / 2000);
+
+       /* Setting AGE_PERIOD to zero effectively disables automatic aging,
+        * which is clearly not what our intention is. So avoid that.
+        */
+       if (!age_period)
+               age_period = 1;
+
+       ocelot_rmw(ocelot, age_period, ANA_AUTOAGE_AGE_PERIOD_M, ANA_AUTOAGE);
 }
 EXPORT_SYMBOL(ocelot_set_ageing_time);
 
index b88b589..7d4fd1b 100644 (file)
@@ -431,6 +431,7 @@ int ocelot_chip_init(struct ocelot *ocelot, const struct ocelot_ops *ops)
        ocelot->stats_layout = ocelot_stats_layout;
        ocelot->num_stats = ARRAY_SIZE(ocelot_stats_layout);
        ocelot->shared_queue_sz = 224 * 1024;
+       ocelot->num_mact_rows = 1024;
        ocelot->ops = ops;
 
        ret = ocelot_regfields_init(ocelot, ocelot_regfields);
index bfa0c0d..8b018ed 100644 (file)
@@ -208,11 +208,13 @@ static int jazz_sonic_probe(struct platform_device *pdev)
 
        err = register_netdev(dev);
        if (err)
-               goto out1;
+               goto undo_probe1;
 
        return 0;
 
-out1:
+undo_probe1:
+       dma_free_coherent(lp->device, SIZEOF_SONIC_DESC * SONIC_BUS_SCALE(lp->dma_bitmode),
+                         lp->descriptors, lp->descriptors_laddr);
        release_mem_region(dev->base_addr, SONIC_MEM_SIZE);
 out:
        free_netdev(dev);
index 9183b3e..354efff 100644 (file)
@@ -283,6 +283,7 @@ nfp_abm_vnic_set_mac(struct nfp_pf *pf, struct nfp_abm *abm, struct nfp_net *nn,
        if (!nfp_nsp_has_hwinfo_lookup(nsp)) {
                nfp_warn(pf->cpp, "NSP doesn't support PF MAC generation\n");
                eth_hw_addr_random(nn->dp.netdev);
+               nfp_nsp_close(nsp);
                return;
        }
 
index 5f8fc58..11621cc 100644 (file)
@@ -170,8 +170,7 @@ void ionic_debugfs_add_qcq(struct ionic_lif *lif, struct ionic_qcq *qcq)
        debugfs_create_x64("base_pa", 0400, cq_dentry, &cq->base_pa);
        debugfs_create_u32("num_descs", 0400, cq_dentry, &cq->num_descs);
        debugfs_create_u32("desc_size", 0400, cq_dentry, &cq->desc_size);
-       debugfs_create_u8("done_color", 0400, cq_dentry,
-                         (u8 *)&cq->done_color);
+       debugfs_create_bool("done_color", 0400, cq_dentry, &cq->done_color);
 
        debugfs_create_file("tail", 0400, cq_dentry, cq, &cq_tail_fops);
 
index 5acf4f4..d5293bf 100644 (file)
@@ -2101,6 +2101,7 @@ static void ionic_lif_handle_fw_down(struct ionic_lif *lif)
                ionic_txrx_free(lif);
        }
        ionic_lifs_deinit(ionic);
+       ionic_reset(ionic);
        ionic_qcqs_free(lif);
 
        dev_info(ionic->dev, "FW Down: LIFs stopped\n");
@@ -2116,6 +2117,7 @@ static void ionic_lif_handle_fw_up(struct ionic_lif *lif)
 
        dev_info(ionic->dev, "FW Up: restarting LIFs\n");
 
+       ionic_init_devinfo(ionic);
        err = ionic_qcqs_alloc(lif);
        if (err)
                goto err_out;
@@ -2549,8 +2551,6 @@ int ionic_lifs_register(struct ionic *ionic)
                dev_err(ionic->dev, "Cannot register net device, aborting\n");
                return err;
        }
-
-       ionic_link_status_check_request(ionic->master_lif);
        ionic->master_lif->registered = true;
 
        return 0;
index 494c859..67ba67e 100644 (file)
@@ -624,7 +624,7 @@ int dwmac5_est_configure(void __iomem *ioaddr, struct stmmac_est *cfg,
                total_offset += offset;
        }
 
-       total_ctr = cfg->ctr[0] + cfg->ctr[1] * 1000000000;
+       total_ctr = cfg->ctr[0] + cfg->ctr[1] * 1000000000ULL;
        total_ctr += total_offset;
 
        ctr_low = do_div(total_ctr, 1000000000);
index 565da64..a999d6b 100644 (file)
@@ -4060,7 +4060,7 @@ static int stmmac_set_features(struct net_device *netdev,
 /**
  *  stmmac_interrupt - main ISR
  *  @irq: interrupt number.
- *  @dev_id: to pass the net device pointer.
+ *  @dev_id: to pass the net device pointer (must be valid).
  *  Description: this is the main driver interrupt service routine.
  *  It can call:
  *  o DMA service routine (to manage incoming frame reception and transmission
@@ -4084,11 +4084,6 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id)
        if (priv->irq_wake)
                pm_wakeup_event(priv->device, 0);
 
-       if (unlikely(!dev)) {
-               netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__);
-               return IRQ_NONE;
-       }
-
        /* Check if adapter is up */
        if (test_bit(STMMAC_DOWN, &priv->state))
                return IRQ_HANDLED;
@@ -4991,7 +4986,7 @@ int stmmac_dvr_probe(struct device *device,
                                                 priv->plat->bsp_priv);
 
                if (ret < 0)
-                       return ret;
+                       goto error_serdes_powerup;
        }
 
 #ifdef CONFIG_DEBUG_FS
@@ -5000,6 +4995,8 @@ int stmmac_dvr_probe(struct device *device,
 
        return ret;
 
+error_serdes_powerup:
+       unregister_netdev(ndev);
 error_netdev_register:
        phylink_destroy(priv->phylink);
 error_phy_setup:
index 89cec77..8e34878 100644 (file)
@@ -90,9 +90,8 @@ config TI_CPTS
 config TI_CPTS_MOD
        tristate
        depends on TI_CPTS
+       depends on PTP_1588_CLOCK
        default y if TI_CPSW=y || TI_KEYSTONE_NETCP=y || TI_CPSW_SWITCHDEV=y
-       select NET_PTP_CLASSIFY
-       imply PTP_1588_CLOCK
        default m
 
 config TI_K3_AM65_CPSW_NUSS
index 2bf5673..2517ffb 100644 (file)
@@ -1719,7 +1719,8 @@ static int am65_cpsw_nuss_ndev_add_napi_2g(struct am65_cpsw_common *common)
 
                ret = devm_request_irq(dev, tx_chn->irq,
                                       am65_cpsw_nuss_tx_irq,
-                                      0, tx_chn->tx_chn_name, tx_chn);
+                                      IRQF_TRIGGER_HIGH,
+                                      tx_chn->tx_chn_name, tx_chn);
                if (ret) {
                        dev_err(dev, "failure requesting tx%u irq %u, %d\n",
                                tx_chn->id, tx_chn->irq, ret);
@@ -1744,7 +1745,7 @@ static int am65_cpsw_nuss_ndev_reg_2g(struct am65_cpsw_common *common)
 
        ret = devm_request_irq(dev, common->rx_chns.irq,
                               am65_cpsw_nuss_rx_irq,
-                              0, dev_name(dev), common);
+                              IRQF_TRIGGER_HIGH, dev_name(dev), common);
        if (ret) {
                dev_err(dev, "failure requesting rx irq %u, %d\n",
                        common->rx_chns.irq, ret);
index b50c3ec..6bcda20 100644 (file)
@@ -643,7 +643,7 @@ static int tc_mii_probe(struct net_device *dev)
                linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, mask);
                linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, mask);
        }
-       linkmode_and(phydev->supported, phydev->supported, mask);
+       linkmode_andnot(phydev->supported, phydev->supported, mask);
        linkmode_copy(phydev->advertising, phydev->supported);
 
        lp->link = 0;
index 672cd2c..21640a0 100644 (file)
@@ -1169,11 +1169,11 @@ out_unlock:
 static struct genl_family gtp_genl_family;
 
 static int gtp_genl_fill_info(struct sk_buff *skb, u32 snd_portid, u32 snd_seq,
-                             u32 type, struct pdp_ctx *pctx)
+                             int flags, u32 type, struct pdp_ctx *pctx)
 {
        void *genlh;
 
-       genlh = genlmsg_put(skb, snd_portid, snd_seq, &gtp_genl_family, 0,
+       genlh = genlmsg_put(skb, snd_portid, snd_seq, &gtp_genl_family, flags,
                            type);
        if (genlh == NULL)
                goto nlmsg_failure;
@@ -1227,8 +1227,8 @@ static int gtp_genl_get_pdp(struct sk_buff *skb, struct genl_info *info)
                goto err_unlock;
        }
 
-       err = gtp_genl_fill_info(skb2, NETLINK_CB(skb).portid,
-                                info->snd_seq, info->nlhdr->nlmsg_type, pctx);
+       err = gtp_genl_fill_info(skb2, NETLINK_CB(skb).portid, info->snd_seq,
+                                0, info->nlhdr->nlmsg_type, pctx);
        if (err < 0)
                goto err_unlock_free;
 
@@ -1271,6 +1271,7 @@ static int gtp_genl_dump_pdp(struct sk_buff *skb,
                                    gtp_genl_fill_info(skb,
                                            NETLINK_CB(cb->skb).portid,
                                            cb->nlh->nlmsg_seq,
+                                           NLM_F_MULTI,
                                            cb->nlh->nlmsg_type, pctx)) {
                                        cb->args[0] = i;
                                        cb->args[1] = j;
index d8e86bd..ebcfbae 100644 (file)
@@ -707,7 +707,8 @@ no_memory:
        goto drop;
 }
 
-static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+static netdev_tx_t netvsc_start_xmit(struct sk_buff *skb,
+                                    struct net_device *ndev)
 {
        return netvsc_xmit(skb, ndev, false);
 }
index 845478a..b671bea 100644 (file)
@@ -1041,6 +1041,7 @@ static void gsi_isr_gp_int1(struct gsi *gsi)
 
        complete(&gsi->completion);
 }
+
 /* Inter-EE interrupt handler */
 static void gsi_isr_glob_ee(struct gsi *gsi)
 {
@@ -1493,6 +1494,12 @@ static int gsi_generic_command(struct gsi *gsi, u32 channel_id,
        struct completion *completion = &gsi->completion;
        u32 val;
 
+       /* First zero the result code field */
+       val = ioread32(gsi->virt + GSI_CNTXT_SCRATCH_0_OFFSET);
+       val &= ~GENERIC_EE_RESULT_FMASK;
+       iowrite32(val, gsi->virt + GSI_CNTXT_SCRATCH_0_OFFSET);
+
+       /* Now issue the command */
        val = u32_encode_bits(opcode, GENERIC_OPCODE_FMASK);
        val |= u32_encode_bits(channel_id, GENERIC_CHID_FMASK);
        val |= u32_encode_bits(GSI_EE_MODEM, GENERIC_EE_FMASK);
@@ -1798,9 +1805,9 @@ static int gsi_channel_init_one(struct gsi *gsi,
 
        /* Worst case we need an event for every outstanding TRE */
        if (data->channel.tre_count > data->channel.event_count) {
-               dev_warn(gsi->dev, "channel %u limited to %u TREs\n",
-                       data->channel_id, data->channel.tre_count);
                tre_count = data->channel.event_count;
+               dev_warn(gsi->dev, "channel %u limited to %u TREs\n",
+                        data->channel_id, tre_count);
        } else {
                tre_count = data->channel.tre_count;
        }
index 7613b9c..acc9e74 100644 (file)
 #define INTER_EE_RESULT_FMASK          GENMASK(2, 0)
 #define GENERIC_EE_RESULT_FMASK                GENMASK(7, 5)
 #define GENERIC_EE_SUCCESS_FVAL                        1
+#define GENERIC_EE_INCORRECT_DIRECTION_FVAL    3
+#define GENERIC_EE_INCORRECT_CHANNEL_FVAL      5
 #define GENERIC_EE_NO_RESOURCES_FVAL           7
 #define USB_MAX_PACKET_FMASK           GENMASK(15, 15) /* 0: HS; 1: SS */
 #define MHI_BASE_CHANNEL_FMASK         GENMASK(31, 24)
index 6de03be..a21534f 100644 (file)
@@ -1283,7 +1283,7 @@ static int ipa_endpoint_stop_rx_dma(struct ipa *ipa)
  */
 int ipa_endpoint_stop(struct ipa_endpoint *endpoint)
 {
-       u32 retries = endpoint->toward_ipa ? 0 : IPA_ENDPOINT_STOP_RX_RETRIES;
+       u32 retries = IPA_ENDPOINT_STOP_RX_RETRIES;
        int ret;
 
        do {
@@ -1291,12 +1291,9 @@ int ipa_endpoint_stop(struct ipa_endpoint *endpoint)
                struct gsi *gsi = &ipa->gsi;
 
                ret = gsi_channel_stop(gsi, endpoint->channel_id);
-               if (ret != -EAGAIN)
+               if (ret != -EAGAIN || endpoint->toward_ipa)
                        break;
 
-               if (endpoint->toward_ipa)
-                       continue;
-
                /* For IPA v3.5.1, send a DMA read task and check again */
                if (ipa->version == IPA_VERSION_3_5_1) {
                        ret = ipa_endpoint_stop_rx_dma(ipa);
index 758baf7..d0d31cb 100644 (file)
@@ -1305,7 +1305,8 @@ static struct crypto_aead *macsec_alloc_tfm(char *key, int key_len, int icv_len)
        struct crypto_aead *tfm;
        int ret;
 
-       tfm = crypto_alloc_aead("gcm(aes)", 0, 0);
+       /* Pick a sync gcm(aes) cipher to ensure order is preserved. */
+       tfm = crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC);
 
        if (IS_ERR(tfm))
                return tfm;
@@ -2640,11 +2641,12 @@ static int macsec_upd_offload(struct sk_buff *skb, struct genl_info *info)
        if (ret)
                goto rollback;
 
-       rtnl_unlock();
        /* Force features update, since they are different for SW MACSec and
         * HW offloading cases.
         */
        netdev_update_features(dev);
+
+       rtnl_unlock();
        return 0;
 
 rollback:
index 415c273..ecbd5e0 100644 (file)
@@ -1120,7 +1120,7 @@ static struct dp83640_clock *dp83640_clock_get_bus(struct mii_bus *bus)
                goto out;
        }
        dp83640_clock_init(clock, bus);
-       list_add_tail(&phyter_clocks, &clock->list);
+       list_add_tail(&clock->list, &phyter_clocks);
 out:
        mutex_unlock(&phyter_clocks_lock);
 
index fe9aa3a..1dd19d0 100644 (file)
@@ -137,19 +137,18 @@ static int dp83822_set_wol(struct phy_device *phydev,
                        value &= ~DP83822_WOL_SECURE_ON;
                }
 
-               value |= (DP83822_WOL_EN | DP83822_WOL_INDICATION_SEL |
-                         DP83822_WOL_CLR_INDICATION);
-               phy_write_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_CFG,
-                             value);
+               /* Clear any pending WoL interrupt */
+               phy_read(phydev, MII_DP83822_MISR2);
+
+               value |= DP83822_WOL_EN | DP83822_WOL_INDICATION_SEL |
+                        DP83822_WOL_CLR_INDICATION;
+
+               return phy_write_mmd(phydev, DP83822_DEVADDR,
+                                    MII_DP83822_WOL_CFG, value);
        } else {
-               value = phy_read_mmd(phydev, DP83822_DEVADDR,
-                                    MII_DP83822_WOL_CFG);
-               value &= ~DP83822_WOL_EN;
-               phy_write_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_CFG,
-                             value);
+               return phy_clear_bits_mmd(phydev, DP83822_DEVADDR,
+                                         MII_DP83822_WOL_CFG, DP83822_WOL_EN);
        }
-
-       return 0;
 }
 
 static void dp83822_get_wol(struct phy_device *phydev,
@@ -258,12 +257,11 @@ static int dp83822_config_intr(struct phy_device *phydev)
 
 static int dp83822_config_init(struct phy_device *phydev)
 {
-       int value;
-
-       value = DP83822_WOL_MAGIC_EN | DP83822_WOL_SECURE_ON | DP83822_WOL_EN;
+       int value = DP83822_WOL_EN | DP83822_WOL_MAGIC_EN |
+                   DP83822_WOL_SECURE_ON;
 
-       return phy_write_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_CFG,
-             value);
+       return phy_clear_bits_mmd(phydev, DP83822_DEVADDR,
+                                 MII_DP83822_WOL_CFG, value);
 }
 
 static int dp83822_phy_reset(struct phy_device *phydev)
index 06f0883..d737253 100644 (file)
@@ -139,16 +139,19 @@ static int dp83811_set_wol(struct phy_device *phydev,
                        value &= ~DP83811_WOL_SECURE_ON;
                }
 
-               value |= (DP83811_WOL_EN | DP83811_WOL_INDICATION_SEL |
-                         DP83811_WOL_CLR_INDICATION);
-               phy_write_mmd(phydev, DP83811_DEVADDR, MII_DP83811_WOL_CFG,
-                             value);
+               /* Clear any pending WoL interrupt */
+               phy_read(phydev, MII_DP83811_INT_STAT1);
+
+               value |= DP83811_WOL_EN | DP83811_WOL_INDICATION_SEL |
+                        DP83811_WOL_CLR_INDICATION;
+
+               return phy_write_mmd(phydev, DP83811_DEVADDR,
+                                    MII_DP83811_WOL_CFG, value);
        } else {
-               phy_clear_bits_mmd(phydev, DP83811_DEVADDR, MII_DP83811_WOL_CFG,
-                                  DP83811_WOL_EN);
+               return phy_clear_bits_mmd(phydev, DP83811_DEVADDR,
+                                         MII_DP83811_WOL_CFG, DP83811_WOL_EN);
        }
 
-       return 0;
 }
 
 static void dp83811_get_wol(struct phy_device *phydev,
@@ -292,8 +295,8 @@ static int dp83811_config_init(struct phy_device *phydev)
 
        value = DP83811_WOL_MAGIC_EN | DP83811_WOL_SECURE_ON | DP83811_WOL_EN;
 
-       return phy_write_mmd(phydev, DP83811_DEVADDR, MII_DP83811_WOL_CFG,
-             value);
+       return phy_clear_bits_mmd(phydev, DP83811_DEVADDR, MII_DP83811_WOL_CFG,
+                                 value);
 }
 
 static int dp83811_phy_reset(struct phy_device *phydev)
index ff12492..1f1a01c 100644 (file)
@@ -66,6 +66,9 @@ enum {
        MV_PCS_CSSR1_SPD2_2500  = 0x0004,
        MV_PCS_CSSR1_SPD2_10000 = 0x0000,
 
+       /* Temperature read register (88E2110 only) */
+       MV_PCS_TEMP             = 0x8042,
+
        /* These registers appear at 0x800X and 0xa00X - the 0xa00X control
         * registers appear to set themselves to the 0x800X when AN is
         * restarted, but status registers appear readable from either.
@@ -77,6 +80,7 @@ enum {
        MV_V2_PORT_CTRL         = 0xf001,
        MV_V2_PORT_CTRL_SWRST   = BIT(15),
        MV_V2_PORT_CTRL_PWRDOWN = BIT(11),
+       /* Temperature control/read registers (88X3310 only) */
        MV_V2_TEMP_CTRL         = 0xf08a,
        MV_V2_TEMP_CTRL_MASK    = 0xc000,
        MV_V2_TEMP_CTRL_SAMPLE  = 0x0000,
@@ -104,6 +108,24 @@ static umode_t mv3310_hwmon_is_visible(const void *data,
        return 0;
 }
 
+static int mv3310_hwmon_read_temp_reg(struct phy_device *phydev)
+{
+       return phy_read_mmd(phydev, MDIO_MMD_VEND2, MV_V2_TEMP);
+}
+
+static int mv2110_hwmon_read_temp_reg(struct phy_device *phydev)
+{
+       return phy_read_mmd(phydev, MDIO_MMD_PCS, MV_PCS_TEMP);
+}
+
+static int mv10g_hwmon_read_temp_reg(struct phy_device *phydev)
+{
+       if (phydev->drv->phy_id == MARVELL_PHY_ID_88X3310)
+               return mv3310_hwmon_read_temp_reg(phydev);
+       else /* MARVELL_PHY_ID_88E2110 */
+               return mv2110_hwmon_read_temp_reg(phydev);
+}
+
 static int mv3310_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
                             u32 attr, int channel, long *value)
 {
@@ -116,7 +138,7 @@ static int mv3310_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
        }
 
        if (type == hwmon_temp && attr == hwmon_temp_input) {
-               temp = phy_read_mmd(phydev, MDIO_MMD_VEND2, MV_V2_TEMP);
+               temp = mv10g_hwmon_read_temp_reg(phydev);
                if (temp < 0)
                        return temp;
 
@@ -169,6 +191,9 @@ static int mv3310_hwmon_config(struct phy_device *phydev, bool enable)
        u16 val;
        int ret;
 
+       if (phydev->drv->phy_id != MARVELL_PHY_ID_88X3310)
+               return 0;
+
        ret = phy_write_mmd(phydev, MDIO_MMD_VEND2, MV_V2_TEMP,
                            MV_V2_TEMP_UNKNOWN);
        if (ret < 0)
index 6c738a2..4bb8552 100644 (file)
@@ -1359,6 +1359,7 @@ static const struct usb_device_id products[] = {
        {QMI_FIXED_INTF(0x413c, 0x81b3, 8)},    /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card (rev3) */
        {QMI_FIXED_INTF(0x413c, 0x81b6, 8)},    /* Dell Wireless 5811e */
        {QMI_FIXED_INTF(0x413c, 0x81b6, 10)},   /* Dell Wireless 5811e */
+       {QMI_FIXED_INTF(0x413c, 0x81cc, 8)},    /* Dell Wireless 5816e */
        {QMI_FIXED_INTF(0x413c, 0x81d7, 0)},    /* Dell Wireless 5821e */
        {QMI_FIXED_INTF(0x413c, 0x81d7, 1)},    /* Dell Wireless 5821e preproduction config */
        {QMI_FIXED_INTF(0x413c, 0x81e0, 0)},    /* Dell Wireless 5821e with eSIM support*/
index 5c964fc..71b8e80 100644 (file)
@@ -35,8 +35,10 @@ int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function,
                if (multicore) {
                        queue->worker = wg_packet_percpu_multicore_worker_alloc(
                                function, queue);
-                       if (!queue->worker)
+                       if (!queue->worker) {
+                               ptr_ring_cleanup(&queue->ring, NULL);
                                return -ENOMEM;
+                       }
                } else {
                        INIT_WORK(&queue->work, function);
                }
index da3b782..3bb5b9a 100644 (file)
@@ -226,21 +226,20 @@ void wg_packet_handshake_receive_worker(struct work_struct *work)
 static void keep_key_fresh(struct wg_peer *peer)
 {
        struct noise_keypair *keypair;
-       bool send = false;
+       bool send;
 
        if (peer->sent_lastminute_handshake)
                return;
 
        rcu_read_lock_bh();
        keypair = rcu_dereference_bh(peer->keypairs.current_keypair);
-       if (likely(keypair && READ_ONCE(keypair->sending.is_valid)) &&
-           keypair->i_am_the_initiator &&
-           unlikely(wg_birthdate_has_expired(keypair->sending.birthdate,
-                       REJECT_AFTER_TIME - KEEPALIVE_TIMEOUT - REKEY_TIMEOUT)))
-               send = true;
+       send = keypair && READ_ONCE(keypair->sending.is_valid) &&
+              keypair->i_am_the_initiator &&
+              wg_birthdate_has_expired(keypair->sending.birthdate,
+                       REJECT_AFTER_TIME - KEEPALIVE_TIMEOUT - REKEY_TIMEOUT);
        rcu_read_unlock_bh();
 
-       if (send) {
+       if (unlikely(send)) {
                peer->sent_lastminute_handshake = true;
                wg_packet_send_queued_handshake_initiation(peer, false);
        }
@@ -393,13 +392,11 @@ static void wg_packet_consume_data_done(struct wg_peer *peer,
                len = ntohs(ip_hdr(skb)->tot_len);
                if (unlikely(len < sizeof(struct iphdr)))
                        goto dishonest_packet_size;
-               if (INET_ECN_is_ce(PACKET_CB(skb)->ds))
-                       IP_ECN_set_ce(ip_hdr(skb));
+               INET_ECN_decapsulate(skb, PACKET_CB(skb)->ds, ip_hdr(skb)->tos);
        } else if (skb->protocol == htons(ETH_P_IPV6)) {
                len = ntohs(ipv6_hdr(skb)->payload_len) +
                      sizeof(struct ipv6hdr);
-               if (INET_ECN_is_ce(PACKET_CB(skb)->ds))
-                       IP6_ECN_set_ce(skb, ipv6_hdr(skb));
+               INET_ECN_decapsulate(skb, PACKET_CB(skb)->ds, ipv6_get_dsfield(ipv6_hdr(skb)));
        } else {
                goto dishonest_packet_type;
        }
@@ -518,6 +515,8 @@ void wg_packet_decrypt_worker(struct work_struct *work)
                                &PACKET_CB(skb)->keypair->receiving)) ?
                                PACKET_STATE_CRYPTED : PACKET_STATE_DEAD;
                wg_queue_enqueue_per_peer_napi(skb, state);
+               if (need_resched())
+                       cond_resched();
        }
 }
 
index bcd6462..007cd44 100644 (file)
@@ -120,9 +120,9 @@ bool __init wg_ratelimiter_selftest(void)
        enum { TRIALS_BEFORE_GIVING_UP = 5000 };
        bool success = false;
        int test = 0, trials;
-       struct sk_buff *skb4, *skb6;
+       struct sk_buff *skb4, *skb6 = NULL;
        struct iphdr *hdr4;
-       struct ipv6hdr *hdr6;
+       struct ipv6hdr *hdr6 = NULL;
 
        if (IS_ENABLED(CONFIG_KASAN) || IS_ENABLED(CONFIG_UBSAN))
                return true;
index 7348c10..6687db6 100644 (file)
@@ -124,20 +124,17 @@ void wg_packet_send_handshake_cookie(struct wg_device *wg,
 static void keep_key_fresh(struct wg_peer *peer)
 {
        struct noise_keypair *keypair;
-       bool send = false;
+       bool send;
 
        rcu_read_lock_bh();
        keypair = rcu_dereference_bh(peer->keypairs.current_keypair);
-       if (likely(keypair && READ_ONCE(keypair->sending.is_valid)) &&
-           (unlikely(atomic64_read(&keypair->sending.counter.counter) >
-                     REKEY_AFTER_MESSAGES) ||
-            (keypair->i_am_the_initiator &&
-             unlikely(wg_birthdate_has_expired(keypair->sending.birthdate,
-                                               REKEY_AFTER_TIME)))))
-               send = true;
+       send = keypair && READ_ONCE(keypair->sending.is_valid) &&
+              (atomic64_read(&keypair->sending.counter.counter) > REKEY_AFTER_MESSAGES ||
+               (keypair->i_am_the_initiator &&
+                wg_birthdate_has_expired(keypair->sending.birthdate, REKEY_AFTER_TIME)));
        rcu_read_unlock_bh();
 
-       if (send)
+       if (unlikely(send))
                wg_packet_send_queued_handshake_initiation(peer, false);
 }
 
@@ -281,6 +278,8 @@ void wg_packet_tx_worker(struct work_struct *work)
 
                wg_noise_keypair_put(keypair, false);
                wg_peer_put(peer);
+               if (need_resched())
+                       cond_resched();
        }
 }
 
@@ -304,7 +303,8 @@ void wg_packet_encrypt_worker(struct work_struct *work)
                }
                wg_queue_enqueue_per_peer(&PACKET_PEER(first)->tx_queue, first,
                                          state);
-
+               if (need_resched())
+                       cond_resched();
        }
 }
 
index b0d6541..f901802 100644 (file)
@@ -76,12 +76,6 @@ static int send4(struct wg_device *wg, struct sk_buff *skb,
                        net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n",
                                            wg->dev->name, &endpoint->addr, ret);
                        goto err;
-               } else if (unlikely(rt->dst.dev == skb->dev)) {
-                       ip_rt_put(rt);
-                       ret = -ELOOP;
-                       net_dbg_ratelimited("%s: Avoiding routing loop to %pISpfsc\n",
-                                           wg->dev->name, &endpoint->addr);
-                       goto err;
                }
                if (cache)
                        dst_cache_set_ip4(cache, &rt->dst, fl.saddr);
@@ -149,12 +143,6 @@ static int send6(struct wg_device *wg, struct sk_buff *skb,
                        net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n",
                                            wg->dev->name, &endpoint->addr, ret);
                        goto err;
-               } else if (unlikely(dst->dev == skb->dev)) {
-                       dst_release(dst);
-                       ret = -ELOOP;
-                       net_dbg_ratelimited("%s: Avoiding routing loop to %pISpfsc\n",
-                                           wg->dev->name, &endpoint->addr);
-                       goto err;
                }
                if (cache)
                        dst_cache_set_ip6(cache, dst, &fl.saddr);
index f2adea9..f3c037f 100644 (file)
@@ -1110,7 +1110,7 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
                  * Don't treat an error as fatal, as we potentially already
                  * have a NGUID or EUI-64.
                  */
-               if (status > 0)
+               if (status > 0 && !(status & NVME_SC_DNR))
                        status = 0;
                goto free_data;
        }
index 4e79e41..e13c370 100644 (file)
@@ -973,9 +973,13 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
 
 static inline void nvme_update_cq_head(struct nvme_queue *nvmeq)
 {
-       if (++nvmeq->cq_head == nvmeq->q_depth) {
+       u16 tmp = nvmeq->cq_head + 1;
+
+       if (tmp == nvmeq->q_depth) {
                nvmeq->cq_head = 0;
                nvmeq->cq_phase ^= 1;
+       } else {
+               nvmeq->cq_head = tmp;
        }
 }
 
index 3708d43..393011a 100644 (file)
@@ -815,6 +815,13 @@ static const struct of_device_id qusb2_phy_of_match_table[] = {
        }, {
                .compatible     = "qcom,msm8998-qusb2-phy",
                .data           = &msm8998_phy_cfg,
+       }, {
+               /*
+                * Deprecated. Only here to support legacy device
+                * trees that didn't include "qcom,qusb2-v2-phy"
+                */
+               .compatible     = "qcom,sdm845-qusb2-phy",
+               .data           = &qusb2_v2_phy_cfg,
        }, {
                .compatible     = "qcom,qusb2-v2-phy",
                .data           = &qusb2_v2_phy_cfg,
index d998e65..a52a9bf 100644 (file)
@@ -160,18 +160,11 @@ static int qcom_snps_hsphy_power_on(struct phy *phy)
        ret = regulator_bulk_enable(VREG_NUM, priv->vregs);
        if (ret)
                return ret;
-       ret = clk_bulk_prepare_enable(priv->num_clks, priv->clks);
-       if (ret)
-               goto err_disable_regulator;
+
        qcom_snps_hsphy_disable_hv_interrupts(priv);
        qcom_snps_hsphy_exit_retention(priv);
 
        return 0;
-
-err_disable_regulator:
-       regulator_bulk_disable(VREG_NUM, priv->vregs);
-
-       return ret;
 }
 
 static int qcom_snps_hsphy_power_off(struct phy *phy)
@@ -180,7 +173,6 @@ static int qcom_snps_hsphy_power_off(struct phy *phy)
 
        qcom_snps_hsphy_enter_retention(priv);
        qcom_snps_hsphy_enable_hv_interrupts(priv);
-       clk_bulk_disable_unprepare(priv->num_clks, priv->clks);
        regulator_bulk_disable(VREG_NUM, priv->vregs);
 
        return 0;
@@ -266,21 +258,39 @@ static int qcom_snps_hsphy_init(struct phy *phy)
        struct hsphy_priv *priv = phy_get_drvdata(phy);
        int ret;
 
-       ret = qcom_snps_hsphy_reset(priv);
+       ret = clk_bulk_prepare_enable(priv->num_clks, priv->clks);
        if (ret)
                return ret;
 
+       ret = qcom_snps_hsphy_reset(priv);
+       if (ret)
+               goto disable_clocks;
+
        qcom_snps_hsphy_init_sequence(priv);
 
        ret = qcom_snps_hsphy_por_reset(priv);
        if (ret)
-               return ret;
+               goto disable_clocks;
+
+       return 0;
+
+disable_clocks:
+       clk_bulk_disable_unprepare(priv->num_clks, priv->clks);
+       return ret;
+}
+
+static int qcom_snps_hsphy_exit(struct phy *phy)
+{
+       struct hsphy_priv *priv = phy_get_drvdata(phy);
+
+       clk_bulk_disable_unprepare(priv->num_clks, priv->clks);
 
        return 0;
 }
 
 static const struct phy_ops qcom_snps_hsphy_ops = {
        .init = qcom_snps_hsphy_init,
+       .exit = qcom_snps_hsphy_exit,
        .power_on = qcom_snps_hsphy_power_on,
        .power_off = qcom_snps_hsphy_power_off,
        .set_mode = qcom_snps_hsphy_set_mode,
index b7f2c00..9c4af76 100644 (file)
@@ -52,28 +52,15 @@ static int cros_ec_sensorhub_register(struct device *dev,
        int sensor_type[MOTIONSENSE_TYPE_MAX] = { 0 };
        struct cros_ec_command *msg = sensorhub->msg;
        struct cros_ec_dev *ec = sensorhub->ec;
-       int ret, i, sensor_num;
+       int ret, i;
        char *name;
 
-       sensor_num = cros_ec_get_sensor_count(ec);
-       if (sensor_num < 0) {
-               dev_err(dev,
-                       "Unable to retrieve sensor information (err:%d)\n",
-                       sensor_num);
-               return sensor_num;
-       }
-
-       sensorhub->sensor_num = sensor_num;
-       if (sensor_num == 0) {
-               dev_err(dev, "Zero sensors reported.\n");
-               return -EINVAL;
-       }
 
        msg->version = 1;
        msg->insize = sizeof(struct ec_response_motion_sense);
        msg->outsize = sizeof(struct ec_params_motion_sense);
 
-       for (i = 0; i < sensor_num; i++) {
+       for (i = 0; i < sensorhub->sensor_num; i++) {
                sensorhub->params->cmd = MOTIONSENSE_CMD_INFO;
                sensorhub->params->info.sensor_num = i;
 
@@ -140,8 +127,7 @@ static int cros_ec_sensorhub_probe(struct platform_device *pdev)
        struct cros_ec_dev *ec = dev_get_drvdata(dev->parent);
        struct cros_ec_sensorhub *data;
        struct cros_ec_command *msg;
-       int ret;
-       int i;
+       int ret, i, sensor_num;
 
        msg = devm_kzalloc(dev, sizeof(struct cros_ec_command) +
                           max((u16)sizeof(struct ec_params_motion_sense),
@@ -166,10 +152,52 @@ static int cros_ec_sensorhub_probe(struct platform_device *pdev)
        dev_set_drvdata(dev, data);
 
        /* Check whether this EC is a sensor hub. */
-       if (cros_ec_check_features(data->ec, EC_FEATURE_MOTION_SENSE)) {
+       if (cros_ec_check_features(ec, EC_FEATURE_MOTION_SENSE)) {
+               sensor_num = cros_ec_get_sensor_count(ec);
+               if (sensor_num < 0) {
+                       dev_err(dev,
+                               "Unable to retrieve sensor information (err:%d)\n",
+                               sensor_num);
+                       return sensor_num;
+               }
+               if (sensor_num == 0) {
+                       dev_err(dev, "Zero sensors reported.\n");
+                       return -EINVAL;
+               }
+               data->sensor_num = sensor_num;
+
+               /*
+                * Prepare the ring handler before enumering the
+                * sensors.
+                */
+               if (cros_ec_check_features(ec, EC_FEATURE_MOTION_SENSE_FIFO)) {
+                       ret = cros_ec_sensorhub_ring_allocate(data);
+                       if (ret)
+                               return ret;
+               }
+
+               /* Enumerate the sensors.*/
                ret = cros_ec_sensorhub_register(dev, data);
                if (ret)
                        return ret;
+
+               /*
+                * When the EC does not have a FIFO, the sensors will query
+                * their data themselves via sysfs or a software trigger.
+                */
+               if (cros_ec_check_features(ec, EC_FEATURE_MOTION_SENSE_FIFO)) {
+                       ret = cros_ec_sensorhub_ring_add(data);
+                       if (ret)
+                               return ret;
+                       /*
+                        * The msg and its data is not under the control of the
+                        * ring handler.
+                        */
+                       return devm_add_action_or_reset(dev,
+                                       cros_ec_sensorhub_ring_remove,
+                                       data);
+               }
+
        } else {
                /*
                 * If the device has sensors but does not claim to
@@ -184,22 +212,6 @@ static int cros_ec_sensorhub_probe(struct platform_device *pdev)
                }
        }
 
-       /*
-        * If the EC does not have a FIFO, the sensors will query their data
-        * themselves via sysfs or a software trigger.
-        */
-       if (cros_ec_check_features(ec, EC_FEATURE_MOTION_SENSE_FIFO)) {
-               ret = cros_ec_sensorhub_ring_add(data);
-               if (ret)
-                       return ret;
-               /*
-                * The msg and its data is not under the control of the ring
-                * handler.
-                */
-               return devm_add_action_or_reset(dev,
-                                               cros_ec_sensorhub_ring_remove,
-                                               data);
-       }
 
        return 0;
 }
index c48e5b3..24e48d9 100644 (file)
@@ -957,17 +957,15 @@ static int cros_ec_sensorhub_event(struct notifier_block *nb,
 }
 
 /**
- * cros_ec_sensorhub_ring_add() - Add the FIFO functionality if the EC
- *                               supports it.
+ * cros_ec_sensorhub_ring_allocate() - Prepare the FIFO functionality if the EC
+ *                                    supports it.
  *
  * @sensorhub : Sensor Hub object.
  *
  * Return: 0 on success.
  */
-int cros_ec_sensorhub_ring_add(struct cros_ec_sensorhub *sensorhub)
+int cros_ec_sensorhub_ring_allocate(struct cros_ec_sensorhub *sensorhub)
 {
-       struct cros_ec_dev *ec = sensorhub->ec;
-       int ret;
        int fifo_info_length =
                sizeof(struct ec_response_motion_sense_fifo_info) +
                sizeof(u16) * sensorhub->sensor_num;
@@ -978,6 +976,49 @@ int cros_ec_sensorhub_ring_add(struct cros_ec_sensorhub *sensorhub)
        if (!sensorhub->fifo_info)
                return -ENOMEM;
 
+       /*
+        * Allocate the callback area based on the number of sensors.
+        * Add one for the sensor ring.
+        */
+       sensorhub->push_data = devm_kcalloc(sensorhub->dev,
+                       sensorhub->sensor_num,
+                       sizeof(*sensorhub->push_data),
+                       GFP_KERNEL);
+       if (!sensorhub->push_data)
+               return -ENOMEM;
+
+       sensorhub->tight_timestamps = cros_ec_check_features(
+                       sensorhub->ec,
+                       EC_FEATURE_MOTION_SENSE_TIGHT_TIMESTAMPS);
+
+       if (sensorhub->tight_timestamps) {
+               sensorhub->batch_state = devm_kcalloc(sensorhub->dev,
+                               sensorhub->sensor_num,
+                               sizeof(*sensorhub->batch_state),
+                               GFP_KERNEL);
+               if (!sensorhub->batch_state)
+                       return -ENOMEM;
+       }
+
+       return 0;
+}
+
+/**
+ * cros_ec_sensorhub_ring_add() - Add the FIFO functionality if the EC
+ *                               supports it.
+ *
+ * @sensorhub : Sensor Hub object.
+ *
+ * Return: 0 on success.
+ */
+int cros_ec_sensorhub_ring_add(struct cros_ec_sensorhub *sensorhub)
+{
+       struct cros_ec_dev *ec = sensorhub->ec;
+       int ret;
+       int fifo_info_length =
+               sizeof(struct ec_response_motion_sense_fifo_info) +
+               sizeof(u16) * sensorhub->sensor_num;
+
        /* Retrieve FIFO information */
        sensorhub->msg->version = 2;
        sensorhub->params->cmd = MOTIONSENSE_CMD_FIFO_INFO;
@@ -998,31 +1039,9 @@ int cros_ec_sensorhub_ring_add(struct cros_ec_sensorhub *sensorhub)
        if (!sensorhub->ring)
                return -ENOMEM;
 
-       /*
-        * Allocate the callback area based on the number of sensors.
-        */
-       sensorhub->push_data = devm_kcalloc(
-                       sensorhub->dev, sensorhub->sensor_num,
-                       sizeof(*sensorhub->push_data),
-                       GFP_KERNEL);
-       if (!sensorhub->push_data)
-               return -ENOMEM;
-
        sensorhub->fifo_timestamp[CROS_EC_SENSOR_LAST_TS] =
                cros_ec_get_time_ns();
 
-       sensorhub->tight_timestamps = cros_ec_check_features(
-                       ec, EC_FEATURE_MOTION_SENSE_TIGHT_TIMESTAMPS);
-
-       if (sensorhub->tight_timestamps) {
-               sensorhub->batch_state = devm_kcalloc(sensorhub->dev,
-                               sensorhub->sensor_num,
-                               sizeof(*sensorhub->batch_state),
-                               GFP_KERNEL);
-               if (!sensorhub->batch_state)
-                       return -ENOMEM;
-       }
-
        /* Register the notifier that will act as a top half interrupt. */
        sensorhub->notifier.notifier_call = cros_ec_sensorhub_event;
        ret = blocking_notifier_chain_register(&ec->ec_dev->event_notifier,
index c340505..7486f6e 100644 (file)
@@ -5754,10 +5754,6 @@ static DECLARE_DELAYED_WORK(regulator_init_complete_work,
 
 static int __init regulator_init_complete(void)
 {
-       int delay = driver_deferred_probe_timeout;
-
-       if (delay < 0)
-               delay = 0;
        /*
         * Since DT doesn't provide an idiomatic mechanism for
         * enabling full constraints and since it's much more natural
@@ -5768,17 +5764,18 @@ static int __init regulator_init_complete(void)
                has_full_constraints = true;
 
        /*
-        * If driver_deferred_probe_timeout is set, we punt
-        * completion for that many seconds since systems like
-        * distros will load many drivers from userspace so consumers
-        * might not always be ready yet, this is particularly an
-        * issue with laptops where this might bounce the display off
-        * then on.  Ideally we'd get a notification from userspace
-        * when this happens but we don't so just wait a bit and hope
-        * we waited long enough.  It'd be better if we'd only do
-        * this on systems that need it.
+        * We punt completion for an arbitrary amount of time since
+        * systems like distros will load many drivers from userspace
+        * so consumers might not always be ready yet, this is
+        * particularly an issue with laptops where this might bounce
+        * the display off then on.  Ideally we'd get a notification
+        * from userspace when this happens but we don't so just wait
+        * a bit and hope we waited long enough.  It'd be better if
+        * we'd only do this on systems that need it, and a kernel
+        * command line option might be useful.
         */
-       schedule_delayed_work(&regulator_init_complete_work, delay * HZ);
+       schedule_delayed_work(&regulator_init_complete_work,
+                             msecs_to_jiffies(30000));
 
        return 0;
 }
index f768946..569966b 100644 (file)
@@ -6717,17 +6717,17 @@ int qeth_stop(struct net_device *dev)
                unsigned int i;
 
                /* Quiesce the NAPI instances: */
-               qeth_for_each_output_queue(card, queue, i) {
+               qeth_for_each_output_queue(card, queue, i)
                        napi_disable(&queue->napi);
-                       del_timer_sync(&queue->timer);
-               }
 
                /* Stop .ndo_start_xmit, might still access queue->napi. */
                netif_tx_disable(dev);
 
-               /* Queues may get re-allocated, so remove the NAPIs here. */
-               qeth_for_each_output_queue(card, queue, i)
+               qeth_for_each_output_queue(card, queue, i) {
+                       del_timer_sync(&queue->timer);
+                       /* Queues may get re-allocated, so remove the NAPIs. */
                        netif_napi_del(&queue->napi);
+               }
        } else {
                netif_tx_disable(dev);
        }
index 8af2162..21b7cb6 100644 (file)
@@ -410,9 +410,7 @@ static void dax_unlock_pages(struct dax_ctx *ctx, int ccb_index, int nelem)
 
                        if (p) {
                                dax_dbg("freeing page %p", p);
-                               if (j == OUT)
-                                       set_page_dirty(p);
-                               put_page(p);
+                               unpin_user_pages_dirty_lock(&p, 1, j == OUT);
                                ctx->pages[i][j] = NULL;
                        }
                }
@@ -425,13 +423,13 @@ static int dax_lock_page(void *va, struct page **p)
 
        dax_dbg("uva %p", va);
 
-       ret = get_user_pages_fast((unsigned long)va, 1, FOLL_WRITE, p);
+       ret = pin_user_pages_fast((unsigned long)va, 1, FOLL_WRITE, p);
        if (ret == 1) {
                dax_dbg("locked page %p, for VA %p", *p, va);
                return 0;
        }
 
-       dax_dbg("get_user_pages failed, va=%p, ret=%d", va, ret);
+       dax_dbg("pin_user_pages failed, va=%p, ret=%d", va, ret);
        return -1;
 }
 
index 7da9e06..635f6f9 100644 (file)
@@ -3640,6 +3640,11 @@ static void ibmvfc_tgt_implicit_logout_and_del(struct ibmvfc_target *tgt)
        struct ibmvfc_host *vhost = tgt->vhost;
        struct ibmvfc_event *evt;
 
+       if (!vhost->logged_in) {
+               ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_DEL_RPORT);
+               return;
+       }
+
        if (vhost->discovery_threads >= disc_threads)
                return;
 
index 7f66a77..59f0f10 100644 (file)
@@ -2320,16 +2320,12 @@ static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 static int ibmvscsi_remove(struct vio_dev *vdev)
 {
        struct ibmvscsi_host_data *hostdata = dev_get_drvdata(&vdev->dev);
-       unsigned long flags;
 
        srp_remove_host(hostdata->host);
        scsi_remove_host(hostdata->host);
 
        purge_requests(hostdata, DID_ERROR);
-
-       spin_lock_irqsave(hostdata->host->host_lock, flags);
        release_event_pool(&hostdata->pool, hostdata);
-       spin_unlock_irqrestore(hostdata->host->host_lock, flags);
 
        ibmvscsi_release_crq_queue(&hostdata->queue, hostdata,
                                        max_events);
index 97cabd7..3325596 100644 (file)
@@ -3031,11 +3031,11 @@ qla24xx_vport_delete(struct fc_vport *fc_vport)
            test_bit(FCPORT_UPDATE_NEEDED, &vha->dpc_flags))
                msleep(1000);
 
-       qla_nvme_delete(vha);
 
        qla24xx_disable_vp(vha);
        qla2x00_wait_for_sess_deletion(vha);
 
+       qla_nvme_delete(vha);
        vha->flags.delete_progress = 1;
 
        qlt_remove_target(ha, vha);
index 4ed9043..d6c991b 100644 (file)
@@ -3153,7 +3153,7 @@ qla24xx_abort_command(srb_t *sp)
        ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x108c,
            "Entered %s.\n", __func__);
 
-       if (vha->flags.qpairs_available && sp->qpair)
+       if (sp->qpair)
                req = sp->qpair->req;
        else
                return QLA_FUNCTION_FAILED;
index 8e0575f..67325fb 100644 (file)
@@ -925,6 +925,10 @@ do_map_region(const struct gasket_dev *gasket_dev, struct vm_area_struct *vma,
                gasket_get_bar_index(gasket_dev,
                                     (vma->vm_pgoff << PAGE_SHIFT) +
                                     driver_desc->legacy_mmap_address_offset);
+
+       if (bar_index < 0)
+               return DO_MAP_REGION_INVALID;
+
        phys_base = gasket_dev->bar_data[bar_index].phys_base + phys_offset;
        while (mapped_bytes < map_length) {
                /*
index 87a6dac..ab6f391 100644 (file)
@@ -30,5 +30,4 @@ Now the TODOs:
 
 Please send any patches to:
 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-Wolfram Sang <wsa@the-dreams.de>
 Linux Driver Project Developer List <driverdev-devel@linuxdriverproject.org>
index 3d084ce..50c7534 100644 (file)
@@ -182,6 +182,9 @@ static int usb4_switch_op(struct tb_switch *sw, u16 opcode, u8 *status)
                return ret;
 
        ret = tb_sw_read(sw, &val, TB_CFG_SWITCH, ROUTER_CS_26, 1);
+       if (ret)
+               return ret;
+
        if (val & ROUTER_CS_26_ONS)
                return -EOPNOTSUPP;
 
index ed0aa5c..5674da2 100644 (file)
@@ -843,10 +843,8 @@ static int bcm_uart_probe(struct platform_device *pdev)
        if (IS_ERR(clk) && pdev->dev.of_node)
                clk = of_clk_get(pdev->dev.of_node, 0);
 
-       if (IS_ERR(clk)) {
-               clk_put(clk);
+       if (IS_ERR(clk))
                return -ENODEV;
-       }
 
        port->iotype = UPIO_MEM;
        port->irq = res_irq->start;
index ac137b6..35e9e8f 100644 (file)
@@ -1459,6 +1459,7 @@ static int cdns_uart_probe(struct platform_device *pdev)
                cdns_uart_uart_driver.nr = CDNS_UART_NR_PORTS;
 #ifdef CONFIG_SERIAL_XILINX_PS_UART_CONSOLE
                cdns_uart_uart_driver.cons = &cdns_uart_console;
+               cdns_uart_console.index = id;
 #endif
 
                rc = uart_register_driver(&cdns_uart_uart_driver);
index e5ffed7..48a8199 100644 (file)
@@ -365,9 +365,14 @@ static struct uni_screen *vc_uniscr_alloc(unsigned int cols, unsigned int rows)
        return uniscr;
 }
 
+static void vc_uniscr_free(struct uni_screen *uniscr)
+{
+       vfree(uniscr);
+}
+
 static void vc_uniscr_set(struct vc_data *vc, struct uni_screen *new_uniscr)
 {
-       vfree(vc->vc_uni_screen);
+       vc_uniscr_free(vc->vc_uni_screen);
        vc->vc_uni_screen = new_uniscr;
 }
 
@@ -1230,7 +1235,7 @@ static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc,
        err = resize_screen(vc, new_cols, new_rows, user);
        if (err) {
                kfree(newscreen);
-               kfree(new_uniscr);
+               vc_uniscr_free(new_uniscr);
                return err;
        }
 
index af648ba..4610545 100644 (file)
@@ -114,7 +114,7 @@ static int ci_hdrc_msm_notify_event(struct ci_hdrc *ci, unsigned event)
                        hw_write_id_reg(ci, HS_PHY_GENCONFIG_2,
                                        HS_PHY_ULPI_TX_PKT_EN_CLR_FIX, 0);
 
-               if (!IS_ERR(ci->platdata->vbus_extcon.edev)) {
+               if (!IS_ERR(ci->platdata->vbus_extcon.edev) || ci->role_switch) {
                        hw_write_id_reg(ci, HS_PHY_GENCONFIG_2,
                                        HS_PHY_SESS_VLD_CTRL_EN,
                                        HS_PHY_SESS_VLD_CTRL_EN);
index 6833c91..b9db981 100644 (file)
@@ -217,6 +217,7 @@ static int usbdev_mmap(struct file *file, struct vm_area_struct *vma)
 {
        struct usb_memory *usbm = NULL;
        struct usb_dev_state *ps = file->private_data;
+       struct usb_hcd *hcd = bus_to_hcd(ps->dev->bus);
        size_t size = vma->vm_end - vma->vm_start;
        void *mem;
        unsigned long flags;
@@ -250,9 +251,7 @@ static int usbdev_mmap(struct file *file, struct vm_area_struct *vma)
        usbm->vma_use_count = 1;
        INIT_LIST_HEAD(&usbm->memlist);
 
-       if (remap_pfn_range(vma, vma->vm_start,
-                       virt_to_phys(usbm->mem) >> PAGE_SHIFT,
-                       size, vma->vm_page_prot) < 0) {
+       if (dma_mmap_coherent(hcd->self.sysdev, vma, mem, dma_handle, size)) {
                dec_usb_memory_use_count(usbm, &usbm->vma_use_count);
                return -EAGAIN;
        }
index a48678a..6197938 100644 (file)
@@ -1144,11 +1144,11 @@ void usb_disable_endpoint(struct usb_device *dev, unsigned int epaddr,
 
        if (usb_endpoint_out(epaddr)) {
                ep = dev->ep_out[epnum];
-               if (reset_hardware)
+               if (reset_hardware && epnum != 0)
                        dev->ep_out[epnum] = NULL;
        } else {
                ep = dev->ep_in[epnum];
-               if (reset_hardware)
+               if (reset_hardware && epnum != 0)
                        dev->ep_in[epnum] = NULL;
        }
        if (ep) {
index ffd9841..d63072f 100644 (file)
@@ -1138,8 +1138,8 @@ static void garmin_read_process(struct garmin_data *garmin_data_p,
                   send it directly to the tty port */
                if (garmin_data_p->flags & FLAGS_QUEUING) {
                        pkt_add(garmin_data_p, data, data_length);
-               } else if (bulk_data ||
-                          getLayerId(data) == GARMIN_LAYERID_APPL) {
+               } else if (bulk_data || (data_length >= sizeof(u32) &&
+                               getLayerId(data) == GARMIN_LAYERID_APPL)) {
 
                        spin_lock_irqsave(&garmin_data_p->lock, flags);
                        garmin_data_p->flags |= APP_RESP_SEEN;
index 613f91a..ce0401d 100644 (file)
@@ -173,6 +173,7 @@ static const struct usb_device_id id_table[] = {
        {DEVICE_SWI(0x413c, 0x81b3)},   /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card (rev3) */
        {DEVICE_SWI(0x413c, 0x81b5)},   /* Dell Wireless 5811e QDL */
        {DEVICE_SWI(0x413c, 0x81b6)},   /* Dell Wireless 5811e QDL */
+       {DEVICE_SWI(0x413c, 0x81cc)},   /* Dell Wireless 5816e */
        {DEVICE_SWI(0x413c, 0x81cf)},   /* Dell Wireless 5819 */
        {DEVICE_SWI(0x413c, 0x81d0)},   /* Dell Wireless 5819 */
        {DEVICE_SWI(0x413c, 0x81d1)},   /* Dell Wireless 5818 */
index 1b23741..37157ed 100644 (file)
  * and don't forget to CC: the USB development list <linux-usb@vger.kernel.org>
  */
 
+/* Reported-by: Julian Groß <julian.g@posteo.de> */
+UNUSUAL_DEV(0x059f, 0x105f, 0x0000, 0x9999,
+               "LaCie",
+               "2Big Quadra USB3",
+               USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+               US_FL_NO_REPORT_OPCODES),
+
 /*
  * Apricorn USB3 dongle sometimes returns "USBSUSBSUSBS" in response to SCSI
  * commands in UAS mode.  Observed with the 1.28 firmware; are there others?
index f5c5e0a..67c5139 100644 (file)
@@ -157,6 +157,10 @@ pmc_usb_mux_dp(struct pmc_usb_port *port, struct typec_mux_state *state)
        req.mode_data |= (state->mode - TYPEC_STATE_MODAL) <<
                         PMC_USB_ALTMODE_DP_MODE_SHIFT;
 
+       if (data->status & DP_STATUS_HPD_STATE)
+               req.mode_data |= PMC_USB_DP_HPD_LVL <<
+                                PMC_USB_ALTMODE_DP_MODE_SHIFT;
+
        return pmc_usb_command(port, (void *)&req, sizeof(req));
 }
 
@@ -298,11 +302,11 @@ static int pmc_usb_register_port(struct pmc_usb *pmc, int index,
        struct typec_mux_desc mux_desc = { };
        int ret;
 
-       ret = fwnode_property_read_u8(fwnode, "usb2-port", &port->usb2_port);
+       ret = fwnode_property_read_u8(fwnode, "usb2-port-number", &port->usb2_port);
        if (ret)
                return ret;
 
-       ret = fwnode_property_read_u8(fwnode, "usb3-port", &port->usb3_port);
+       ret = fwnode_property_read_u8(fwnode, "usb3-port-number", &port->usb3_port);
        if (ret)
                return ret;
 
index 0716a9c..fb4e944 100644 (file)
@@ -181,14 +181,14 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
                        break;
                }
 
-               vhost_add_used(vq, head, sizeof(pkt->hdr) + payload_len);
-               added = true;
-
-               /* Deliver to monitoring devices all correctly transmitted
-                * packets.
+               /* Deliver to monitoring devices all packets that we
+                * will transmit.
                 */
                virtio_transport_deliver_tap_pkt(pkt);
 
+               vhost_add_used(vq, head, sizeof(pkt->hdr) + payload_len);
+               added = true;
+
                pkt->off += payload_len;
                total_len += payload_len;
 
@@ -196,6 +196,12 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
                 * to send it with the next available buffer.
                 */
                if (pkt->off < pkt->len) {
+                       /* We are queueing the same virtio_vsock_pkt to handle
+                        * the remaining bytes, and we want to deliver it
+                        * to monitoring devices in the next iteration.
+                        */
+                       pkt->tap_delivered = false;
+
                        spin_lock_bh(&vsock->send_pkt_list_lock);
                        list_add(&pkt->list, &vsock->send_pkt_list);
                        spin_unlock_bh(&vsock->send_pkt_list_lock);
index 185db76..5f3aa4d 100644 (file)
@@ -2749,7 +2749,7 @@ int ceph_try_get_caps(struct inode *inode, int need, int want,
 
        ret = try_get_cap_refs(inode, need, want, 0, flags, got);
        /* three special error codes */
-       if (ret == -EAGAIN || ret == -EFBIG || ret == -EAGAIN)
+       if (ret == -EAGAIN || ret == -EFBIG || ret == -ESTALE)
                ret = 0;
        return ret;
 }
@@ -3746,6 +3746,7 @@ retry:
                WARN_ON(1);
                tsession = NULL;
                target = -1;
+               mutex_lock(&session->s_mutex);
        }
        goto retry;
 
index 481ac97..dcaed75 100644 (file)
@@ -271,7 +271,7 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
                                    &congestion_kb_fops);
 
        snprintf(name, sizeof(name), "../../bdi/%s",
-                dev_name(fsc->sb->s_bdi->dev));
+                bdi_dev_name(fsc->sb->s_bdi));
        fsc->debugfs_bdi =
                debugfs_create_symlink("bdi",
                                       fsc->client->debugfs_dir,
index 486f91f..7c63abf 100644 (file)
@@ -3251,8 +3251,7 @@ static void handle_session(struct ceph_mds_session *session,
        void *end = p + msg->front.iov_len;
        struct ceph_mds_session_head *h;
        u32 op;
-       u64 seq;
-       unsigned long features = 0;
+       u64 seq, features = 0;
        int wake = 0;
        bool blacklisted = false;
 
@@ -3271,9 +3270,8 @@ static void handle_session(struct ceph_mds_session *session,
                        goto bad;
                /* version >= 3, feature bits */
                ceph_decode_32_safe(&p, end, len, bad);
-               ceph_decode_need(&p, end, len, bad);
-               memcpy(&features, p, min_t(size_t, len, sizeof(features)));
-               p += len;
+               ceph_decode_64_safe(&p, end, features, bad);
+               p += len - sizeof(features);
        }
 
        mutex_lock(&mdsc->mutex);
index de56dee..19507e2 100644 (file)
@@ -159,8 +159,8 @@ static struct inode *lookup_quotarealm_inode(struct ceph_mds_client *mdsc,
        }
 
        if (IS_ERR(in)) {
-               pr_warn("Can't lookup inode %llx (err: %ld)\n",
-                       realm->ino, PTR_ERR(in));
+               dout("Can't lookup inode %llx (err: %ld)\n",
+                    realm->ino, PTR_ERR(in));
                qri->timeout = jiffies + msecs_to_jiffies(60 * 1000); /* XXX */
        } else {
                qri->timeout = 0;
index cf7b7e1..cb73365 100644 (file)
@@ -1519,6 +1519,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
                spin_lock(&configfs_dirent_lock);
                configfs_detach_rollback(dentry);
                spin_unlock(&configfs_dirent_lock);
+               config_item_put(parent_item);
                return -EINTR;
        }
        frag->frag_dead = true;
index 408418e..478a0d8 100644 (file)
@@ -788,6 +788,14 @@ void do_coredump(const kernel_siginfo_t *siginfo)
        if (displaced)
                put_files_struct(displaced);
        if (!dump_interrupted()) {
+               /*
+                * umh disabled with CONFIG_STATIC_USERMODEHELPER_PATH="" would
+                * have this set to NULL.
+                */
+               if (!cprm.file) {
+                       pr_info("Core dump to |%s disabled\n", cn.corename);
+                       goto close_fail;
+               }
                file_start_write(cprm.file);
                core_dumped = binfmt->core_dump(&cprm);
                file_end_write(cprm.file);
index 8c59664..aba03ee 100644 (file)
@@ -1171,6 +1171,10 @@ static inline bool chain_epi_lockless(struct epitem *epi)
 {
        struct eventpoll *ep = epi->ep;
 
+       /* Fast preliminary check */
+       if (epi->next != EP_UNACTIVE_PTR)
+               return false;
+
        /* Check that the same epi has not been just chained from another CPU */
        if (cmpxchg(&epi->next, EP_UNACTIVE_PTR, NULL) != EP_UNACTIVE_PTR)
                return false;
@@ -1237,16 +1241,12 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
         * chained in ep->ovflist and requeued later on.
         */
        if (READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR) {
-               if (epi->next == EP_UNACTIVE_PTR &&
-                   chain_epi_lockless(epi))
+               if (chain_epi_lockless(epi))
+                       ep_pm_stay_awake_rcu(epi);
+       } else if (!ep_is_linked(epi)) {
+               /* In the usual case, add event to ready list. */
+               if (list_add_tail_lockless(&epi->rdllink, &ep->rdllist))
                        ep_pm_stay_awake_rcu(epi);
-               goto out_unlock;
-       }
-
-       /* If this file is already in the ready list we exit soon */
-       if (!ep_is_linked(epi) &&
-           list_add_tail_lockless(&epi->rdllink, &ep->rdllist)) {
-               ep_pm_stay_awake_rcu(epi);
        }
 
        /*
@@ -1822,7 +1822,6 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
 {
        int res = 0, eavail, timed_out = 0;
        u64 slack = 0;
-       bool waiter = false;
        wait_queue_entry_t wait;
        ktime_t expires, *to = NULL;
 
@@ -1867,21 +1866,23 @@ fetch_events:
         */
        ep_reset_busy_poll_napi_id(ep);
 
-       /*
-        * We don't have any available event to return to the caller.  We need
-        * to sleep here, and we will be woken by ep_poll_callback() when events
-        * become available.
-        */
-       if (!waiter) {
-               waiter = true;
-               init_waitqueue_entry(&wait, current);
-
+       do {
+               /*
+                * Internally init_wait() uses autoremove_wake_function(),
+                * thus wait entry is removed from the wait queue on each
+                * wakeup. Why it is important? In case of several waiters
+                * each new wakeup will hit the next waiter, giving it the
+                * chance to harvest new event. Otherwise wakeup can be
+                * lost. This is also good performance-wise, because on
+                * normal wakeup path no need to call __remove_wait_queue()
+                * explicitly, thus ep->lock is not taken, which halts the
+                * event delivery.
+                */
+               init_wait(&wait);
                write_lock_irq(&ep->lock);
                __add_wait_queue_exclusive(&ep->wq, &wait);
                write_unlock_irq(&ep->lock);
-       }
 
-       for (;;) {
                /*
                 * We don't want to sleep if the ep_poll_callback() sends us
                 * a wakeup in between. That's why we set the task state
@@ -1911,10 +1912,20 @@ fetch_events:
                        timed_out = 1;
                        break;
                }
-       }
+
+               /* We were woken up, thus go and try to harvest some events */
+               eavail = 1;
+
+       } while (0);
 
        __set_current_state(TASK_RUNNING);
 
+       if (!list_empty_careful(&wait.entry)) {
+               write_lock_irq(&ep->lock);
+               __remove_wait_queue(&ep->wq, &wait);
+               write_unlock_irq(&ep->lock);
+       }
+
 send_events:
        /*
         * Try to transfer events to user space. In case we get 0 events and
@@ -1925,12 +1936,6 @@ send_events:
            !(res = ep_send_events(ep, events, maxevents)) && !timed_out)
                goto fetch_events;
 
-       if (waiter) {
-               write_lock_irq(&ep->lock);
-               __remove_wait_queue(&ep->wq, &wait);
-               write_unlock_irq(&ep->lock);
-       }
-
        return res;
 }
 
index 936a8ec..6306eaa 100644 (file)
@@ -528,10 +528,12 @@ lower_metapath:
 
                /* Advance in metadata tree. */
                (mp->mp_list[hgt])++;
-               if (mp->mp_list[hgt] >= sdp->sd_inptrs) {
-                       if (!hgt)
+               if (hgt) {
+                       if (mp->mp_list[hgt] >= sdp->sd_inptrs)
+                               goto lower_metapath;
+               } else {
+                       if (mp->mp_list[hgt] >= sdp->sd_diptrs)
                                break;
-                       goto lower_metapath;
                }
 
 fill_up_metapath:
@@ -876,10 +878,9 @@ static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
                                        ret = -ENOENT;
                                        goto unlock;
                                } else {
-                                       /* report a hole */
                                        iomap->offset = pos;
                                        iomap->length = length;
-                                       goto do_alloc;
+                                       goto hole_found;
                                }
                        }
                        iomap->length = size;
@@ -933,8 +934,6 @@ unlock:
        return ret;
 
 do_alloc:
-       iomap->addr = IOMAP_NULL_ADDR;
-       iomap->type = IOMAP_HOLE;
        if (flags & IOMAP_REPORT) {
                if (pos >= size)
                        ret = -ENOENT;
@@ -956,6 +955,9 @@ do_alloc:
                if (pos < size && height == ip->i_height)
                        ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
        }
+hole_found:
+       iomap->addr = IOMAP_NULL_ADDR;
+       iomap->type = IOMAP_HOLE;
        goto out;
 }
 
index 29f9b66..bf70e3b 100644 (file)
@@ -613,7 +613,7 @@ __acquires(&gl->gl_lockref.lock)
                                fs_err(sdp, "Error %d syncing glock \n", ret);
                                gfs2_dump_glock(NULL, gl, true);
                        }
-                       return;
+                       goto skip_inval;
                }
        }
        if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) {
@@ -633,6 +633,7 @@ __acquires(&gl->gl_lockref.lock)
                clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
        }
 
+skip_inval:
        gfs2_glock_hold(gl);
        /*
         * Check for an error encountered since we called go_sync and go_inval.
@@ -722,9 +723,6 @@ __acquires(&gl->gl_lockref.lock)
                        goto out_unlock;
                if (nonblock)
                        goto out_sched;
-               smp_mb();
-               if (atomic_read(&gl->gl_revokes) != 0)
-                       goto out_sched;
                set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
                GLOCK_BUG_ON(gl, gl->gl_demote_state == LM_ST_EXCLUSIVE);
                gl->gl_target = gl->gl_demote_state;
index 70b2d3a..5acd3ce 100644 (file)
@@ -622,7 +622,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
                                error = finish_no_open(file, NULL);
                }
                gfs2_glock_dq_uninit(ghs);
-               return error;
+               goto fail;
        } else if (error != -ENOENT) {
                goto fail_gunlock;
        }
@@ -764,9 +764,11 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
                error = finish_open(file, dentry, gfs2_open_common);
        }
        gfs2_glock_dq_uninit(ghs);
+       gfs2_qa_put(ip);
        gfs2_glock_dq_uninit(ghs + 1);
        clear_bit(GLF_INODE_CREATING, &io_gl->gl_flags);
        gfs2_glock_put(io_gl);
+       gfs2_qa_put(dip);
        return error;
 
 fail_gunlock3:
@@ -776,7 +778,6 @@ fail_gunlock2:
        clear_bit(GLF_INODE_CREATING, &io_gl->gl_flags);
        gfs2_glock_put(io_gl);
 fail_free_inode:
-       gfs2_qa_put(ip);
        if (ip->i_gl) {
                glock_clear_object(ip->i_gl, ip);
                gfs2_glock_put(ip->i_gl);
@@ -1005,7 +1006,7 @@ out_gunlock:
 out_child:
        gfs2_glock_dq(ghs);
 out_parent:
-       gfs2_qa_put(ip);
+       gfs2_qa_put(dip);
        gfs2_holder_uninit(ghs);
        gfs2_holder_uninit(ghs + 1);
        return error;
index 3a75843..0644e58 100644 (file)
@@ -669,13 +669,13 @@ void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
        struct buffer_head *bh = bd->bd_bh;
        struct gfs2_glock *gl = bd->bd_gl;
 
+       sdp->sd_log_num_revoke++;
+       if (atomic_inc_return(&gl->gl_revokes) == 1)
+               gfs2_glock_hold(gl);
        bh->b_private = NULL;
        bd->bd_blkno = bh->b_blocknr;
        gfs2_remove_from_ail(bd); /* drops ref on bh */
        bd->bd_bh = NULL;
-       sdp->sd_log_num_revoke++;
-       if (atomic_inc_return(&gl->gl_revokes) == 1)
-               gfs2_glock_hold(gl);
        set_bit(GLF_LFLUSH, &gl->gl_flags);
        list_add(&bd->bd_list, &sdp->sd_log_revokes);
 }
@@ -1131,6 +1131,10 @@ int gfs2_logd(void *data)
 
        while (!kthread_should_stop()) {
 
+               if (gfs2_withdrawn(sdp)) {
+                       msleep_interruptible(HZ);
+                       continue;
+               }
                /* Check for errors writing to the journal */
                if (sdp->sd_log_error) {
                        gfs2_lm(sdp,
@@ -1139,6 +1143,7 @@ int gfs2_logd(void *data)
                                "prevent further damage.\n",
                                sdp->sd_fsname, sdp->sd_log_error);
                        gfs2_withdraw(sdp);
+                       continue;
                }
 
                did_flush = false;
index 5ea9675..48b54ec 100644 (file)
@@ -263,7 +263,7 @@ static struct bio *gfs2_log_alloc_bio(struct gfs2_sbd *sdp, u64 blkno,
        struct super_block *sb = sdp->sd_vfs;
        struct bio *bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
 
-       bio->bi_iter.bi_sector = blkno << (sb->s_blocksize_bits - 9);
+       bio->bi_iter.bi_sector = blkno << sdp->sd_fsb2bb_shift;
        bio_set_dev(bio, sb->s_bdev);
        bio->bi_end_io = end_io;
        bio->bi_private = sdp;
@@ -509,7 +509,7 @@ int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head,
        unsigned int bsize = sdp->sd_sb.sb_bsize, off;
        unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift;
        unsigned int shift = PAGE_SHIFT - bsize_shift;
-       unsigned int readahead_blocks = BIO_MAX_PAGES << shift;
+       unsigned int max_bio_size = 2 * 1024 * 1024;
        struct gfs2_journal_extent *je;
        int sz, ret = 0;
        struct bio *bio = NULL;
@@ -537,12 +537,17 @@ int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head,
                                off = 0;
                        }
 
-                       if (!bio || (bio_chained && !off)) {
+                       if (!bio || (bio_chained && !off) ||
+                           bio->bi_iter.bi_size >= max_bio_size) {
                                /* start new bio */
                        } else {
-                               sz = bio_add_page(bio, page, bsize, off);
-                               if (sz == bsize)
-                                       goto block_added;
+                               sector_t sector = dblock << sdp->sd_fsb2bb_shift;
+
+                               if (bio_end_sector(bio) == sector) {
+                                       sz = bio_add_page(bio, page, bsize, off);
+                                       if (sz == bsize)
+                                               goto block_added;
+                               }
                                if (off) {
                                        unsigned int blocks =
                                                (PAGE_SIZE - off) >> bsize_shift;
@@ -568,7 +573,7 @@ block_added:
                        off += bsize;
                        if (off == PAGE_SIZE)
                                page = NULL;
-                       if (blocks_submitted < blocks_read + readahead_blocks) {
+                       if (blocks_submitted < 2 * max_bio_size >> bsize_shift) {
                                /* Keep at least one bio in flight */
                                continue;
                        }
index 4b72abc..9856cc2 100644 (file)
@@ -252,7 +252,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
        int num = 0;
 
        if (unlikely(gfs2_withdrawn(sdp)) &&
-           (!sdp->sd_jdesc || (blkno != sdp->sd_jdesc->jd_no_addr))) {
+           (!sdp->sd_jdesc || gl != sdp->sd_jinode_gl)) {
                *bhp = NULL;
                return -EIO;
        }
index cc0c4b5..8259fef 100644 (file)
@@ -1051,8 +1051,7 @@ int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
        u32 x;
        int error = 0;
 
-       if (capable(CAP_SYS_RESOURCE) ||
-           sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
+       if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
                return 0;
 
        error = gfs2_quota_hold(ip, uid, gid);
@@ -1125,7 +1124,7 @@ void gfs2_quota_unlock(struct gfs2_inode *ip)
        int found;
 
        if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags))
-               goto out;
+               return;
 
        for (x = 0; x < ip->i_qadata->qa_qd_num; x++) {
                struct gfs2_quota_data *qd;
@@ -1162,7 +1161,6 @@ void gfs2_quota_unlock(struct gfs2_inode *ip)
                        qd_unlock(qda[x]);
        }
 
-out:
        gfs2_quota_unhold(ip);
 }
 
@@ -1210,9 +1208,6 @@ int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid,
        if (!test_bit(GIF_QD_LOCKED, &ip->i_flags))
                return 0;
 
-        if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
-                return 0;
-
        for (x = 0; x < ip->i_qadata->qa_qd_num; x++) {
                qd = ip->i_qadata->qa_qd[x];
 
@@ -1270,7 +1265,9 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
        if (ip->i_diskflags & GFS2_DIF_SYSTEM)
                return;
 
-       BUG_ON(ip->i_qadata->qa_ref <= 0);
+       if (gfs2_assert_withdraw(sdp, ip->i_qadata &&
+                                ip->i_qadata->qa_ref > 0))
+               return;
        for (x = 0; x < ip->i_qadata->qa_qd_num; x++) {
                qd = ip->i_qadata->qa_qd[x];
 
index 7f9ca8e..21ada33 100644 (file)
@@ -44,7 +44,8 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip,
        int ret;
 
        ap->allowed = UINT_MAX; /* Assume we are permitted a whole lot */
-       if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
+       if (capable(CAP_SYS_RESOURCE) ||
+           sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
                return 0;
        ret = gfs2_quota_lock(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
        if (ret)
index 37fc416..956fced 100644 (file)
@@ -1404,7 +1404,6 @@ out:
        if (ip->i_qadata)
                gfs2_assert_warn(sdp, ip->i_qadata->qa_ref == 0);
        gfs2_rs_delete(ip, NULL);
-       gfs2_qa_put(ip);
        gfs2_ordered_del_inode(ip);
        clear_inode(inode);
        gfs2_dir_hash_inval(ip);
index 9b64d40..aa087a5 100644 (file)
@@ -119,6 +119,12 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp)
        if (!sb_rdonly(sdp->sd_vfs))
                ret = gfs2_make_fs_ro(sdp);
 
+       if (sdp->sd_lockstruct.ls_ops->lm_lock == NULL) { /* lock_nolock */
+               if (!ret)
+                       ret = -EIO;
+               clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags);
+               goto skip_recovery;
+       }
        /*
         * Drop the glock for our journal so another node can recover it.
         */
@@ -159,10 +165,6 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp)
                wait_on_bit(&gl->gl_flags, GLF_FREEING, TASK_UNINTERRUPTIBLE);
        }
 
-       if (sdp->sd_lockstruct.ls_ops->lm_lock == NULL) { /* lock_nolock */
-               clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags);
-               goto skip_recovery;
-       }
        /*
         * Dequeue the "live" glock, but keep a reference so it's never freed.
         */
index 0b91b06..979d9f9 100644 (file)
@@ -680,8 +680,6 @@ struct io_op_def {
        unsigned                needs_mm : 1;
        /* needs req->file assigned */
        unsigned                needs_file : 1;
-       /* needs req->file assigned IFF fd is >= 0 */
-       unsigned                fd_non_neg : 1;
        /* hash wq insertion if file is a regular file */
        unsigned                hash_reg_file : 1;
        /* unbound wq insertion if file is a non-regular file */
@@ -784,8 +782,6 @@ static const struct io_op_def io_op_defs[] = {
                .needs_file             = 1,
        },
        [IORING_OP_OPENAT] = {
-               .needs_file             = 1,
-               .fd_non_neg             = 1,
                .file_table             = 1,
                .needs_fs               = 1,
        },
@@ -799,8 +795,6 @@ static const struct io_op_def io_op_defs[] = {
        },
        [IORING_OP_STATX] = {
                .needs_mm               = 1,
-               .needs_file             = 1,
-               .fd_non_neg             = 1,
                .needs_fs               = 1,
                .file_table             = 1,
        },
@@ -837,8 +831,6 @@ static const struct io_op_def io_op_defs[] = {
                .buffer_select          = 1,
        },
        [IORING_OP_OPENAT2] = {
-               .needs_file             = 1,
-               .fd_non_neg             = 1,
                .file_table             = 1,
                .needs_fs               = 1,
        },
@@ -5368,15 +5360,6 @@ static void io_wq_submit_work(struct io_wq_work **workptr)
        io_steal_work(req, workptr);
 }
 
-static int io_req_needs_file(struct io_kiocb *req, int fd)
-{
-       if (!io_op_defs[req->opcode].needs_file)
-               return 0;
-       if ((fd == -1 || fd == AT_FDCWD) && io_op_defs[req->opcode].fd_non_neg)
-               return 0;
-       return 1;
-}
-
 static inline struct file *io_file_from_index(struct io_ring_ctx *ctx,
                                              int index)
 {
@@ -5414,14 +5397,11 @@ static int io_file_get(struct io_submit_state *state, struct io_kiocb *req,
 }
 
 static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req,
-                          int fd, unsigned int flags)
+                          int fd)
 {
        bool fixed;
 
-       if (!io_req_needs_file(req, fd))
-               return 0;
-
-       fixed = (flags & IOSQE_FIXED_FILE);
+       fixed = (req->flags & REQ_F_FIXED_FILE) != 0;
        if (unlikely(!fixed && req->needs_fixed_file))
                return -EBADF;
 
@@ -5798,7 +5778,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
                       struct io_submit_state *state, bool async)
 {
        unsigned int sqe_flags;
-       int id, fd;
+       int id;
 
        /*
         * All io need record the previous position, if LINK vs DARIN,
@@ -5850,8 +5830,10 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
                                        IOSQE_ASYNC | IOSQE_FIXED_FILE |
                                        IOSQE_BUFFER_SELECT | IOSQE_IO_LINK);
 
-       fd = READ_ONCE(sqe->fd);
-       return io_req_set_file(state, req, fd, sqe_flags);
+       if (!io_op_defs[req->opcode].needs_file)
+               return 0;
+
+       return io_req_set_file(state, req, READ_ONCE(sqe->fd));
 }
 
 static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
@@ -7360,11 +7342,9 @@ static int io_uring_release(struct inode *inode, struct file *file)
 static void io_uring_cancel_files(struct io_ring_ctx *ctx,
                                  struct files_struct *files)
 {
-       struct io_kiocb *req;
-       DEFINE_WAIT(wait);
-
        while (!list_empty_careful(&ctx->inflight_list)) {
-               struct io_kiocb *cancel_req = NULL;
+               struct io_kiocb *cancel_req = NULL, *req;
+               DEFINE_WAIT(wait);
 
                spin_lock_irq(&ctx->inflight_lock);
                list_for_each_entry(req, &ctx->inflight_list, inflight_entry) {
@@ -7404,6 +7384,7 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
                         */
                        if (refcount_sub_and_test(2, &cancel_req->refs)) {
                                io_put_req(cancel_req);
+                               finish_wait(&ctx->inflight_wait, &wait);
                                continue;
                        }
                }
@@ -7411,8 +7392,8 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
                io_wq_cancel_work(ctx->io_wq, &cancel_req->work);
                io_put_req(cancel_req);
                schedule();
+               finish_wait(&ctx->inflight_wait, &wait);
        }
-       finish_wait(&ctx->inflight_wait, &wait);
 }
 
 static int io_uring_flush(struct file *file, void *data)
@@ -7761,7 +7742,8 @@ err:
        return ret;
 }
 
-static int io_uring_create(unsigned entries, struct io_uring_params *p)
+static int io_uring_create(unsigned entries, struct io_uring_params *p,
+                          struct io_uring_params __user *params)
 {
        struct user_struct *user = NULL;
        struct io_ring_ctx *ctx;
@@ -7853,6 +7835,14 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p)
        p->cq_off.overflow = offsetof(struct io_rings, cq_overflow);
        p->cq_off.cqes = offsetof(struct io_rings, cqes);
 
+       p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP |
+                       IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS |
+                       IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL;
+
+       if (copy_to_user(params, p, sizeof(*p))) {
+               ret = -EFAULT;
+               goto err;
+       }
        /*
         * Install ring fd as the very last thing, so we don't risk someone
         * having closed it before we finish setup
@@ -7861,9 +7851,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p)
        if (ret < 0)
                goto err;
 
-       p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP |
-                       IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS |
-                       IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL;
        trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags);
        return ret;
 err:
@@ -7879,7 +7866,6 @@ err:
 static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
 {
        struct io_uring_params p;
-       long ret;
        int i;
 
        if (copy_from_user(&p, params, sizeof(p)))
@@ -7894,14 +7880,7 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
                        IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ))
                return -EINVAL;
 
-       ret = io_uring_create(entries, &p);
-       if (ret < 0)
-               return ret;
-
-       if (copy_to_user(params, &p, sizeof(p)))
-               return -EFAULT;
-
-       return ret;
+       return  io_uring_create(entries, &p, params);
 }
 
 SYSCALL_DEFINE2(io_uring_setup, u32, entries,
index 4735def..fd0a1e7 100644 (file)
@@ -1118,6 +1118,10 @@ long do_splice(struct file *in, loff_t __user *off_in,
        loff_t offset;
        long ret;
 
+       if (unlikely(!(in->f_mode & FMODE_READ) ||
+                    !(out->f_mode & FMODE_WRITE)))
+               return -EBADF;
+
        ipipe = get_pipe_info(in);
        opipe = get_pipe_info(out);
 
@@ -1125,12 +1129,6 @@ long do_splice(struct file *in, loff_t __user *off_in,
                if (off_in || off_out)
                        return -ESPIPE;
 
-               if (!(in->f_mode & FMODE_READ))
-                       return -EBADF;
-
-               if (!(out->f_mode & FMODE_WRITE))
-                       return -EBADF;
-
                /* Splicing to self would be fun, but... */
                if (ipipe == opipe)
                        return -EINVAL;
@@ -1153,9 +1151,6 @@ long do_splice(struct file *in, loff_t __user *off_in,
                        offset = out->f_pos;
                }
 
-               if (unlikely(!(out->f_mode & FMODE_WRITE)))
-                       return -EBADF;
-
                if (unlikely(out->f_flags & O_APPEND))
                        return -EINVAL;
 
@@ -1440,15 +1435,11 @@ SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in,
        error = -EBADF;
        in = fdget(fd_in);
        if (in.file) {
-               if (in.file->f_mode & FMODE_READ) {
-                       out = fdget(fd_out);
-                       if (out.file) {
-                               if (out.file->f_mode & FMODE_WRITE)
-                                       error = do_splice(in.file, off_in,
-                                                         out.file, off_out,
-                                                         len, flags);
-                               fdput(out);
-                       }
+               out = fdget(fd_out);
+               if (out.file) {
+                       error = do_splice(in.file, off_in, out.file, off_out,
+                                         len, flags);
+                       fdput(out);
                }
                fdput(in);
        }
@@ -1770,6 +1761,10 @@ static long do_tee(struct file *in, struct file *out, size_t len,
        struct pipe_inode_info *opipe = get_pipe_info(out);
        int ret = -EINVAL;
 
+       if (unlikely(!(in->f_mode & FMODE_READ) ||
+                    !(out->f_mode & FMODE_WRITE)))
+               return -EBADF;
+
        /*
         * Duplicate the contents of ipipe to opipe without actually
         * copying the data.
@@ -1795,7 +1790,7 @@ static long do_tee(struct file *in, struct file *out, size_t len,
 
 SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags)
 {
-       struct fd in;
+       struct fd in, out;
        int error;
 
        if (unlikely(flags & ~SPLICE_F_ALL))
@@ -1807,14 +1802,10 @@ SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags)
        error = -EBADF;
        in = fdget(fdin);
        if (in.file) {
-               if (in.file->f_mode & FMODE_READ) {
-                       struct fd out = fdget(fdout);
-                       if (out.file) {
-                               if (out.file->f_mode & FMODE_WRITE)
-                                       error = do_tee(in.file, out.file,
-                                                       len, flags);
-                               fdput(out);
-                       }
+               out = fdget(fdout);
+               if (out.file) {
+                       error = do_tee(in.file, out.file, len, flags);
+                       fdput(out);
                }
                fdput(in);
        }
index 675e269..8fe03b4 100644 (file)
@@ -164,7 +164,7 @@ static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc)
                goto fail_free;
        }
 
-       err = super_setup_bdi_name(sb, "vboxsf-%s.%d", fc->source, sbi->bdi_id);
+       err = super_setup_bdi_name(sb, "vboxsf-%d", sbi->bdi_id);
        if (err)
                goto fail_free;
 
index 99134d4..320f811 100644 (file)
@@ -48,7 +48,7 @@ struct videomode;
  * @MODE_HSYNC: hsync out of range
  * @MODE_VSYNC: vsync out of range
  * @MODE_H_ILLEGAL: mode has illegal horizontal timings
- * @MODE_V_ILLEGAL: mode has illegal horizontal timings
+ * @MODE_V_ILLEGAL: mode has illegal vertical timings
  * @MODE_BAD_WIDTH: requires an unsupported linepitch
  * @MODE_NOMODE: no mode with a matching name
  * @MODE_NO_INTERLACE: interlaced mode not supported
index 26f0ecf..0bbfd64 100644 (file)
@@ -65,6 +65,7 @@ struct amba_device {
        struct device           dev;
        struct resource         res;
        struct clk              *pclk;
+       struct device_dma_parameters dma_parms;
        unsigned int            periphid;
        unsigned int            cid;
        struct amba_cs_uci_id   uci;
index ee577a8..7367150 100644 (file)
@@ -219,6 +219,7 @@ struct backing_dev_info {
        wait_queue_head_t wb_waitq;
 
        struct device *dev;
+       char dev_name[64];
        struct device *owner;
 
        struct timer_list laptop_mode_wb_timer;
index f88197c..c9ad5c3 100644 (file)
@@ -505,13 +505,6 @@ static inline int bdi_rw_congested(struct backing_dev_info *bdi)
                                  (1 << WB_async_congested));
 }
 
-extern const char *bdi_unknown_name;
-
-static inline const char *bdi_dev_name(struct backing_dev_info *bdi)
-{
-       if (!bdi || !bdi->dev)
-               return bdi_unknown_name;
-       return dev_name(bdi->dev);
-}
+const char *bdi_dev_name(struct backing_dev_info *bdi);
 
 #endif /* _LINUX_BACKING_DEV_H */
index 9cd4455..1bdd027 100644 (file)
@@ -55,7 +55,7 @@ LSM_HOOK(void, LSM_RET_VOID, bprm_committing_creds, struct linux_binprm *bprm)
 LSM_HOOK(void, LSM_RET_VOID, bprm_committed_creds, struct linux_binprm *bprm)
 LSM_HOOK(int, 0, fs_context_dup, struct fs_context *fc,
         struct fs_context *src_sc)
-LSM_HOOK(int, 0, fs_context_parse_param, struct fs_context *fc,
+LSM_HOOK(int, -ENOPARAM, fs_context_parse_param, struct fs_context *fc,
         struct fs_parameter *param)
 LSM_HOOK(int, 0, sb_alloc_security, struct super_block *sb)
 LSM_HOOK(void, LSM_RET_VOID, sb_free_security, struct super_block *sb)
index ad19960..3d7c3c2 100644 (file)
@@ -53,9 +53,9 @@ enum mhi_callback {
  * @MHI_CHAIN: Linked transfer
  */
 enum mhi_flags {
-       MHI_EOB,
-       MHI_EOT,
-       MHI_CHAIN,
+       MHI_EOB = BIT(0),
+       MHI_EOT = BIT(1),
+       MHI_CHAIN = BIT(2),
 };
 
 /**
@@ -335,14 +335,15 @@ struct mhi_controller_config {
  * @syserr_worker: System error worker
  * @state_event: State change event
  * @status_cb: CB function to notify power states of the device (required)
- * @link_status: CB function to query link status of the device (required)
  * @wake_get: CB function to assert device wake (optional)
  * @wake_put: CB function to de-assert device wake (optional)
  * @wake_toggle: CB function to assert and de-assert device wake (optional)
  * @runtime_get: CB function to controller runtime resume (required)
- * @runtimet_put: CB function to decrement pm usage (required)
+ * @runtime_put: CB function to decrement pm usage (required)
  * @map_single: CB function to create TRE buffer
  * @unmap_single: CB function to destroy TRE buffer
+ * @read_reg: Read a MHI register via the physical link (required)
+ * @write_reg: Write a MHI register via the physical link (required)
  * @buffer_len: Bounce buffer length
  * @bounce_buf: Use of bounce buffer
  * @fbc_download: MHI host needs to do complete image transfer (optional)
@@ -417,7 +418,6 @@ struct mhi_controller {
 
        void (*status_cb)(struct mhi_controller *mhi_cntrl,
                          enum mhi_callback cb);
-       int (*link_status)(struct mhi_controller *mhi_cntrl);
        void (*wake_get)(struct mhi_controller *mhi_cntrl, bool override);
        void (*wake_put)(struct mhi_controller *mhi_cntrl, bool override);
        void (*wake_toggle)(struct mhi_controller *mhi_cntrl);
@@ -427,6 +427,10 @@ struct mhi_controller {
                          struct mhi_buf_info *buf);
        void (*unmap_single)(struct mhi_controller *mhi_cntrl,
                             struct mhi_buf_info *buf);
+       int (*read_reg)(struct mhi_controller *mhi_cntrl, void __iomem *addr,
+                       u32 *out);
+       void (*write_reg)(struct mhi_controller *mhi_cntrl, void __iomem *addr,
+                         u32 val);
 
        size_t buffer_len;
        bool bounce_buf;
index c588be8..0ecce6a 100644 (file)
@@ -185,6 +185,7 @@ int cros_ec_sensorhub_register_push_data(struct cros_ec_sensorhub *sensorhub,
 void cros_ec_sensorhub_unregister_push_data(struct cros_ec_sensorhub *sensorhub,
                                            u8 sensor_num);
 
+int cros_ec_sensorhub_ring_allocate(struct cros_ec_sensorhub *sensorhub);
 int cros_ec_sensorhub_ring_add(struct cros_ec_sensorhub *sensorhub);
 void cros_ec_sensorhub_ring_remove(void *arg);
 int cros_ec_sensorhub_ring_fifo_enable(struct cros_ec_sensorhub *sensorhub,
index bdc3575..77a2aad 100644 (file)
@@ -25,6 +25,7 @@ struct platform_device {
        bool            id_auto;
        struct device   dev;
        u64             platform_dma_mask;
+       struct device_dma_parameters dma_parms;
        u32             num_resources;
        struct resource *resource;
 
index 48c1b16..bc07e51 100644 (file)
@@ -21,6 +21,7 @@
 struct gss_ctx {
        struct gss_api_mech     *mech_type;
        void                    *internal_ctx_id;
+       unsigned int            slack, align;
 };
 
 #define GSS_C_NO_BUFFER                ((struct xdr_netobj) 0)
@@ -66,6 +67,7 @@ u32 gss_wrap(
 u32 gss_unwrap(
                struct gss_ctx          *ctx_id,
                int                     offset,
+               int                     len,
                struct xdr_buf          *inbuf);
 u32 gss_delete_sec_context(
                struct gss_ctx          **ctx_id);
@@ -126,6 +128,7 @@ struct gss_api_ops {
        u32 (*gss_unwrap)(
                        struct gss_ctx          *ctx_id,
                        int                     offset,
+                       int                     len,
                        struct xdr_buf          *buf);
        void (*gss_delete_sec_context)(
                        void                    *internal_ctx_id);
index c1d77dd..e8f8ffe 100644 (file)
@@ -83,7 +83,7 @@ struct gss_krb5_enctype {
        u32 (*encrypt_v2) (struct krb5_ctx *kctx, u32 offset,
                           struct xdr_buf *buf,
                           struct page **pages); /* v2 encryption function */
-       u32 (*decrypt_v2) (struct krb5_ctx *kctx, u32 offset,
+       u32 (*decrypt_v2) (struct krb5_ctx *kctx, u32 offset, u32 len,
                           struct xdr_buf *buf, u32 *headskip,
                           u32 *tailskip);      /* v2 decryption function */
 };
@@ -255,7 +255,7 @@ gss_wrap_kerberos(struct gss_ctx *ctx_id, int offset,
                struct xdr_buf *outbuf, struct page **pages);
 
 u32
-gss_unwrap_kerberos(struct gss_ctx *ctx_id, int offset,
+gss_unwrap_kerberos(struct gss_ctx *ctx_id, int offset, int len,
                struct xdr_buf *buf);
 
 
@@ -312,7 +312,7 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
                     struct page **pages);
 
 u32
-gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset,
+gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, u32 len,
                     struct xdr_buf *buf, u32 *plainoffset,
                     u32 *plainlen);
 
index 01bb419..22c207b 100644 (file)
@@ -184,6 +184,7 @@ xdr_adjust_iovec(struct kvec *iov, __be32 *p)
 extern void xdr_shift_buf(struct xdr_buf *, size_t);
 extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *);
 extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int);
+extern void xdr_buf_trim(struct xdr_buf *, unsigned int);
 extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int);
 extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int);
 
index 421c99c..4f8159e 100644 (file)
@@ -78,47 +78,6 @@ struct tcp_sack_block {
 #define TCP_SACK_SEEN     (1 << 0)   /*1 = peer is SACK capable, */
 #define TCP_DSACK_SEEN    (1 << 2)   /*1 = DSACK was received from peer*/
 
-#if IS_ENABLED(CONFIG_MPTCP)
-struct mptcp_options_received {
-       u64     sndr_key;
-       u64     rcvr_key;
-       u64     data_ack;
-       u64     data_seq;
-       u32     subflow_seq;
-       u16     data_len;
-       u16     mp_capable : 1,
-               mp_join : 1,
-               dss : 1,
-               add_addr : 1,
-               rm_addr : 1,
-               family : 4,
-               echo : 1,
-               backup : 1;
-       u32     token;
-       u32     nonce;
-       u64     thmac;
-       u8      hmac[20];
-       u8      join_id;
-       u8      use_map:1,
-               dsn64:1,
-               data_fin:1,
-               use_ack:1,
-               ack64:1,
-               mpc_map:1,
-               __unused:2;
-       u8      addr_id;
-       u8      rm_id;
-       union {
-               struct in_addr  addr;
-#if IS_ENABLED(CONFIG_MPTCP_IPV6)
-               struct in6_addr addr6;
-#endif
-       };
-       u64     ahmac;
-       u16     port;
-};
-#endif
-
 struct tcp_options_received {
 /*     PAWS/RTTM data  */
        int     ts_recent_stamp;/* Time we stored ts_recent (for aging) */
@@ -136,9 +95,6 @@ struct tcp_options_received {
        u8      num_sacks;      /* Number of SACK blocks                */
        u16     user_mss;       /* mss requested by user in ioctl       */
        u16     mss_clamp;      /* Maximal mss, negotiated at connection setup */
-#if IS_ENABLED(CONFIG_MPTCP)
-       struct mptcp_options_received   mptcp;
-#endif
 };
 
 static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
@@ -148,13 +104,6 @@ static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
 #if IS_ENABLED(CONFIG_SMC)
        rx_opt->smc_ok = 0;
 #endif
-#if IS_ENABLED(CONFIG_MPTCP)
-       rx_opt->mptcp.mp_capable = 0;
-       rx_opt->mptcp.mp_join = 0;
-       rx_opt->mptcp.add_addr = 0;
-       rx_opt->mptcp.rm_addr = 0;
-       rx_opt->mptcp.dss = 0;
-#endif
 }
 
 /* This is the max number of SACKS that we'll generate and process. It's safe
index 0d1fe92..6f6ade6 100644 (file)
@@ -3,6 +3,8 @@
 #define _LINUX_VIRTIO_NET_H
 
 #include <linux/if_vlan.h>
+#include <uapi/linux/tcp.h>
+#include <uapi/linux/udp.h>
 #include <uapi/linux/virtio_net.h>
 
 static inline int virtio_net_hdr_set_proto(struct sk_buff *skb,
@@ -28,17 +30,25 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
                                        bool little_endian)
 {
        unsigned int gso_type = 0;
+       unsigned int thlen = 0;
+       unsigned int ip_proto;
 
        if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
                switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
                case VIRTIO_NET_HDR_GSO_TCPV4:
                        gso_type = SKB_GSO_TCPV4;
+                       ip_proto = IPPROTO_TCP;
+                       thlen = sizeof(struct tcphdr);
                        break;
                case VIRTIO_NET_HDR_GSO_TCPV6:
                        gso_type = SKB_GSO_TCPV6;
+                       ip_proto = IPPROTO_TCP;
+                       thlen = sizeof(struct tcphdr);
                        break;
                case VIRTIO_NET_HDR_GSO_UDP:
                        gso_type = SKB_GSO_UDP;
+                       ip_proto = IPPROTO_UDP;
+                       thlen = sizeof(struct udphdr);
                        break;
                default:
                        return -EINVAL;
@@ -57,16 +67,22 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
 
                if (!skb_partial_csum_set(skb, start, off))
                        return -EINVAL;
+
+               if (skb_transport_offset(skb) + thlen > skb_headlen(skb))
+                       return -EINVAL;
        } else {
                /* gso packets without NEEDS_CSUM do not set transport_offset.
                 * probe and drop if does not match one of the above types.
                 */
                if (gso_type && skb->network_header) {
+                       struct flow_keys_basic keys;
+
                        if (!skb->protocol)
                                virtio_net_hdr_set_proto(skb, hdr);
 retry:
-                       skb_probe_transport_header(skb);
-                       if (!skb_transport_header_was_set(skb)) {
+                       if (!skb_flow_dissect_flow_keys_basic(NULL, skb, &keys,
+                                                             NULL, 0, 0, 0,
+                                                             0)) {
                                /* UFO does not specify ipv4 or 6: try both */
                                if (gso_type & SKB_GSO_UDP &&
                                    skb->protocol == htons(ETH_P_IP)) {
@@ -75,6 +91,12 @@ retry:
                                }
                                return -EINVAL;
                        }
+
+                       if (keys.control.thoff + thlen > skb_headlen(skb) ||
+                           keys.basic.ip_proto != ip_proto)
+                               return -EINVAL;
+
+                       skb_set_transport_header(skb, keys.control.thoff);
                }
        }
 
index 71c81e0..dc636b7 100644 (file)
@@ -48,6 +48,7 @@ struct virtio_vsock_pkt {
        u32 len;
        u32 off;
        bool reply;
+       bool tap_delivered;
 };
 
 struct virtio_vsock_pkt_info {
index 3619c6a..efc8350 100644 (file)
@@ -166,15 +166,18 @@ enum flow_action_mangle_base {
 enum flow_action_hw_stats_bit {
        FLOW_ACTION_HW_STATS_IMMEDIATE_BIT,
        FLOW_ACTION_HW_STATS_DELAYED_BIT,
+       FLOW_ACTION_HW_STATS_DISABLED_BIT,
 };
 
 enum flow_action_hw_stats {
-       FLOW_ACTION_HW_STATS_DISABLED = 0,
+       FLOW_ACTION_HW_STATS_DONT_CARE = 0,
        FLOW_ACTION_HW_STATS_IMMEDIATE =
                BIT(FLOW_ACTION_HW_STATS_IMMEDIATE_BIT),
        FLOW_ACTION_HW_STATS_DELAYED = BIT(FLOW_ACTION_HW_STATS_DELAYED_BIT),
        FLOW_ACTION_HW_STATS_ANY = FLOW_ACTION_HW_STATS_IMMEDIATE |
                                   FLOW_ACTION_HW_STATS_DELAYED,
+       FLOW_ACTION_HW_STATS_DISABLED =
+               BIT(FLOW_ACTION_HW_STATS_DISABLED_BIT),
 };
 
 typedef void (*action_destr)(void *priv);
@@ -325,7 +328,11 @@ __flow_action_hw_stats_check(const struct flow_action *action,
                return true;
        if (!flow_action_mixed_hw_stats_check(action, extack))
                return false;
+
        action_entry = flow_action_first_entry_get(action);
+       if (action_entry->hw_stats == FLOW_ACTION_HW_STATS_DONT_CARE)
+               return true;
+
        if (!check_allow_bit &&
            action_entry->hw_stats != FLOW_ACTION_HW_STATS_ANY) {
                NL_SET_ERR_MSG_MOD(extack, "Driver supports only default HW stats type \"any\"");
index c8e2beb..0f0d1ef 100644 (file)
@@ -99,6 +99,20 @@ static inline int IP_ECN_set_ce(struct iphdr *iph)
        return 1;
 }
 
+static inline int IP_ECN_set_ect1(struct iphdr *iph)
+{
+       u32 check = (__force u32)iph->check;
+
+       if ((iph->tos & INET_ECN_MASK) != INET_ECN_ECT_0)
+               return 0;
+
+       check += (__force u16)htons(0x100);
+
+       iph->check = (__force __sum16)(check + (check>=0xFFFF));
+       iph->tos ^= INET_ECN_MASK;
+       return 1;
+}
+
 static inline void IP_ECN_clear(struct iphdr *iph)
 {
        iph->tos &= ~INET_ECN_MASK;
@@ -134,6 +148,22 @@ static inline int IP6_ECN_set_ce(struct sk_buff *skb, struct ipv6hdr *iph)
        return 1;
 }
 
+static inline int IP6_ECN_set_ect1(struct sk_buff *skb, struct ipv6hdr *iph)
+{
+       __be32 from, to;
+
+       if ((ipv6_get_dsfield(iph) & INET_ECN_MASK) != INET_ECN_ECT_0)
+               return 0;
+
+       from = *(__be32 *)iph;
+       to = from ^ htonl(INET_ECN_MASK << 20);
+       *(__be32 *)iph = to;
+       if (skb->ip_summed == CHECKSUM_COMPLETE)
+               skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from),
+                                    (__force __wsum)to);
+       return 1;
+}
+
 static inline void ipv6_copy_dscp(unsigned int dscp, struct ipv6hdr *inner)
 {
        dscp &= ~INET_ECN_MASK;
@@ -159,6 +189,25 @@ static inline int INET_ECN_set_ce(struct sk_buff *skb)
        return 0;
 }
 
+static inline int INET_ECN_set_ect1(struct sk_buff *skb)
+{
+       switch (skb->protocol) {
+       case cpu_to_be16(ETH_P_IP):
+               if (skb_network_header(skb) + sizeof(struct iphdr) <=
+                   skb_tail_pointer(skb))
+                       return IP_ECN_set_ect1(ip_hdr(skb));
+               break;
+
+       case cpu_to_be16(ETH_P_IPV6):
+               if (skb_network_header(skb) + sizeof(struct ipv6hdr) <=
+                   skb_tail_pointer(skb))
+                       return IP6_ECN_set_ect1(skb, ipv6_hdr(skb));
+               break;
+       }
+
+       return 0;
+}
+
 /*
  * RFC 6040 4.2
  *  To decapsulate the inner header at the tunnel egress, a compliant
@@ -208,8 +257,12 @@ static inline int INET_ECN_decapsulate(struct sk_buff *skb,
        int rc;
 
        rc = __INET_ECN_decapsulate(outer, inner, &set_ce);
-       if (!rc && set_ce)
-               INET_ECN_set_ce(skb);
+       if (!rc) {
+               if (set_ce)
+                       INET_ECN_set_ce(skb);
+               else if ((outer & INET_ECN_MASK) == INET_ECN_ECT_1)
+                       INET_ECN_set_ect1(skb);
+       }
 
        return rc;
 }
index 80262d2..1d98828 100644 (file)
@@ -203,6 +203,7 @@ struct fib6_info {
 struct rt6_info {
        struct dst_entry                dst;
        struct fib6_info __rcu          *from;
+       int                             sernum;
 
        struct rt6key                   rt6i_dst;
        struct rt6key                   rt6i_src;
@@ -291,6 +292,9 @@ static inline u32 rt6_get_cookie(const struct rt6_info *rt)
        struct fib6_info *from;
        u32 cookie = 0;
 
+       if (rt->sernum)
+               return rt->sernum;
+
        rcu_read_lock();
 
        from = rcu_dereference(rt->from);
index 0e7c547..3bce201 100644 (file)
@@ -68,11 +68,8 @@ static inline bool rsk_is_mptcp(const struct request_sock *req)
        return tcp_rsk(req)->is_mptcp;
 }
 
-void mptcp_parse_option(const struct sk_buff *skb, const unsigned char *ptr,
-                       int opsize, struct tcp_options_received *opt_rx);
 bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
                       unsigned int *size, struct mptcp_out_options *opts);
-void mptcp_rcv_synsent(struct sock *sk);
 bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
                          struct mptcp_out_options *opts);
 bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
index ab96fb5..8e001e0 100644 (file)
@@ -437,6 +437,13 @@ static inline int rt_genid_ipv4(const struct net *net)
        return atomic_read(&net->ipv4.rt_genid);
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
+static inline int rt_genid_ipv6(const struct net *net)
+{
+       return atomic_read(&net->ipv6.fib6_sernum);
+}
+#endif
+
 static inline void rt_genid_bump_ipv4(struct net *net)
 {
        atomic_inc(&net->ipv4.rt_genid);
index 25d2ec4..8428aa6 100644 (file)
@@ -407,6 +407,7 @@ struct tcf_block {
        struct mutex lock;
        struct list_head chain_list;
        u32 index; /* block index for shared blocks */
+       u32 classid; /* which class this block belongs to */
        refcount_t refcnt;
        struct net *net;
        struct Qdisc *q;
index 6d6a394..efc8b61 100644 (file)
@@ -502,6 +502,7 @@ struct ocelot {
        unsigned int                    num_stats;
 
        int                             shared_queue_sz;
+       int                             num_mact_rows;
 
        struct net_device               *hw_bridge_dev;
        u16                             bridge_mask;
index 1897822..26d871f 100644 (file)
@@ -24,7 +24,7 @@
  *
  * @pid: Put 0 for global total, while positive pid for process total.
  *
- * @size: Virtual size of the allocation in bytes.
+ * @size: Size of the allocation in bytes.
  *
  */
 TRACE_EVENT(gpu_mem_total,
index 7848141..9c66e59 100644 (file)
@@ -33,7 +33,7 @@ TRACE_EVENT(wbt_stat,
        ),
 
        TP_fast_assign(
-               strlcpy(__entry->name, dev_name(bdi->dev),
+               strlcpy(__entry->name, bdi_dev_name(bdi),
                        ARRAY_SIZE(__entry->name));
                __entry->rmean          = stat[0].mean;
                __entry->rmin           = stat[0].min;
@@ -68,7 +68,7 @@ TRACE_EVENT(wbt_lat,
        ),
 
        TP_fast_assign(
-               strlcpy(__entry->name, dev_name(bdi->dev),
+               strlcpy(__entry->name, bdi_dev_name(bdi),
                        ARRAY_SIZE(__entry->name));
                __entry->lat = div_u64(lat, 1000);
        ),
@@ -105,7 +105,7 @@ TRACE_EVENT(wbt_step,
        ),
 
        TP_fast_assign(
-               strlcpy(__entry->name, dev_name(bdi->dev),
+               strlcpy(__entry->name, bdi_dev_name(bdi),
                        ARRAY_SIZE(__entry->name));
                __entry->msg    = msg;
                __entry->step   = step;
@@ -141,7 +141,7 @@ TRACE_EVENT(wbt_timer,
        ),
 
        TP_fast_assign(
-               strlcpy(__entry->name, dev_name(bdi->dev),
+               strlcpy(__entry->name, bdi_dev_name(bdi),
                        ARRAY_SIZE(__entry->name));
                __entry->status         = status;
                __entry->step           = step;
index 9e22ee8..9278a60 100644 (file)
@@ -39,22 +39,6 @@ config TOOLS_SUPPORT_RELR
 config CC_HAS_ASM_INLINE
        def_bool $(success,echo 'void foo(void) { asm inline (""); }' | $(CC) -x c - -c -o /dev/null)
 
-config CC_HAS_WARN_MAYBE_UNINITIALIZED
-       def_bool $(cc-option,-Wmaybe-uninitialized)
-       help
-         GCC >= 4.7 supports this option.
-
-config CC_DISABLE_WARN_MAYBE_UNINITIALIZED
-       bool
-       depends on CC_HAS_WARN_MAYBE_UNINITIALIZED
-       default CC_IS_GCC && GCC_VERSION < 40900  # unreliable for GCC < 4.9
-       help
-         GCC's -Wmaybe-uninitialized is not reliable by definition.
-         Lots of false positive warnings are produced in some cases.
-
-         If this option is enabled, -Wno-maybe-uninitialzed is passed
-         to the compiler to suppress maybe-uninitialized warnings.
-
 config CONSTRUCTORS
        bool
        depends on !UML
@@ -1257,14 +1241,12 @@ config CC_OPTIMIZE_FOR_PERFORMANCE
 config CC_OPTIMIZE_FOR_PERFORMANCE_O3
        bool "Optimize more for performance (-O3)"
        depends on ARC
-       imply CC_DISABLE_WARN_MAYBE_UNINITIALIZED  # avoid false positives
        help
          Choosing this option will pass "-O3" to your compiler to optimize
          the kernel yet more for performance.
 
 config CC_OPTIMIZE_FOR_SIZE
        bool "Optimize for size (-Os)"
-       imply CC_DISABLE_WARN_MAYBE_UNINITIALIZED  # avoid false positives
        help
          Choosing this option will pass "-Os" to your compiler resulting
          in a smaller kernel.
index 8ec1be4..7a38012 100644 (file)
@@ -542,7 +542,7 @@ void __weak free_initrd_mem(unsigned long start, unsigned long end)
 }
 
 #ifdef CONFIG_KEXEC_CORE
-static bool kexec_free_initrd(void)
+static bool __init kexec_free_initrd(void)
 {
        unsigned long crashk_start = (unsigned long)__va(crashk_res.start);
        unsigned long crashk_end   = (unsigned long)__va(crashk_res.end);
index a48617f..5803ecb 100644 (file)
@@ -257,6 +257,47 @@ static int __init loglevel(char *str)
 
 early_param("loglevel", loglevel);
 
+#ifdef CONFIG_BLK_DEV_INITRD
+static void * __init get_boot_config_from_initrd(u32 *_size, u32 *_csum)
+{
+       u32 size, csum;
+       char *data;
+       u32 *hdr;
+
+       if (!initrd_end)
+               return NULL;
+
+       data = (char *)initrd_end - BOOTCONFIG_MAGIC_LEN;
+       if (memcmp(data, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN))
+               return NULL;
+
+       hdr = (u32 *)(data - 8);
+       size = hdr[0];
+       csum = hdr[1];
+
+       data = ((void *)hdr) - size;
+       if ((unsigned long)data < initrd_start) {
+               pr_err("bootconfig size %d is greater than initrd size %ld\n",
+                       size, initrd_end - initrd_start);
+               return NULL;
+       }
+
+       /* Remove bootconfig from initramfs/initrd */
+       initrd_end = (unsigned long)data;
+       if (_size)
+               *_size = size;
+       if (_csum)
+               *_csum = csum;
+
+       return data;
+}
+#else
+static void * __init get_boot_config_from_initrd(u32 *_size, u32 *_csum)
+{
+       return NULL;
+}
+#endif
+
 #ifdef CONFIG_BOOT_CONFIG
 
 char xbc_namebuf[XBC_KEYLEN_MAX] __initdata;
@@ -357,9 +398,11 @@ static void __init setup_boot_config(const char *cmdline)
        int pos;
        u32 size, csum;
        char *data, *copy;
-       u32 *hdr;
        int ret;
 
+       /* Cut out the bootconfig data even if we have no bootconfig option */
+       data = get_boot_config_from_initrd(&size, &csum);
+
        strlcpy(tmp_cmdline, boot_command_line, COMMAND_LINE_SIZE);
        parse_args("bootconfig", tmp_cmdline, NULL, 0, 0, 0, NULL,
                   bootconfig_params);
@@ -367,16 +410,10 @@ static void __init setup_boot_config(const char *cmdline)
        if (!bootconfig_found)
                return;
 
-       if (!initrd_end)
-               goto not_found;
-
-       data = (char *)initrd_end - BOOTCONFIG_MAGIC_LEN;
-       if (memcmp(data, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN))
-               goto not_found;
-
-       hdr = (u32 *)(data - 8);
-       size = hdr[0];
-       csum = hdr[1];
+       if (!data) {
+               pr_err("'bootconfig' found on command line, but no bootconfig found\n");
+               return;
+       }
 
        if (size >= XBC_DATA_MAX) {
                pr_err("bootconfig size %d greater than max size %d\n",
@@ -384,10 +421,6 @@ static void __init setup_boot_config(const char *cmdline)
                return;
        }
 
-       data = ((void *)hdr) - size;
-       if ((unsigned long)data < initrd_start)
-               goto not_found;
-
        if (boot_config_checksum((unsigned char *)data, size) != csum) {
                pr_err("bootconfig checksum failed\n");
                return;
@@ -417,11 +450,15 @@ static void __init setup_boot_config(const char *cmdline)
                extra_init_args = xbc_make_cmdline("init");
        }
        return;
-not_found:
-       pr_err("'bootconfig' found on command line, but no bootconfig found\n");
 }
+
 #else
-#define setup_boot_config(cmdline)     do { } while (0)
+
+static void __init setup_boot_config(const char *cmdline)
+{
+       /* Remove bootconfig data from initrd */
+       get_boot_config_from_initrd(NULL, NULL);
+}
 
 static int __init warn_bootconfig(char *str)
 {
index dc8307b..beff0cf 100644 (file)
@@ -142,6 +142,7 @@ struct mqueue_inode_info {
 
        struct sigevent notify;
        struct pid *notify_owner;
+       u32 notify_self_exec_id;
        struct user_namespace *notify_user_ns;
        struct user_struct *user;       /* user who created, for accounting */
        struct sock *notify_sock;
@@ -773,28 +774,44 @@ static void __do_notify(struct mqueue_inode_info *info)
         * synchronously. */
        if (info->notify_owner &&
            info->attr.mq_curmsgs == 1) {
-               struct kernel_siginfo sig_i;
                switch (info->notify.sigev_notify) {
                case SIGEV_NONE:
                        break;
-               case SIGEV_SIGNAL:
-                       /* sends signal */
+               case SIGEV_SIGNAL: {
+                       struct kernel_siginfo sig_i;
+                       struct task_struct *task;
+
+                       /* do_mq_notify() accepts sigev_signo == 0, why?? */
+                       if (!info->notify.sigev_signo)
+                               break;
 
                        clear_siginfo(&sig_i);
                        sig_i.si_signo = info->notify.sigev_signo;
                        sig_i.si_errno = 0;
                        sig_i.si_code = SI_MESGQ;
                        sig_i.si_value = info->notify.sigev_value;
-                       /* map current pid/uid into info->owner's namespaces */
                        rcu_read_lock();
+                       /* map current pid/uid into info->owner's namespaces */
                        sig_i.si_pid = task_tgid_nr_ns(current,
                                                ns_of_pid(info->notify_owner));
-                       sig_i.si_uid = from_kuid_munged(info->notify_user_ns, current_uid());
+                       sig_i.si_uid = from_kuid_munged(info->notify_user_ns,
+                                               current_uid());
+                       /*
+                        * We can't use kill_pid_info(), this signal should
+                        * bypass check_kill_permission(). It is from kernel
+                        * but si_fromuser() can't know this.
+                        * We do check the self_exec_id, to avoid sending
+                        * signals to programs that don't expect them.
+                        */
+                       task = pid_task(info->notify_owner, PIDTYPE_TGID);
+                       if (task && task->self_exec_id ==
+                                               info->notify_self_exec_id) {
+                               do_send_sig_info(info->notify.sigev_signo,
+                                               &sig_i, task, PIDTYPE_TGID);
+                       }
                        rcu_read_unlock();
-
-                       kill_pid_info(info->notify.sigev_signo,
-                                     &sig_i, info->notify_owner);
                        break;
+               }
                case SIGEV_THREAD:
                        set_cookie(info->notify_cookie, NOTIFY_WOKENUP);
                        netlink_sendskb(info->notify_sock, info->notify_cookie);
@@ -1383,6 +1400,7 @@ retry:
                        info->notify.sigev_signo = notification->sigev_signo;
                        info->notify.sigev_value = notification->sigev_value;
                        info->notify.sigev_notify = SIGEV_SIGNAL;
+                       info->notify_self_exec_id = current->self_exec_id;
                        break;
                }
 
index f503542..8accc97 100644 (file)
@@ -740,8 +740,8 @@ static const struct file_operations kcov_fops = {
  * kcov_remote_handle() with KCOV_SUBSYSTEM_COMMON as the subsystem id and an
  * arbitrary 4-byte non-zero number as the instance id). This common handle
  * then gets saved into the task_struct of the process that issued the
- * KCOV_REMOTE_ENABLE ioctl. When this proccess issues system calls that spawn
- * kernel threads, the common handle must be retrived via kcov_common_handle()
+ * KCOV_REMOTE_ENABLE ioctl. When this process issues system calls that spawn
+ * kernel threads, the common handle must be retrieved via kcov_common_handle()
  * and passed to the spawned threads via custom annotations. Those kernel
  * threads must in turn be annotated with kcov_remote_start(common_handle) and
  * kcov_remote_stop(). All of the threads that are spawned by the same process
index 402eef8..7436470 100644 (file)
@@ -466,7 +466,6 @@ config PROFILE_ANNOTATED_BRANCHES
 config PROFILE_ALL_BRANCHES
        bool "Profile all if conditionals" if !FORTIFY_SOURCE
        select TRACE_BRANCH_PROFILING
-       imply CC_DISABLE_WARN_MAYBE_UNINITIALIZED  # avoid false positives
        help
          This tracer profiles all branch conditions. Every if ()
          taken in the kernel is recorded whether it hit or miss.
index 31c0fad..312d1a0 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/printk.h>
 #include <linux/string.h>
 #include <linux/sysfs.h>
+#include <linux/completion.h>
 
 static ulong delay = 100;
 static char test_mode[12] = "irq";
@@ -28,6 +29,8 @@ MODULE_PARM_DESC(delay, "Period in microseconds (100 us default)");
 MODULE_PARM_DESC(test_mode, "Mode of the test such as preempt, irq, or alternate (default irq)");
 MODULE_PARM_DESC(burst_size, "The size of a burst (default 1)");
 
+static struct completion done;
+
 #define MIN(x, y) ((x) < (y) ? (x) : (y))
 
 static void busy_wait(ulong time)
@@ -113,22 +116,47 @@ static int preemptirq_delay_run(void *data)
 
        for (i = 0; i < s; i++)
                (testfuncs[i])(i);
+
+       complete(&done);
+
+       set_current_state(TASK_INTERRUPTIBLE);
+       while (!kthread_should_stop()) {
+               schedule();
+               set_current_state(TASK_INTERRUPTIBLE);
+       }
+
+       __set_current_state(TASK_RUNNING);
+
        return 0;
 }
 
-static struct task_struct *preemptirq_start_test(void)
+static int preemptirq_run_test(void)
 {
+       struct task_struct *task;
        char task_name[50];
 
+       init_completion(&done);
+
        snprintf(task_name, sizeof(task_name), "%s_test", test_mode);
-       return kthread_run(preemptirq_delay_run, NULL, task_name);
+       task =  kthread_run(preemptirq_delay_run, NULL, task_name);
+       if (IS_ERR(task))
+               return PTR_ERR(task);
+       if (task) {
+               wait_for_completion(&done);
+               kthread_stop(task);
+       }
+       return 0;
 }
 
 
 static ssize_t trigger_store(struct kobject *kobj, struct kobj_attribute *attr,
                         const char *buf, size_t count)
 {
-       preemptirq_start_test();
+       ssize_t ret;
+
+       ret = preemptirq_run_test();
+       if (ret)
+               return ret;
        return count;
 }
 
@@ -148,11 +176,9 @@ static struct kobject *preemptirq_delay_kobj;
 
 static int __init preemptirq_delay_init(void)
 {
-       struct task_struct *test_task;
        int retval;
 
-       test_task = preemptirq_start_test();
-       retval = PTR_ERR_OR_ZERO(test_task);
+       retval = preemptirq_run_test();
        if (retval != 0)
                return retval;
 
index 8d2b988..29615f1 100644 (file)
@@ -947,7 +947,8 @@ int __trace_bputs(unsigned long ip, const char *str)
 EXPORT_SYMBOL_GPL(__trace_bputs);
 
 #ifdef CONFIG_TRACER_SNAPSHOT
-void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data)
+static void tracing_snapshot_instance_cond(struct trace_array *tr,
+                                          void *cond_data)
 {
        struct tracer *tracer = tr->current_trace;
        unsigned long flags;
@@ -8525,6 +8526,19 @@ static int allocate_trace_buffers(struct trace_array *tr, int size)
         */
        allocate_snapshot = false;
 #endif
+
+       /*
+        * Because of some magic with the way alloc_percpu() works on
+        * x86_64, we need to synchronize the pgd of all the tables,
+        * otherwise the trace events that happen in x86_64 page fault
+        * handlers can't cope with accessing the chance that a
+        * alloc_percpu()'d memory might be touched in the page fault trace
+        * event. Oh, and we need to audit all other alloc_percpu() and vmalloc()
+        * calls in tracing, because something might get triggered within a
+        * page fault trace event!
+        */
+       vmalloc_sync_mappings();
+
        return 0;
 }
 
index 06d7feb..9de29bb 100644 (file)
@@ -95,24 +95,20 @@ trace_boot_add_kprobe_event(struct xbc_node *node, const char *event)
        struct xbc_node *anode;
        char buf[MAX_BUF_LEN];
        const char *val;
-       int ret;
+       int ret = 0;
 
-       kprobe_event_cmd_init(&cmd, buf, MAX_BUF_LEN);
+       xbc_node_for_each_array_value(node, "probes", anode, val) {
+               kprobe_event_cmd_init(&cmd, buf, MAX_BUF_LEN);
 
-       ret = kprobe_event_gen_cmd_start(&cmd, event, NULL);
-       if (ret)
-               return ret;
+               ret = kprobe_event_gen_cmd_start(&cmd, event, val);
+               if (ret)
+                       break;
 
-       xbc_node_for_each_array_value(node, "probes", anode, val) {
-               ret = kprobe_event_add_field(&cmd, val);
+               ret = kprobe_event_gen_cmd_end(&cmd);
                if (ret)
-                       return ret;
+                       pr_err("Failed to add probe: %s\n", buf);
        }
 
-       ret = kprobe_event_gen_cmd_end(&cmd);
-       if (ret)
-               pr_err("Failed to add probe: %s\n", buf);
-
        return ret;
 }
 #else
index d0568af..3598938 100644 (file)
@@ -453,7 +453,7 @@ static bool __within_notrace_func(unsigned long addr)
 
 static bool within_notrace_func(struct trace_kprobe *tk)
 {
-       unsigned long addr = addr = trace_kprobe_address(tk);
+       unsigned long addr = trace_kprobe_address(tk);
        char symname[KSYM_NAME_LEN], *p;
 
        if (!__within_notrace_func(addr))
@@ -940,6 +940,9 @@ EXPORT_SYMBOL_GPL(kprobe_event_cmd_init);
  * complete command or only the first part of it; in the latter case,
  * kprobe_event_add_fields() can be used to add more fields following this.
  *
+ * Unlikely the synth_event_gen_cmd_start(), @loc must be specified. This
+ * returns -EINVAL if @loc == NULL.
+ *
  * Return: 0 if successful, error otherwise.
  */
 int __kprobe_event_gen_cmd_start(struct dynevent_cmd *cmd, bool kretprobe,
@@ -953,6 +956,9 @@ int __kprobe_event_gen_cmd_start(struct dynevent_cmd *cmd, bool kretprobe,
        if (cmd->type != DYNEVENT_TYPE_KPROBE)
                return -EINVAL;
 
+       if (!loc)
+               return -EINVAL;
+
        if (kretprobe)
                snprintf(buf, MAX_EVENT_NAME_LEN, "r:kprobes/%s", name);
        else
index 7f255b5..11bf5ee 100644 (file)
@@ -544,6 +544,11 @@ EXPORT_SYMBOL_GPL(fork_usermode_blob);
  * Runs a user-space application.  The application is started
  * asynchronously if wait is not set, and runs as a child of system workqueues.
  * (ie. it runs with full root capabilities and optimized affinity).
+ *
+ * Note: successful return value does not guarantee the helper was called at
+ * all. You can't rely on sub_info->{init,cleanup} being called even for
+ * UMH_WAIT_* wait modes as STATIC_USERMODEHELPER_PATH="" turns all helpers
+ * into a successful no-op.
  */
 int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
 {
index 48469c9..9292110 100644 (file)
@@ -60,18 +60,15 @@ config UBSAN_SANITIZE_ALL
          Enabling this option will get kernel image size increased
          significantly.
 
-config UBSAN_NO_ALIGNMENT
-       bool "Disable checking of pointers alignment"
-       default y if HAVE_EFFICIENT_UNALIGNED_ACCESS
+config UBSAN_ALIGNMENT
+       bool "Enable checks for pointers alignment"
+       default !HAVE_EFFICIENT_UNALIGNED_ACCESS
+       depends on !X86 || !COMPILE_TEST
        help
-         This option disables the check of unaligned memory accesses.
-         This option should be used when building allmodconfig.
-         Disabling this option on architectures that support unaligned
+         This option enables the check of unaligned memory accesses.
+         Enabling this option on architectures that support unaligned
          accesses may produce a lot of false positives.
 
-config UBSAN_ALIGNMENT
-       def_bool !UBSAN_NO_ALIGNMENT
-
 config TEST_UBSAN
        tristate "Module for testing for undefined behavior detection"
        depends on m
index c1acc34..9745811 100644 (file)
@@ -192,6 +192,9 @@ config MEMORY_HOTREMOVE
 # Default to 4 for wider testing, though 8 might be more appropriate.
 # ARM's adjust_pte (unused if VIPT) depends on mm-wide page_table_lock.
 # PA-RISC 7xxx's spinlock_t would enlarge struct page from 32 to 44 bytes.
+# SPARC32 allocates multiple pte tables within a single page, and therefore
+# a per-page lock leads to problems when multiple tables need to be locked
+# at the same time (e.g. copy_page_range()).
 # DEBUG_SPINLOCK and DEBUG_LOCK_ALLOC spinlock_t also enlarge struct page.
 #
 config SPLIT_PTLOCK_CPUS
@@ -199,6 +202,7 @@ config SPLIT_PTLOCK_CPUS
        default "999999" if !MMU
        default "999999" if ARM && !CPU_CACHE_VIPT
        default "999999" if PARISC && !PA20
+       default "999999" if SPARC32
        default "4"
 
 config ARCH_ENABLE_SPLIT_PMD_PTLOCK
index c81b4f3..efc5b83 100644 (file)
@@ -21,7 +21,7 @@ struct backing_dev_info noop_backing_dev_info = {
 EXPORT_SYMBOL_GPL(noop_backing_dev_info);
 
 static struct class *bdi_class;
-const char *bdi_unknown_name = "(unknown)";
+static const char *bdi_unknown_name = "(unknown)";
 
 /*
  * bdi_lock protects bdi_tree and updates to bdi_list. bdi_list has RCU
@@ -938,7 +938,8 @@ int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args)
        if (bdi->dev)   /* The driver needs to use separate queues per device */
                return 0;
 
-       dev = device_create_vargs(bdi_class, NULL, MKDEV(0, 0), bdi, fmt, args);
+       vsnprintf(bdi->dev_name, sizeof(bdi->dev_name), fmt, args);
+       dev = device_create(bdi_class, NULL, MKDEV(0, 0), bdi, bdi->dev_name);
        if (IS_ERR(dev))
                return PTR_ERR(dev);
 
@@ -1043,6 +1044,14 @@ void bdi_put(struct backing_dev_info *bdi)
 }
 EXPORT_SYMBOL(bdi_put);
 
+const char *bdi_dev_name(struct backing_dev_info *bdi)
+{
+       if (!bdi || !bdi->dev)
+               return bdi_unknown_name;
+       return bdi->dev_name;
+}
+EXPORT_SYMBOL_GPL(bdi_dev_name);
+
 static wait_queue_head_t congestion_wqh[2] = {
                __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
                __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
index 5beea03..a3b97f1 100644 (file)
@@ -4990,19 +4990,22 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
        unsigned int size;
        int node;
        int __maybe_unused i;
+       long error = -ENOMEM;
 
        size = sizeof(struct mem_cgroup);
        size += nr_node_ids * sizeof(struct mem_cgroup_per_node *);
 
        memcg = kzalloc(size, GFP_KERNEL);
        if (!memcg)
-               return NULL;
+               return ERR_PTR(error);
 
        memcg->id.id = idr_alloc(&mem_cgroup_idr, NULL,
                                 1, MEM_CGROUP_ID_MAX,
                                 GFP_KERNEL);
-       if (memcg->id.id < 0)
+       if (memcg->id.id < 0) {
+               error = memcg->id.id;
                goto fail;
+       }
 
        memcg->vmstats_local = alloc_percpu(struct memcg_vmstats_percpu);
        if (!memcg->vmstats_local)
@@ -5046,7 +5049,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
 fail:
        mem_cgroup_id_remove(memcg);
        __mem_cgroup_free(memcg);
-       return NULL;
+       return ERR_PTR(error);
 }
 
 static struct cgroup_subsys_state * __ref
@@ -5057,8 +5060,8 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
        long error = -ENOMEM;
 
        memcg = mem_cgroup_alloc();
-       if (!memcg)
-               return ERR_PTR(error);
+       if (IS_ERR(memcg))
+               return ERR_CAST(memcg);
 
        WRITE_ONCE(memcg->high, PAGE_COUNTER_MAX);
        memcg->soft_limit = PAGE_COUNTER_MAX;
@@ -5108,7 +5111,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 fail:
        mem_cgroup_id_remove(memcg);
        mem_cgroup_free(memcg);
-       return ERR_PTR(-ENOMEM);
+       return ERR_PTR(error);
 }
 
 static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
index 69827d4..13cc653 100644 (file)
@@ -1607,6 +1607,7 @@ void set_zone_contiguous(struct zone *zone)
                if (!__pageblock_pfn_to_page(block_start_pfn,
                                             block_end_pfn, zone))
                        return;
+               cond_resched();
        }
 
        /* We confirm that there is no hole */
@@ -2400,6 +2401,14 @@ static inline void boost_watermark(struct zone *zone)
 
        if (!watermark_boost_factor)
                return;
+       /*
+        * Don't bother in zones that are unlikely to produce results.
+        * On small machines, including kdump capture kernels running
+        * in a small area, boosting the watermark can cause an out of
+        * memory situation immediately.
+        */
+       if ((pageblock_nr_pages * 4) > zone_managed_pages(zone))
+               return;
 
        max_boost = mult_frac(zone->_watermark[WMARK_HIGH],
                        watermark_boost_factor, 10000);
index d7e3bc6..7da7d77 100644 (file)
@@ -80,6 +80,7 @@
 #include <linux/workqueue.h>
 #include <linux/kmemleak.h>
 #include <linux/sched.h>
+#include <linux/sched/mm.h>
 
 #include <asm/cacheflush.h>
 #include <asm/sections.h>
@@ -1557,10 +1558,9 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
 static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
                                 gfp_t gfp)
 {
-       /* whitelisted flags that can be passed to the backing allocators */
-       gfp_t pcpu_gfp = gfp & (GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
-       bool is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL;
-       bool do_warn = !(gfp & __GFP_NOWARN);
+       gfp_t pcpu_gfp;
+       bool is_atomic;
+       bool do_warn;
        static int warn_limit = 10;
        struct pcpu_chunk *chunk, *next;
        const char *err;
@@ -1569,6 +1569,12 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
        void __percpu *ptr;
        size_t bits, bit_align;
 
+       gfp = current_gfp_context(gfp);
+       /* whitelisted flags that can be passed to the backing allocators */
+       pcpu_gfp = gfp & (GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
+       is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL;
+       do_warn = !(gfp & __GFP_NOWARN);
+
        /*
         * There is now a minimum allocation size of PCPU_MIN_ALLOC_SIZE,
         * therefore alignment must be a minimum of that many bytes.
index 9bf4495..b762450 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -551,15 +551,32 @@ static void print_section(char *level, char *text, u8 *addr,
        metadata_access_disable();
 }
 
+/*
+ * See comment in calculate_sizes().
+ */
+static inline bool freeptr_outside_object(struct kmem_cache *s)
+{
+       return s->offset >= s->inuse;
+}
+
+/*
+ * Return offset of the end of info block which is inuse + free pointer if
+ * not overlapping with object.
+ */
+static inline unsigned int get_info_end(struct kmem_cache *s)
+{
+       if (freeptr_outside_object(s))
+               return s->inuse + sizeof(void *);
+       else
+               return s->inuse;
+}
+
 static struct track *get_track(struct kmem_cache *s, void *object,
        enum track_item alloc)
 {
        struct track *p;
 
-       if (s->offset)
-               p = object + s->offset + sizeof(void *);
-       else
-               p = object + s->inuse;
+       p = object + get_info_end(s);
 
        return p + alloc;
 }
@@ -686,10 +703,7 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
                print_section(KERN_ERR, "Redzone ", p + s->object_size,
                        s->inuse - s->object_size);
 
-       if (s->offset)
-               off = s->offset + sizeof(void *);
-       else
-               off = s->inuse;
+       off = get_info_end(s);
 
        if (s->flags & SLAB_STORE_USER)
                off += 2 * sizeof(struct track);
@@ -782,7 +796,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
  * object address
  *     Bytes of the object to be managed.
  *     If the freepointer may overlay the object then the free
- *     pointer is the first word of the object.
+ *     pointer is at the middle of the object.
  *
  *     Poisoning uses 0x6b (POISON_FREE) and the last byte is
  *     0xa5 (POISON_END)
@@ -816,11 +830,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
 
 static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
 {
-       unsigned long off = s->inuse;   /* The end of info */
-
-       if (s->offset)
-               /* Freepointer is placed after the object. */
-               off += sizeof(void *);
+       unsigned long off = get_info_end(s);    /* The end of info */
 
        if (s->flags & SLAB_STORE_USER)
                /* We also have user information there */
@@ -907,7 +917,7 @@ static int check_object(struct kmem_cache *s, struct page *page,
                check_pad_bytes(s, page, p);
        }
 
-       if (!s->offset && val == SLUB_RED_ACTIVE)
+       if (!freeptr_outside_object(s) && val == SLUB_RED_ACTIVE)
                /*
                 * Object and freepointer overlap. Cannot check
                 * freepointer while object is allocated.
@@ -3587,6 +3597,11 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
                 *
                 * This is the case if we do RCU, have a constructor or
                 * destructor or are poisoning the objects.
+                *
+                * The assumption that s->offset >= s->inuse means free
+                * pointer is outside of the object is used in the
+                * freeptr_outside_object() function. If that is no
+                * longer true, the function needs to be modified.
                 */
                s->offset = size;
                size += sizeof(void *);
index b06868f..a37c87b 100644 (file)
@@ -1625,7 +1625,6 @@ static __always_inline void update_lru_sizes(struct lruvec *lruvec,
  * @dst:       The temp list to put pages on to.
  * @nr_scanned:        The number of pages that were scanned.
  * @sc:                The scan_control struct for this reclaim session
- * @mode:      One of the LRU isolation modes
  * @lru:       LRU list id for isolating
  *
  * returns how many pages were moved onto *@dst.
index 0ce530a..8575f5d 100644 (file)
@@ -177,18 +177,18 @@ static void vcc_destroy_socket(struct sock *sk)
 
        set_bit(ATM_VF_CLOSE, &vcc->flags);
        clear_bit(ATM_VF_READY, &vcc->flags);
-       if (vcc->dev) {
-               if (vcc->dev->ops->close)
-                       vcc->dev->ops->close(vcc);
-               if (vcc->push)
-                       vcc->push(vcc, NULL); /* atmarpd has no push */
-               module_put(vcc->owner);
-
-               while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
-                       atm_return(vcc, skb->truesize);
-                       kfree_skb(skb);
-               }
+       if (vcc->dev && vcc->dev->ops->close)
+               vcc->dev->ops->close(vcc);
+       if (vcc->push)
+               vcc->push(vcc, NULL); /* atmarpd has no push */
+       module_put(vcc->owner);
+
+       while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+               atm_return(vcc, skb->truesize);
+               kfree_skb(skb);
+       }
 
+       if (vcc->dev && vcc->dev->ops->owner) {
                module_put(vcc->dev->ops->owner);
                atm_dev_put(vcc->dev);
        }
index 25fa3a7..ca37f5a 100644 (file)
@@ -1264,6 +1264,12 @@ static void lec_arp_clear_vccs(struct lec_arp_table *entry)
                entry->vcc = NULL;
        }
        if (entry->recv_vcc) {
+               struct atm_vcc *vcc = entry->recv_vcc;
+               struct lec_vcc_priv *vpriv = LEC_VCC_PRIV(vcc);
+
+               kfree(vpriv);
+               vcc->user_back = NULL;
+
                entry->recv_vcc->push = entry->old_recv_push;
                vcc_release_async(entry->recv_vcc, -EPIPE);
                entry->recv_vcc = NULL;
index 9694662..80b87b1 100644 (file)
@@ -893,7 +893,7 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset,
 
        orig_node = batadv_v_ogm_orig_get(bat_priv, ogm_packet->orig);
        if (!orig_node)
-               return;
+               goto out;
 
        neigh_node = batadv_neigh_node_get_or_create(orig_node, if_incoming,
                                                     ethhdr->h_source);
index 8f0717c..b0469d1 100644 (file)
@@ -1009,15 +1009,8 @@ static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv,
  */
 static u8 batadv_nc_random_weight_tq(u8 tq)
 {
-       u8 rand_val, rand_tq;
-
-       get_random_bytes(&rand_val, sizeof(rand_val));
-
        /* randomize the estimated packet loss (max TQ - estimated TQ) */
-       rand_tq = rand_val * (BATADV_TQ_MAX_VALUE - tq);
-
-       /* normalize the randomized packet loss */
-       rand_tq /= BATADV_TQ_MAX_VALUE;
+       u8 rand_tq = prandom_u32_max(BATADV_TQ_MAX_VALUE + 1 - tq);
 
        /* convert to (randomized) estimated tq again */
        return BATADV_TQ_MAX_VALUE - rand_tq;
index c45962d..0f962dc 100644 (file)
@@ -1150,7 +1150,7 @@ static ssize_t batadv_store_throughput_override(struct kobject *kobj,
        ret = batadv_parse_throughput(net_dev, buff, "throughput_override",
                                      &tp_override);
        if (!ret)
-               return count;
+               goto out;
 
        old_tp_override = atomic_read(&hard_iface->bat_v.throughput_override);
        if (old_tp_override == tp_override)
@@ -1190,6 +1190,7 @@ static ssize_t batadv_show_throughput_override(struct kobject *kobj,
 
        tp_override = atomic_read(&hard_iface->bat_v.throughput_override);
 
+       batadv_hardif_put(hard_iface);
        return sprintf(buff, "%u.%u MBit\n", tp_override / 10,
                       tp_override % 10);
 }
index 43dab40..a0f5dbe 100644 (file)
@@ -612,6 +612,7 @@ int br_process_vlan_info(struct net_bridge *br,
                                               v - 1, rtm_cmd);
                                v_change_start = 0;
                        }
+                       cond_resched();
                }
                /* v_change_start is set only if the last/whole range changed */
                if (v_change_start)
index 80f9772..899edce 100644 (file)
@@ -4283,6 +4283,11 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
                end_offset = nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR]);
                end_offset += nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]);
                dump = false;
+
+               if (start_offset == end_offset) {
+                       err = 0;
+                       goto nla_put_failure;
+               }
        }
 
        err = devlink_nl_region_read_snapshot_fill(skb, devlink,
@@ -5363,6 +5368,7 @@ int devlink_health_report(struct devlink_health_reporter *reporter,
 {
        enum devlink_health_reporter_state prev_health_state;
        struct devlink *devlink = reporter->devlink;
+       unsigned long recover_ts_threshold;
 
        /* write a log message of the current error */
        WARN_ON(!msg);
@@ -5373,10 +5379,12 @@ int devlink_health_report(struct devlink_health_reporter *reporter,
        devlink_recover_notify(reporter, DEVLINK_CMD_HEALTH_REPORTER_RECOVER);
 
        /* abort if the previous error wasn't recovered */
+       recover_ts_threshold = reporter->last_recovery_ts +
+                              msecs_to_jiffies(reporter->graceful_period);
        if (reporter->auto_recover &&
            (prev_health_state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY ||
-            jiffies - reporter->last_recovery_ts <
-            msecs_to_jiffies(reporter->graceful_period))) {
+            (reporter->last_recovery_ts && reporter->recovery_count &&
+             time_is_after_jiffies(recover_ts_threshold)))) {
                trace_devlink_health_recover_aborted(devlink,
                                                     reporter->ops->name,
                                                     reporter->health_state,
index 8e33cec..2ee7bc4 100644 (file)
@@ -213,6 +213,7 @@ static void sched_send_work(struct timer_list *t)
 static void trace_drop_common(struct sk_buff *skb, void *location)
 {
        struct net_dm_alert_msg *msg;
+       struct net_dm_drop_point *point;
        struct nlmsghdr *nlh;
        struct nlattr *nla;
        int i;
@@ -231,11 +232,13 @@ static void trace_drop_common(struct sk_buff *skb, void *location)
        nlh = (struct nlmsghdr *)dskb->data;
        nla = genlmsg_data(nlmsg_data(nlh));
        msg = nla_data(nla);
+       point = msg->points;
        for (i = 0; i < msg->entries; i++) {
-               if (!memcmp(&location, msg->points[i].pc, sizeof(void *))) {
-                       msg->points[i].count++;
+               if (!memcmp(&location, &point->pc, sizeof(void *))) {
+                       point->count++;
                        goto out;
                }
+               point++;
        }
        if (msg->entries == dm_hit_limit)
                goto out;
@@ -244,8 +247,8 @@ static void trace_drop_common(struct sk_buff *skb, void *location)
         */
        __nla_reserve_nohdr(dskb, sizeof(struct net_dm_drop_point));
        nla->nla_len += NLA_ALIGN(sizeof(struct net_dm_drop_point));
-       memcpy(msg->points[msg->entries].pc, &location, sizeof(void *));
-       msg->points[msg->entries].count = 1;
+       memcpy(point->pc, &location, sizeof(void *));
+       point->count = 1;
        msg->entries++;
 
        if (!timer_pending(&data->send_timer)) {
index 39d37d0..1161392 100644 (file)
@@ -1956,6 +1956,9 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
                                   NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
        }
 
+       if (protocol)
+               neigh->protocol = protocol;
+
        if (ndm->ndm_flags & NTF_EXT_LEARNED)
                flags |= NEIGH_UPDATE_F_EXT_LEARNED;
 
@@ -1969,9 +1972,6 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
                err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
                                     NETLINK_CB(skb).portid, extack);
 
-       if (protocol)
-               neigh->protocol = protocol;
-
        neigh_release(neigh);
 
 out:
index 90509c3..b714162 100644 (file)
@@ -2364,7 +2364,6 @@ static void sk_leave_memory_pressure(struct sock *sk)
        }
 }
 
-/* On 32bit arches, an skb frag is limited to 2^15 */
 #define SKB_FRAG_PAGE_ORDER    get_order(32768)
 DEFINE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key);
 
index 9a271a5..d90665b 100644 (file)
@@ -459,7 +459,7 @@ static int dsa_tree_setup_switches(struct dsa_switch_tree *dst)
        list_for_each_entry(dp, &dst->ports, list) {
                err = dsa_port_setup(dp);
                if (err)
-                       goto teardown;
+                       continue;
        }
 
        return 0;
index b5c535a..a621367 100644 (file)
@@ -289,7 +289,8 @@ static void dsa_master_ndo_teardown(struct net_device *dev)
 {
        struct dsa_port *cpu_dp = dev->dsa_ptr;
 
-       dev->netdev_ops = cpu_dp->orig_ndo_ops;
+       if (cpu_dp->orig_ndo_ops)
+               dev->netdev_ops = cpu_dp->orig_ndo_ops;
        cpu_dp->orig_ndo_ops = NULL;
 }
 
index d106880..62f4ee3 100644 (file)
@@ -856,20 +856,18 @@ dsa_slave_add_cls_matchall_mirred(struct net_device *dev,
        struct dsa_port *to_dp;
        int err;
 
-       act = &cls->rule->action.entries[0];
-
        if (!ds->ops->port_mirror_add)
                return -EOPNOTSUPP;
 
-       if (!act->dev)
-               return -EINVAL;
-
        if (!flow_action_basic_hw_stats_check(&cls->rule->action,
                                              cls->common.extack))
                return -EOPNOTSUPP;
 
        act = &cls->rule->action.entries[0];
 
+       if (!act->dev)
+               return -EINVAL;
+
        if (!dsa_slave_dev_check(act->dev))
                return -EOPNOTSUPP;
 
index f4b9f7a..25b6ffb 100644 (file)
@@ -18,7 +18,7 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb)
 {
        struct sk_buff *skb = *pskb;
        struct hsr_port *port;
-       u16 protocol;
+       __be16 protocol;
 
        if (!skb_mac_header_was_set(skb)) {
                WARN_ONCE(1, "%s: skb invalid", __func__);
index bf4ced9..b996dc1 100644 (file)
@@ -3926,10 +3926,6 @@ void tcp_parse_options(const struct net *net,
                                 */
                                break;
 #endif
-                       case TCPOPT_MPTCP:
-                               mptcp_parse_option(skb, ptr, opsize, opt_rx);
-                               break;
-
                        case TCPOPT_FASTOPEN:
                                tcp_parse_fastopen_option(
                                        opsize - TCPOLEN_FASTOPEN_BASE,
@@ -5990,9 +5986,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
                tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
                tcp_initialize_rcv_mss(sk);
 
-               if (sk_is_mptcp(sk))
-                       mptcp_rcv_synsent(sk);
-
                /* Remember, tcp_poll() does not lock socket!
                 * Change state from SYN-SENT only after copied_seq
                 * is initialized. */
index 310cbdd..8d41803 100644 (file)
@@ -1385,9 +1385,18 @@ static struct rt6_info *ip6_rt_pcpu_alloc(const struct fib6_result *res)
        }
        ip6_rt_copy_init(pcpu_rt, res);
        pcpu_rt->rt6i_flags |= RTF_PCPU;
+
+       if (f6i->nh)
+               pcpu_rt->sernum = rt_genid_ipv6(dev_net(dev));
+
        return pcpu_rt;
 }
 
+static bool rt6_is_valid(const struct rt6_info *rt6)
+{
+       return rt6->sernum == rt_genid_ipv6(dev_net(rt6->dst.dev));
+}
+
 /* It should be called with rcu_read_lock() acquired */
 static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
 {
@@ -1395,6 +1404,19 @@ static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
 
        pcpu_rt = this_cpu_read(*res->nh->rt6i_pcpu);
 
+       if (pcpu_rt && pcpu_rt->sernum && !rt6_is_valid(pcpu_rt)) {
+               struct rt6_info *prev, **p;
+
+               p = this_cpu_ptr(res->nh->rt6i_pcpu);
+               prev = xchg(p, NULL);
+               if (prev) {
+                       dst_dev_put(&prev->dst);
+                       dst_release(&prev->dst);
+               }
+
+               pcpu_rt = NULL;
+       }
+
        return pcpu_rt;
 }
 
@@ -2593,6 +2615,9 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
 
        rt = container_of(dst, struct rt6_info, dst);
 
+       if (rt->sernum)
+               return rt6_is_valid(rt) ? dst : NULL;
+
        rcu_read_lock();
 
        /* All IPV6 dsts are created with ->obsolete set to the value
index 4c7e0a2..37b4342 100644 (file)
@@ -27,8 +27,9 @@
 
 bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len)
 {
-       int trailing;
        unsigned int tlv_offset;
+       int max_last_entry;
+       int trailing;
 
        if (srh->type != IPV6_SRCRT_TYPE_4)
                return false;
@@ -36,7 +37,12 @@ bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len)
        if (((srh->hdrlen + 1) << 3) != len)
                return false;
 
-       if (srh->segments_left > srh->first_segment)
+       max_last_entry = (srh->hdrlen / 2) - 1;
+
+       if (srh->first_segment > max_last_entry)
+               return false;
+
+       if (srh->segments_left > srh->first_segment + 1)
                return false;
 
        tlv_offset = sizeof(*srh) + ((srh->first_segment + 1) << 4);
index 4a7c467..45497af 100644 (file)
@@ -16,10 +16,10 @@ static bool mptcp_cap_flag_sha256(u8 flags)
        return (flags & MPTCP_CAP_FLAG_MASK) == MPTCP_CAP_HMAC_SHA256;
 }
 
-void mptcp_parse_option(const struct sk_buff *skb, const unsigned char *ptr,
-                       int opsize, struct tcp_options_received *opt_rx)
+static void mptcp_parse_option(const struct sk_buff *skb,
+                              const unsigned char *ptr, int opsize,
+                              struct mptcp_options_received *mp_opt)
 {
-       struct mptcp_options_received *mp_opt = &opt_rx->mptcp;
        u8 subtype = *ptr >> 4;
        int expected_opsize;
        u8 version;
@@ -283,12 +283,20 @@ void mptcp_parse_option(const struct sk_buff *skb, const unsigned char *ptr,
 }
 
 void mptcp_get_options(const struct sk_buff *skb,
-                      struct tcp_options_received *opt_rx)
+                      struct mptcp_options_received *mp_opt)
 {
-       const unsigned char *ptr;
        const struct tcphdr *th = tcp_hdr(skb);
-       int length = (th->doff * 4) - sizeof(struct tcphdr);
+       const unsigned char *ptr;
+       int length;
+
+       /* initialize option status */
+       mp_opt->mp_capable = 0;
+       mp_opt->mp_join = 0;
+       mp_opt->add_addr = 0;
+       mp_opt->rm_addr = 0;
+       mp_opt->dss = 0;
 
+       length = (th->doff * 4) - sizeof(struct tcphdr);
        ptr = (const unsigned char *)(th + 1);
 
        while (length > 0) {
@@ -308,7 +316,7 @@ void mptcp_get_options(const struct sk_buff *skb,
                        if (opsize > length)
                                return; /* don't parse partial options */
                        if (opcode == TCPOPT_MPTCP)
-                               mptcp_parse_option(skb, ptr, opsize, opt_rx);
+                               mptcp_parse_option(skb, ptr, opsize, mp_opt);
                        ptr += opsize - 2;
                        length -= opsize;
                }
@@ -344,28 +352,6 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
        return false;
 }
 
-void mptcp_rcv_synsent(struct sock *sk)
-{
-       struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
-       struct tcp_sock *tp = tcp_sk(sk);
-
-       if (subflow->request_mptcp && tp->rx_opt.mptcp.mp_capable) {
-               subflow->mp_capable = 1;
-               subflow->can_ack = 1;
-               subflow->remote_key = tp->rx_opt.mptcp.sndr_key;
-               pr_debug("subflow=%p, remote_key=%llu", subflow,
-                        subflow->remote_key);
-       } else if (subflow->request_join && tp->rx_opt.mptcp.mp_join) {
-               subflow->mp_join = 1;
-               subflow->thmac = tp->rx_opt.mptcp.thmac;
-               subflow->remote_nonce = tp->rx_opt.mptcp.nonce;
-               pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow,
-                        subflow->thmac, subflow->remote_nonce);
-       } else if (subflow->request_mptcp) {
-               tcp_sk(sk)->is_mptcp = 0;
-       }
-}
-
 /* MP_JOIN client subflow must wait for 4th ack before sending any data:
  * TCP can't schedule delack timer before the subflow is fully established.
  * MPTCP uses the delack timer to do 3rd ack retransmissions
@@ -709,7 +695,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk,
        if (TCP_SKB_CB(skb)->seq != subflow->ssn_offset + 1)
                return subflow->mp_capable;
 
-       if (mp_opt->use_ack) {
+       if (mp_opt->dss && mp_opt->use_ack) {
                /* subflows are fully established as soon as we get any
                 * additional ack.
                 */
@@ -717,8 +703,6 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk,
                goto fully_established;
        }
 
-       WARN_ON_ONCE(subflow->can_ack);
-
        /* If the first established packet does not contain MP_CAPABLE + data
         * then fallback to TCP
         */
@@ -728,6 +712,8 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk,
                return false;
        }
 
+       if (unlikely(!READ_ONCE(msk->pm.server_side)))
+               pr_warn_once("bogus mpc option on established client sk");
        subflow->fully_established = 1;
        subflow->remote_key = mp_opt->sndr_key;
        subflow->can_ack = 1;
@@ -819,41 +805,41 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,
 {
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
        struct mptcp_sock *msk = mptcp_sk(subflow->conn);
-       struct mptcp_options_received *mp_opt;
+       struct mptcp_options_received mp_opt;
        struct mptcp_ext *mpext;
 
-       mp_opt = &opt_rx->mptcp;
-       if (!check_fully_established(msk, sk, subflow, skb, mp_opt))
+       mptcp_get_options(skb, &mp_opt);
+       if (!check_fully_established(msk, sk, subflow, skb, &mp_opt))
                return;
 
-       if (mp_opt->add_addr && add_addr_hmac_valid(msk, mp_opt)) {
+       if (mp_opt.add_addr && add_addr_hmac_valid(msk, &mp_opt)) {
                struct mptcp_addr_info addr;
 
-               addr.port = htons(mp_opt->port);
-               addr.id = mp_opt->addr_id;
-               if (mp_opt->family == MPTCP_ADDR_IPVERSION_4) {
+               addr.port = htons(mp_opt.port);
+               addr.id = mp_opt.addr_id;
+               if (mp_opt.family == MPTCP_ADDR_IPVERSION_4) {
                        addr.family = AF_INET;
-                       addr.addr = mp_opt->addr;
+                       addr.addr = mp_opt.addr;
                }
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
-               else if (mp_opt->family == MPTCP_ADDR_IPVERSION_6) {
+               else if (mp_opt.family == MPTCP_ADDR_IPVERSION_6) {
                        addr.family = AF_INET6;
-                       addr.addr6 = mp_opt->addr6;
+                       addr.addr6 = mp_opt.addr6;
                }
 #endif
-               if (!mp_opt->echo)
+               if (!mp_opt.echo)
                        mptcp_pm_add_addr_received(msk, &addr);
-               mp_opt->add_addr = 0;
+               mp_opt.add_addr = 0;
        }
 
-       if (!mp_opt->dss)
+       if (!mp_opt.dss)
                return;
 
        /* we can't wait for recvmsg() to update the ack_seq, otherwise
         * monodirectional flows will stuck
         */
-       if (mp_opt->use_ack)
-               update_una(msk, mp_opt);
+       if (mp_opt.use_ack)
+               update_una(msk, &mp_opt);
 
        mpext = skb_ext_add(skb, SKB_EXT_MPTCP);
        if (!mpext)
@@ -861,8 +847,8 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,
 
        memset(mpext, 0, sizeof(*mpext));
 
-       if (mp_opt->use_map) {
-               if (mp_opt->mpc_map) {
+       if (mp_opt.use_map) {
+               if (mp_opt.mpc_map) {
                        /* this is an MP_CAPABLE carrying MPTCP data
                         * we know this map the first chunk of data
                         */
@@ -872,13 +858,14 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,
                        mpext->subflow_seq = 1;
                        mpext->dsn64 = 1;
                        mpext->mpc_map = 1;
+                       mpext->data_fin = 0;
                } else {
-                       mpext->data_seq = mp_opt->data_seq;
-                       mpext->subflow_seq = mp_opt->subflow_seq;
-                       mpext->dsn64 = mp_opt->dsn64;
-                       mpext->data_fin = mp_opt->data_fin;
+                       mpext->data_seq = mp_opt.data_seq;
+                       mpext->subflow_seq = mp_opt.subflow_seq;
+                       mpext->dsn64 = mp_opt.dsn64;
+                       mpext->data_fin = mp_opt.data_fin;
                }
-               mpext->data_len = mp_opt->data_len;
+               mpext->data_len = mp_opt.data_len;
                mpext->use_map = 1;
        }
 }
index b22a63b..e1f2301 100644 (file)
@@ -1316,11 +1316,12 @@ static void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk)
 
 static int mptcp_disconnect(struct sock *sk, int flags)
 {
-       lock_sock(sk);
-       __mptcp_clear_xmit(sk);
-       release_sock(sk);
-       mptcp_cancel_work(sk);
-       return tcp_disconnect(sk, flags);
+       /* Should never be called.
+        * inet_stream_connect() calls ->disconnect, but that
+        * refers to the subflow socket, not the mptcp one.
+        */
+       WARN_ON_ONCE(1);
+       return 0;
 }
 
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
@@ -1333,7 +1334,7 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk)
 #endif
 
 struct sock *mptcp_sk_clone(const struct sock *sk,
-                           const struct tcp_options_received *opt_rx,
+                           const struct mptcp_options_received *mp_opt,
                            struct request_sock *req)
 {
        struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
@@ -1372,9 +1373,9 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
 
        msk->write_seq = subflow_req->idsn + 1;
        atomic64_set(&msk->snd_una, msk->write_seq);
-       if (opt_rx->mptcp.mp_capable) {
+       if (mp_opt->mp_capable) {
                msk->can_ack = true;
-               msk->remote_key = opt_rx->mptcp.sndr_key;
+               msk->remote_key = mp_opt->sndr_key;
                mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq);
                ack_seq++;
                msk->ack_seq = ack_seq;
index a2b3048..e4ca632 100644 (file)
 #define MPTCP_WORK_RTX         2
 #define MPTCP_WORK_EOF         3
 
+struct mptcp_options_received {
+       u64     sndr_key;
+       u64     rcvr_key;
+       u64     data_ack;
+       u64     data_seq;
+       u32     subflow_seq;
+       u16     data_len;
+       u16     mp_capable : 1,
+               mp_join : 1,
+               dss : 1,
+               add_addr : 1,
+               rm_addr : 1,
+               family : 4,
+               echo : 1,
+               backup : 1;
+       u32     token;
+       u32     nonce;
+       u64     thmac;
+       u8      hmac[20];
+       u8      join_id;
+       u8      use_map:1,
+               dsn64:1,
+               data_fin:1,
+               use_ack:1,
+               ack64:1,
+               mpc_map:1,
+               __unused:2;
+       u8      addr_id;
+       u8      rm_id;
+       union {
+               struct in_addr  addr;
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+               struct in6_addr addr6;
+#endif
+       };
+       u64     ahmac;
+       u16     port;
+};
+
 static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
 {
        return htonl((TCPOPT_MPTCP << 24) | (len << 16) | (subopt << 12) |
@@ -331,10 +370,10 @@ int mptcp_proto_v6_init(void);
 #endif
 
 struct sock *mptcp_sk_clone(const struct sock *sk,
-                           const struct tcp_options_received *opt_rx,
+                           const struct mptcp_options_received *mp_opt,
                            struct request_sock *req);
 void mptcp_get_options(const struct sk_buff *skb,
-                      struct tcp_options_received *opt_rx);
+                      struct mptcp_options_received *mp_opt);
 
 void mptcp_finish_connect(struct sock *sk);
 void mptcp_data_ready(struct sock *sk, struct sock *ssk);
index fabd06f..67a4e35 100644 (file)
@@ -124,12 +124,11 @@ static void subflow_init_req(struct request_sock *req,
 {
        struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
        struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
-       struct tcp_options_received rx_opt;
+       struct mptcp_options_received mp_opt;
 
        pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);
 
-       memset(&rx_opt.mptcp, 0, sizeof(rx_opt.mptcp));
-       mptcp_get_options(skb, &rx_opt);
+       mptcp_get_options(skb, &mp_opt);
 
        subflow_req->mp_capable = 0;
        subflow_req->mp_join = 0;
@@ -142,16 +141,16 @@ static void subflow_init_req(struct request_sock *req,
                return;
 #endif
 
-       if (rx_opt.mptcp.mp_capable) {
+       if (mp_opt.mp_capable) {
                SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE);
 
-               if (rx_opt.mptcp.mp_join)
+               if (mp_opt.mp_join)
                        return;
-       } else if (rx_opt.mptcp.mp_join) {
+       } else if (mp_opt.mp_join) {
                SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX);
        }
 
-       if (rx_opt.mptcp.mp_capable && listener->request_mptcp) {
+       if (mp_opt.mp_capable && listener->request_mptcp) {
                int err;
 
                err = mptcp_token_new_request(req);
@@ -159,13 +158,13 @@ static void subflow_init_req(struct request_sock *req,
                        subflow_req->mp_capable = 1;
 
                subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
-       } else if (rx_opt.mptcp.mp_join && listener->request_mptcp) {
+       } else if (mp_opt.mp_join && listener->request_mptcp) {
                subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
                subflow_req->mp_join = 1;
-               subflow_req->backup = rx_opt.mptcp.backup;
-               subflow_req->remote_id = rx_opt.mptcp.join_id;
-               subflow_req->token = rx_opt.mptcp.token;
-               subflow_req->remote_nonce = rx_opt.mptcp.nonce;
+               subflow_req->backup = mp_opt.backup;
+               subflow_req->remote_id = mp_opt.join_id;
+               subflow_req->token = mp_opt.token;
+               subflow_req->remote_nonce = mp_opt.nonce;
                pr_debug("token=%u, remote_nonce=%u", subflow_req->token,
                         subflow_req->remote_nonce);
                if (!subflow_token_join_request(req, skb)) {
@@ -221,23 +220,47 @@ static bool subflow_thmac_valid(struct mptcp_subflow_context *subflow)
 static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
 {
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+       struct mptcp_options_received mp_opt;
        struct sock *parent = subflow->conn;
+       struct tcp_sock *tp = tcp_sk(sk);
 
        subflow->icsk_af_ops->sk_rx_dst_set(sk, skb);
 
-       if (inet_sk_state_load(parent) != TCP_ESTABLISHED) {
+       if (inet_sk_state_load(parent) == TCP_SYN_SENT) {
                inet_sk_state_store(parent, TCP_ESTABLISHED);
                parent->sk_state_change(parent);
        }
 
-       if (subflow->conn_finished || !tcp_sk(sk)->is_mptcp)
+       /* be sure no special action on any packet other than syn-ack */
+       if (subflow->conn_finished)
+               return;
+
+       subflow->conn_finished = 1;
+
+       mptcp_get_options(skb, &mp_opt);
+       if (subflow->request_mptcp && mp_opt.mp_capable) {
+               subflow->mp_capable = 1;
+               subflow->can_ack = 1;
+               subflow->remote_key = mp_opt.sndr_key;
+               pr_debug("subflow=%p, remote_key=%llu", subflow,
+                        subflow->remote_key);
+       } else if (subflow->request_join && mp_opt.mp_join) {
+               subflow->mp_join = 1;
+               subflow->thmac = mp_opt.thmac;
+               subflow->remote_nonce = mp_opt.nonce;
+               pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow,
+                        subflow->thmac, subflow->remote_nonce);
+       } else if (subflow->request_mptcp) {
+               tp->is_mptcp = 0;
+       }
+
+       if (!tp->is_mptcp)
                return;
 
        if (subflow->mp_capable) {
                pr_debug("subflow=%p, remote_key=%llu", mptcp_subflow_ctx(sk),
                         subflow->remote_key);
                mptcp_finish_connect(sk);
-               subflow->conn_finished = 1;
 
                if (skb) {
                        pr_debug("synack seq=%u", TCP_SKB_CB(skb)->seq);
@@ -264,7 +287,6 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
                if (!mptcp_finish_join(sk))
                        goto do_reset;
 
-               subflow->conn_finished = 1;
                MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);
        } else {
 do_reset:
@@ -322,7 +344,7 @@ drop:
 
 /* validate hmac received in third ACK */
 static bool subflow_hmac_valid(const struct request_sock *req,
-                              const struct tcp_options_received *rx_opt)
+                              const struct mptcp_options_received *mp_opt)
 {
        const struct mptcp_subflow_request_sock *subflow_req;
        u8 hmac[MPTCPOPT_HMAC_LEN];
@@ -339,7 +361,7 @@ static bool subflow_hmac_valid(const struct request_sock *req,
                              subflow_req->local_nonce, hmac);
 
        ret = true;
-       if (crypto_memneq(hmac, rx_opt->mptcp.hmac, sizeof(hmac)))
+       if (crypto_memneq(hmac, mp_opt->hmac, sizeof(hmac)))
                ret = false;
 
        sock_put((struct sock *)msk);
@@ -395,7 +417,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
 {
        struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk);
        struct mptcp_subflow_request_sock *subflow_req;
-       struct tcp_options_received opt_rx;
+       struct mptcp_options_received mp_opt;
        bool fallback_is_fatal = false;
        struct sock *new_msk = NULL;
        bool fallback = false;
@@ -403,7 +425,10 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
 
        pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);
 
-       opt_rx.mptcp.mp_capable = 0;
+       /* we need later a valid 'mp_capable' value even when options are not
+        * parsed
+        */
+       mp_opt.mp_capable = 0;
        if (tcp_rsk(req)->is_mptcp == 0)
                goto create_child;
 
@@ -418,22 +443,21 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
                        goto create_msk;
                }
 
-               mptcp_get_options(skb, &opt_rx);
-               if (!opt_rx.mptcp.mp_capable) {
+               mptcp_get_options(skb, &mp_opt);
+               if (!mp_opt.mp_capable) {
                        fallback = true;
                        goto create_child;
                }
 
 create_msk:
-               new_msk = mptcp_sk_clone(listener->conn, &opt_rx, req);
+               new_msk = mptcp_sk_clone(listener->conn, &mp_opt, req);
                if (!new_msk)
                        fallback = true;
        } else if (subflow_req->mp_join) {
                fallback_is_fatal = true;
-               opt_rx.mptcp.mp_join = 0;
-               mptcp_get_options(skb, &opt_rx);
-               if (!opt_rx.mptcp.mp_join ||
-                   !subflow_hmac_valid(req, &opt_rx)) {
+               mptcp_get_options(skb, &mp_opt);
+               if (!mp_opt.mp_join ||
+                   !subflow_hmac_valid(req, &mp_opt)) {
                        SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
                        return NULL;
                }
@@ -473,9 +497,9 @@ create_child:
                        /* with OoO packets we can reach here without ingress
                         * mpc option
                         */
-                       ctx->remote_key = opt_rx.mptcp.sndr_key;
-                       ctx->fully_established = opt_rx.mptcp.mp_capable;
-                       ctx->can_ack = opt_rx.mptcp.mp_capable;
+                       ctx->remote_key = mp_opt.sndr_key;
+                       ctx->fully_established = mp_opt.mp_capable;
+                       ctx->can_ack = mp_opt.mp_capable;
                } else if (ctx->mp_join) {
                        struct mptcp_sock *owner;
 
@@ -499,7 +523,7 @@ out:
        /* check for expected invariant - should never trigger, just help
         * catching eariler subtle bugs
         */
-       WARN_ON_ONCE(*own_req && child && tcp_sk(child)->is_mptcp &&
+       WARN_ON_ONCE(child && *own_req && tcp_sk(child)->is_mptcp &&
                     (!mptcp_subflow_ctx(child) ||
                      !mptcp_subflow_ctx(child)->conn));
        return child;
index 3d816a1..59151dc 100644 (file)
@@ -68,15 +68,13 @@ static bool udp_manip_pkt(struct sk_buff *skb,
                          enum nf_nat_manip_type maniptype)
 {
        struct udphdr *hdr;
-       bool do_csum;
 
        if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
                return false;
 
        hdr = (struct udphdr *)(skb->data + hdroff);
-       do_csum = hdr->check || skb->ip_summed == CHECKSUM_PARTIAL;
+       __udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, !!hdr->check);
 
-       __udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, do_csum);
        return true;
 }
 
index 9f5dea0..916a3c7 100644 (file)
@@ -165,12 +165,12 @@ static bool nf_osf_match_one(const struct sk_buff *skb,
 static const struct tcphdr *nf_osf_hdr_ctx_init(struct nf_osf_hdr_ctx *ctx,
                                                const struct sk_buff *skb,
                                                const struct iphdr *ip,
-                                               unsigned char *opts)
+                                               unsigned char *opts,
+                                               struct tcphdr *_tcph)
 {
        const struct tcphdr *tcp;
-       struct tcphdr _tcph;
 
-       tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), &_tcph);
+       tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), _tcph);
        if (!tcp)
                return NULL;
 
@@ -205,10 +205,11 @@ nf_osf_match(const struct sk_buff *skb, u_int8_t family,
        int fmatch = FMATCH_WRONG;
        struct nf_osf_hdr_ctx ctx;
        const struct tcphdr *tcp;
+       struct tcphdr _tcph;
 
        memset(&ctx, 0, sizeof(ctx));
 
-       tcp = nf_osf_hdr_ctx_init(&ctx, skb, ip, opts);
+       tcp = nf_osf_hdr_ctx_init(&ctx, skb, ip, opts, &_tcph);
        if (!tcp)
                return false;
 
@@ -265,10 +266,11 @@ bool nf_osf_find(const struct sk_buff *skb,
        const struct nf_osf_finger *kf;
        struct nf_osf_hdr_ctx ctx;
        const struct tcphdr *tcp;
+       struct tcphdr _tcph;
 
        memset(&ctx, 0, sizeof(ctx));
 
-       tcp = nf_osf_hdr_ctx_init(&ctx, skb, ip, opts);
+       tcp = nf_osf_hdr_ctx_init(&ctx, skb, ip, opts, &_tcph);
        if (!tcp)
                return false;
 
index 55bd142..0a7ecc2 100644 (file)
@@ -2070,6 +2070,7 @@ replay:
                err = PTR_ERR(block);
                goto errout;
        }
+       block->classid = parent;
 
        chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
        if (chain_index > TC_ACT_EXT_VAL_MASK) {
@@ -2612,12 +2613,10 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
                        return skb->len;
 
                parent = tcm->tcm_parent;
-               if (!parent) {
+               if (!parent)
                        q = dev->qdisc;
-                       parent = q->handle;
-               } else {
+               else
                        q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
-               }
                if (!q)
                        goto out;
                cops = q->ops->cl_ops;
@@ -2633,6 +2632,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
                block = cops->tcf_block(q, cl, NULL);
                if (!block)
                        goto out;
+               parent = block->classid;
                if (tcf_block_shared(block))
                        q = NULL;
        }
@@ -3523,6 +3523,16 @@ static void tcf_sample_get_group(struct flow_action_entry *entry,
 #endif
 }
 
+static enum flow_action_hw_stats tc_act_hw_stats(u8 hw_stats)
+{
+       if (WARN_ON_ONCE(hw_stats > TCA_ACT_HW_STATS_ANY))
+               return FLOW_ACTION_HW_STATS_DONT_CARE;
+       else if (!hw_stats)
+               return FLOW_ACTION_HW_STATS_DISABLED;
+
+       return hw_stats;
+}
+
 int tc_setup_flow_action(struct flow_action *flow_action,
                         const struct tcf_exts *exts)
 {
@@ -3546,7 +3556,7 @@ int tc_setup_flow_action(struct flow_action *flow_action,
                if (err)
                        goto err_out_locked;
 
-               entry->hw_stats = act->hw_stats;
+               entry->hw_stats = tc_act_hw_stats(act->hw_stats);
 
                if (is_tcf_gact_ok(act)) {
                        entry->id = FLOW_ACTION_ACCEPT;
@@ -3614,7 +3624,7 @@ int tc_setup_flow_action(struct flow_action *flow_action,
                                entry->mangle.mask = tcf_pedit_mask(act, k);
                                entry->mangle.val = tcf_pedit_val(act, k);
                                entry->mangle.offset = tcf_pedit_offset(act, k);
-                               entry->hw_stats = act->hw_stats;
+                               entry->hw_stats = tc_act_hw_stats(act->hw_stats);
                                entry = &flow_action->entries[++j];
                        }
                } else if (is_tcf_csum(act)) {
index a36974e..1bcf8fb 100644 (file)
@@ -323,7 +323,8 @@ static void choke_reset(struct Qdisc *sch)
 
        sch->q.qlen = 0;
        sch->qstats.backlog = 0;
-       memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
+       if (q->tab)
+               memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
        q->head = q->tail = 0;
        red_restart(&q->vars);
 }
index 968519f..436160b 100644 (file)
@@ -416,7 +416,7 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt,
                q->quantum = max(256U, nla_get_u32(tb[TCA_FQ_CODEL_QUANTUM]));
 
        if (tb[TCA_FQ_CODEL_DROP_BATCH_SIZE])
-               q->drop_batch_size = min(1U, nla_get_u32(tb[TCA_FQ_CODEL_DROP_BATCH_SIZE]));
+               q->drop_batch_size = max(1U, nla_get_u32(tb[TCA_FQ_CODEL_DROP_BATCH_SIZE]));
 
        if (tb[TCA_FQ_CODEL_MEMORY_LIMIT])
                q->memory_limit = min(1U << 31, nla_get_u32(tb[TCA_FQ_CODEL_MEMORY_LIMIT]));
index c787d4d..5a6def5 100644 (file)
@@ -637,6 +637,15 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
        if (ctl->divisor &&
            (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
                return -EINVAL;
+
+       /* slot->allot is a short, make sure quantum is not too big. */
+       if (ctl->quantum) {
+               unsigned int scaled = SFQ_ALLOT_SIZE(ctl->quantum);
+
+               if (scaled <= 0 || scaled > SHRT_MAX)
+                       return -EINVAL;
+       }
+
        if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max,
                                        ctl_v1->Wlog))
                return -EINVAL;
index 0fb10ab..7a5e4c4 100644 (file)
@@ -169,6 +169,9 @@ static int skbprio_change(struct Qdisc *sch, struct nlattr *opt,
 {
        struct tc_skbprio_qopt *ctl = nla_data(opt);
 
+       if (opt->nla_len != nla_attr_size(sizeof(*ctl)))
+               return -EINVAL;
+
        sch->limit = ctl->limit;
        return 0;
 }
index 25fbd8d..ac5cac0 100644 (file)
@@ -2032,7 +2032,6 @@ gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred,
        struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf;
        struct kvec *head = rqstp->rq_rcv_buf.head;
        struct rpc_auth *auth = cred->cr_auth;
-       unsigned int savedlen = rcv_buf->len;
        u32 offset, opaque_len, maj_stat;
        __be32 *p;
 
@@ -2043,9 +2042,9 @@ gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred,
        offset = (u8 *)(p) - (u8 *)head->iov_base;
        if (offset + opaque_len > rcv_buf->len)
                goto unwrap_failed;
-       rcv_buf->len = offset + opaque_len;
 
-       maj_stat = gss_unwrap(ctx->gc_gss_ctx, offset, rcv_buf);
+       maj_stat = gss_unwrap(ctx->gc_gss_ctx, offset,
+                             offset + opaque_len, rcv_buf);
        if (maj_stat == GSS_S_CONTEXT_EXPIRED)
                clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
        if (maj_stat != GSS_S_COMPLETE)
@@ -2059,10 +2058,9 @@ gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred,
         */
        xdr_init_decode(xdr, rcv_buf, p, rqstp);
 
-       auth->au_rslack = auth->au_verfsize + 2 +
-                         XDR_QUADLEN(savedlen - rcv_buf->len);
-       auth->au_ralign = auth->au_verfsize + 2 +
-                         XDR_QUADLEN(savedlen - rcv_buf->len);
+       auth->au_rslack = auth->au_verfsize + 2 + ctx->gc_gss_ctx->slack;
+       auth->au_ralign = auth->au_verfsize + 2 + ctx->gc_gss_ctx->align;
+
        return 0;
 unwrap_failed:
        trace_rpcgss_unwrap_failed(task);
index 6f2d30d..e7180da 100644 (file)
@@ -851,8 +851,8 @@ out_err:
 }
 
 u32
-gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
-                    u32 *headskip, u32 *tailskip)
+gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, u32 len,
+                    struct xdr_buf *buf, u32 *headskip, u32 *tailskip)
 {
        struct xdr_buf subbuf;
        u32 ret = 0;
@@ -881,7 +881,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
 
        /* create a segment skipping the header and leaving out the checksum */
        xdr_buf_subsegment(buf, &subbuf, offset + GSS_KRB5_TOK_HDR_LEN,
-                                   (buf->len - offset - GSS_KRB5_TOK_HDR_LEN -
+                                   (len - offset - GSS_KRB5_TOK_HDR_LEN -
                                     kctx->gk5e->cksumlength));
 
        nblocks = (subbuf.len + blocksize - 1) / blocksize;
@@ -926,7 +926,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
                goto out_err;
 
        /* Get the packet's hmac value */
-       ret = read_bytes_from_xdr_buf(buf, buf->len - kctx->gk5e->cksumlength,
+       ret = read_bytes_from_xdr_buf(buf, len - kctx->gk5e->cksumlength,
                                      pkt_hmac, kctx->gk5e->cksumlength);
        if (ret)
                goto out_err;
index 6c1920e..cf0fd17 100644 (file)
@@ -261,7 +261,9 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
 }
 
 static u32
-gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
+gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, int len,
+                      struct xdr_buf *buf, unsigned int *slack,
+                      unsigned int *align)
 {
        int                     signalg;
        int                     sealalg;
@@ -279,12 +281,13 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
        u32                     conflen = kctx->gk5e->conflen;
        int                     crypt_offset;
        u8                      *cksumkey;
+       unsigned int            saved_len = buf->len;
 
        dprintk("RPC:       gss_unwrap_kerberos\n");
 
        ptr = (u8 *)buf->head[0].iov_base + offset;
        if (g_verify_token_header(&kctx->mech_used, &bodysize, &ptr,
-                                       buf->len - offset))
+                                       len - offset))
                return GSS_S_DEFECTIVE_TOKEN;
 
        if ((ptr[0] != ((KG_TOK_WRAP_MSG >> 8) & 0xff)) ||
@@ -324,6 +327,7 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
            (!kctx->initiate && direction != 0))
                return GSS_S_BAD_SIG;
 
+       buf->len = len;
        if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) {
                struct crypto_sync_skcipher *cipher;
                int err;
@@ -376,11 +380,15 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
        data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start;
        memmove(orig_start, data_start, data_len);
        buf->head[0].iov_len -= (data_start - orig_start);
-       buf->len -= (data_start - orig_start);
+       buf->len = len - (data_start - orig_start);
 
        if (gss_krb5_remove_padding(buf, blocksize))
                return GSS_S_DEFECTIVE_TOKEN;
 
+       /* slack must include room for krb5 padding */
+       *slack = XDR_QUADLEN(saved_len - buf->len);
+       /* The GSS blob always precedes the RPC message payload */
+       *align = *slack;
        return GSS_S_COMPLETE;
 }
 
@@ -486,7 +494,9 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset,
 }
 
 static u32
-gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
+gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, int len,
+                      struct xdr_buf *buf, unsigned int *slack,
+                      unsigned int *align)
 {
        time64_t        now;
        u8              *ptr;
@@ -532,7 +542,7 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
        if (rrc != 0)
                rotate_left(offset + 16, buf, rrc);
 
-       err = (*kctx->gk5e->decrypt_v2)(kctx, offset, buf,
+       err = (*kctx->gk5e->decrypt_v2)(kctx, offset, len, buf,
                                        &headskip, &tailskip);
        if (err)
                return GSS_S_FAILURE;
@@ -542,7 +552,7 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
         * it against the original
         */
        err = read_bytes_from_xdr_buf(buf,
-                               buf->len - GSS_KRB5_TOK_HDR_LEN - tailskip,
+                               len - GSS_KRB5_TOK_HDR_LEN - tailskip,
                                decrypted_hdr, GSS_KRB5_TOK_HDR_LEN);
        if (err) {
                dprintk("%s: error %u getting decrypted_hdr\n", __func__, err);
@@ -568,18 +578,19 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
         * Note that buf->head[0].iov_len may indicate the available
         * head buffer space rather than that actually occupied.
         */
-       movelen = min_t(unsigned int, buf->head[0].iov_len, buf->len);
+       movelen = min_t(unsigned int, buf->head[0].iov_len, len);
        movelen -= offset + GSS_KRB5_TOK_HDR_LEN + headskip;
-       if (offset + GSS_KRB5_TOK_HDR_LEN + headskip + movelen >
-           buf->head[0].iov_len)
-               return GSS_S_FAILURE;
+       BUG_ON(offset + GSS_KRB5_TOK_HDR_LEN + headskip + movelen >
+                                                       buf->head[0].iov_len);
        memmove(ptr, ptr + GSS_KRB5_TOK_HDR_LEN + headskip, movelen);
        buf->head[0].iov_len -= GSS_KRB5_TOK_HDR_LEN + headskip;
-       buf->len -= GSS_KRB5_TOK_HDR_LEN + headskip;
+       buf->len = len - GSS_KRB5_TOK_HDR_LEN + headskip;
 
        /* Trim off the trailing "extra count" and checksum blob */
-       buf->len -= ec + GSS_KRB5_TOK_HDR_LEN + tailskip;
+       xdr_buf_trim(buf, ec + GSS_KRB5_TOK_HDR_LEN + tailskip);
 
+       *align = XDR_QUADLEN(GSS_KRB5_TOK_HDR_LEN + headskip);
+       *slack = *align + XDR_QUADLEN(ec + GSS_KRB5_TOK_HDR_LEN + tailskip);
        return GSS_S_COMPLETE;
 }
 
@@ -603,7 +614,8 @@ gss_wrap_kerberos(struct gss_ctx *gctx, int offset,
 }
 
 u32
-gss_unwrap_kerberos(struct gss_ctx *gctx, int offset, struct xdr_buf *buf)
+gss_unwrap_kerberos(struct gss_ctx *gctx, int offset,
+                   int len, struct xdr_buf *buf)
 {
        struct krb5_ctx *kctx = gctx->internal_ctx_id;
 
@@ -613,9 +625,11 @@ gss_unwrap_kerberos(struct gss_ctx *gctx, int offset, struct xdr_buf *buf)
        case ENCTYPE_DES_CBC_RAW:
        case ENCTYPE_DES3_CBC_RAW:
        case ENCTYPE_ARCFOUR_HMAC:
-               return gss_unwrap_kerberos_v1(kctx, offset, buf);
+               return gss_unwrap_kerberos_v1(kctx, offset, len, buf,
+                                             &gctx->slack, &gctx->align);
        case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
        case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
-               return gss_unwrap_kerberos_v2(kctx, offset, buf);
+               return gss_unwrap_kerberos_v2(kctx, offset, len, buf,
+                                             &gctx->slack, &gctx->align);
        }
 }
index db550bf..69316ab 100644 (file)
@@ -411,10 +411,11 @@ gss_wrap(struct gss_ctx   *ctx_id,
 u32
 gss_unwrap(struct gss_ctx      *ctx_id,
           int                  offset,
+          int                  len,
           struct xdr_buf       *buf)
 {
        return ctx_id->mech_type->gm_ops
-               ->gss_unwrap(ctx_id, offset, buf);
+               ->gss_unwrap(ctx_id, offset, len, buf);
 }
 
 
index 54ae5be..50d93c4 100644 (file)
@@ -906,7 +906,7 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g
        if (svc_getnl(&buf->head[0]) != seq)
                goto out;
        /* trim off the mic and padding at the end before returning */
-       buf->len -= 4 + round_up_to_quad(mic.len);
+       xdr_buf_trim(buf, round_up_to_quad(mic.len) + 4);
        stat = 0;
 out:
        kfree(mic.data);
@@ -934,7 +934,7 @@ static int
 unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx)
 {
        u32 priv_len, maj_stat;
-       int pad, saved_len, remaining_len, offset;
+       int pad, remaining_len, offset;
 
        clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
 
@@ -954,12 +954,8 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs
        buf->len -= pad;
        fix_priv_head(buf, pad);
 
-       /* Maybe it would be better to give gss_unwrap a length parameter: */
-       saved_len = buf->len;
-       buf->len = priv_len;
-       maj_stat = gss_unwrap(ctx, 0, buf);
+       maj_stat = gss_unwrap(ctx, 0, priv_len, buf);
        pad = priv_len - buf->len;
-       buf->len = saved_len;
        buf->len -= pad;
        /* The upper layers assume the buffer is aligned on 4-byte boundaries.
         * In the krb5p case, at least, the data ends up offset, so we need to
index 15b58c5..6f7d82f 100644 (file)
@@ -1150,6 +1150,47 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
 }
 EXPORT_SYMBOL_GPL(xdr_buf_subsegment);
 
+/**
+ * xdr_buf_trim - lop at most "len" bytes off the end of "buf"
+ * @buf: buf to be trimmed
+ * @len: number of bytes to reduce "buf" by
+ *
+ * Trim an xdr_buf by the given number of bytes by fixing up the lengths. Note
+ * that it's possible that we'll trim less than that amount if the xdr_buf is
+ * too small, or if (for instance) it's all in the head and the parser has
+ * already read too far into it.
+ */
+void xdr_buf_trim(struct xdr_buf *buf, unsigned int len)
+{
+       size_t cur;
+       unsigned int trim = len;
+
+       if (buf->tail[0].iov_len) {
+               cur = min_t(size_t, buf->tail[0].iov_len, trim);
+               buf->tail[0].iov_len -= cur;
+               trim -= cur;
+               if (!trim)
+                       goto fix_len;
+       }
+
+       if (buf->page_len) {
+               cur = min_t(unsigned int, buf->page_len, trim);
+               buf->page_len -= cur;
+               trim -= cur;
+               if (!trim)
+                       goto fix_len;
+       }
+
+       if (buf->head[0].iov_len) {
+               cur = min_t(size_t, buf->head[0].iov_len, trim);
+               buf->head[0].iov_len -= cur;
+               trim -= cur;
+       }
+fix_len:
+       buf->len -= (len - trim);
+}
+EXPORT_SYMBOL_GPL(xdr_buf_trim);
+
 static void __read_bytes_from_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len)
 {
        unsigned int this_len;
index 3a12fc1..73dbed0 100644 (file)
@@ -402,10 +402,11 @@ static int tipc_conn_rcv_from_sock(struct tipc_conn *con)
                read_lock_bh(&sk->sk_callback_lock);
                ret = tipc_conn_rcv_sub(srv, con, &s);
                read_unlock_bh(&sk->sk_callback_lock);
+               if (!ret)
+                       return 0;
        }
-       if (ret < 0)
-               tipc_conn_close(con);
 
+       tipc_conn_close(con);
        return ret;
 }
 
index c98e602..e23f94a 100644 (file)
@@ -800,6 +800,8 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
                        *copied -= sk_msg_free(sk, msg);
                        tls_free_open_rec(sk);
                }
+               if (psock)
+                       sk_psock_put(sk, psock);
                return err;
        }
 more_data:
@@ -2081,8 +2083,9 @@ static void tls_data_ready(struct sock *sk)
        strp_data_ready(&ctx->strp);
 
        psock = sk_psock_get(sk);
-       if (psock && !list_empty(&psock->ingress_msg)) {
-               ctx->saved_data_ready(sk);
+       if (psock) {
+               if (!list_empty(&psock->ingress_msg))
+                       ctx->saved_data_ready(sk);
                sk_psock_put(sk, psock);
        }
 }
index 709038a..69efc89 100644 (file)
@@ -157,7 +157,11 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque)
 
 void virtio_transport_deliver_tap_pkt(struct virtio_vsock_pkt *pkt)
 {
+       if (pkt->tap_delivered)
+               return;
+
        vsock_deliver_tap(virtio_transport_build_skb, pkt);
+       pkt->tap_delivered = true;
 }
 EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
 
index 8aa415a..0285aaa 100644 (file)
@@ -357,6 +357,12 @@ void x25_disconnect(struct sock *sk, int reason, unsigned char cause,
                sk->sk_state_change(sk);
                sock_set_flag(sk, SOCK_DEAD);
        }
+       if (x25->neighbour) {
+               read_lock_bh(&x25_list_lock);
+               x25_neigh_put(x25->neighbour);
+               x25->neighbour = NULL;
+               read_unlock_bh(&x25_list_lock);
+       }
 }
 
 /*
index 80b4a70..13a35f7 100644 (file)
@@ -416,7 +416,7 @@ TRACE_EVENT_FN(foo_bar_with_fn,
  * Note, TRACE_EVENT() itself is simply defined as:
  *
  * #define TRACE_EVENT(name, proto, args, tstruct, assign, printk)  \
- *  DEFINE_EVENT_CLASS(name, proto, args, tstruct, assign, printk); \
+ *  DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, printk); \
  *  DEFINE_EVENT(name, name, proto, args)
  *
  * The DEFINE_EVENT() also can be declared with conditions and reg functions:
index ba8b8d5..fbdb325 100755 (executable)
@@ -126,7 +126,7 @@ faultlinenum=$(( $(wc -l $T.oo  | cut -d" " -f1) - \
 faultline=`cat $T.dis | head -1 | cut -d":" -f2-`
 faultline=`echo "$faultline" | sed -e 's/\[/\\\[/g; s/\]/\\\]/g'`
 
-cat $T.oo | sed -e "${faultlinenum}s/^\(.*:\)\(.*\)/\1\*\2\t\t<-- trapping instruction/"
+cat $T.oo | sed -e "${faultlinenum}s/^\([^:]*:\)\(.*\)/\1\*\2\t\t<-- trapping instruction/"
 echo
 cat $T.aa
 cleanup
index 39db889..c4b9916 100644 (file)
@@ -12,7 +12,7 @@ rb_node_type = utils.CachedType("struct rb_node")
 
 def rb_first(root):
     if root.type == rb_root_type.get_type():
-        node = node.address.cast(rb_root_type.get_type().pointer())
+        node = root.address.cast(rb_root_type.get_type().pointer())
     elif root.type != rb_root_type.get_type().pointer():
         raise gdb.GdbError("Must be struct rb_root not {}".format(root.type))
 
@@ -28,7 +28,7 @@ def rb_first(root):
 
 def rb_last(root):
     if root.type == rb_root_type.get_type():
-        node = node.address.cast(rb_root_type.get_type().pointer())
+        node = root.address.cast(rb_root_type.get_type().pointer())
     elif root.type != rb_root_type.get_type().pointer():
         raise gdb.GdbError("Must be struct rb_root not {}".format(root.type))
 
index 16b9a42..0efaf45 100644 (file)
@@ -314,6 +314,7 @@ int apply_xbc(const char *path, const char *xbc_path)
        ret = delete_xbc(path);
        if (ret < 0) {
                pr_err("Failed to delete previous boot config: %d\n", ret);
+               free(data);
                return ret;
        }
 
@@ -321,24 +322,27 @@ int apply_xbc(const char *path, const char *xbc_path)
        fd = open(path, O_RDWR | O_APPEND);
        if (fd < 0) {
                pr_err("Failed to open %s: %d\n", path, fd);
+               free(data);
                return fd;
        }
        /* TODO: Ensure the @path is initramfs/initrd image */
        ret = write(fd, data, size + 8);
        if (ret < 0) {
                pr_err("Failed to apply a boot config: %d\n", ret);
-               return ret;
+               goto out;
        }
        /* Write a magic word of the bootconfig */
        ret = write(fd, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN);
        if (ret < 0) {
                pr_err("Failed to apply a boot config magic: %d\n", ret);
-               return ret;
+               goto out;
        }
+       ret = 0;
+out:
        close(fd);
        free(data);
 
-       return 0;
+       return ret;
 }
 
 int usage(void)
index 7427a5e..9d8e961 100644 (file)
@@ -159,7 +159,12 @@ class IocgStat:
         else:
             self.inflight_pct = 0
 
-        self.debt_ms = iocg.abs_vdebt.counter.value_() / VTIME_PER_USEC / 1000
+        # vdebt used to be an atomic64_t and is now u64, support both
+        try:
+            self.debt_ms = iocg.abs_vdebt.counter.value_() / VTIME_PER_USEC / 1000
+        except:
+            self.debt_ms = iocg.abs_vdebt.value_() / VTIME_PER_USEC / 1000
+
         self.use_delay = blkg.use_delay.counter.value_()
         self.delay_ms = blkg.delay_nsec.counter.value_() / 1_000_000
 
index 4b170fd..3c6da70 100644 (file)
@@ -72,6 +72,17 @@ static struct instruction *next_insn_same_func(struct objtool_file *file,
        return find_insn(file, func->cfunc->sec, func->cfunc->offset);
 }
 
+static struct instruction *prev_insn_same_sym(struct objtool_file *file,
+                                              struct instruction *insn)
+{
+       struct instruction *prev = list_prev_entry(insn, list);
+
+       if (&prev->list != &file->insn_list && prev->func == insn->func)
+               return prev;
+
+       return NULL;
+}
+
 #define func_for_each_insn(file, func, insn)                           \
        for (insn = find_insn(file, func->sec, func->offset);           \
             insn;                                                      \
@@ -1050,8 +1061,8 @@ static struct rela *find_jump_table(struct objtool_file *file,
         * it.
         */
        for (;
-            &insn->list != &file->insn_list && insn->func && insn->func->pfunc == func;
-            insn = insn->first_jump_src ?: list_prev_entry(insn, list)) {
+            insn && insn->func && insn->func->pfunc == func;
+            insn = insn->first_jump_src ?: prev_insn_same_sym(file, insn)) {
 
                if (insn != orig_insn && insn->type == INSN_JUMP_DYNAMIC)
                        break;
@@ -1449,7 +1460,7 @@ static int update_insn_state_regs(struct instruction *insn, struct insn_state *s
        struct cfi_reg *cfa = &state->cfa;
        struct stack_op *op = &insn->stack_op;
 
-       if (cfa->base != CFI_SP)
+       if (cfa->base != CFI_SP && cfa->base != CFI_SP_INDIRECT)
                return 0;
 
        /* push */
index 0b79c23..12e01ac 100644 (file)
@@ -87,9 +87,10 @@ struct elf {
 #define OFFSET_STRIDE          (1UL << OFFSET_STRIDE_BITS)
 #define OFFSET_STRIDE_MASK     (~(OFFSET_STRIDE - 1))
 
-#define for_offset_range(_offset, _start, _end)                \
-       for (_offset = ((_start) & OFFSET_STRIDE_MASK); \
-            _offset <= ((_end) & OFFSET_STRIDE_MASK);  \
+#define for_offset_range(_offset, _start, _end)                        \
+       for (_offset = ((_start) & OFFSET_STRIDE_MASK);         \
+            _offset >= ((_start) & OFFSET_STRIDE_MASK) &&      \
+            _offset <= ((_end) & OFFSET_STRIDE_MASK);          \
             _offset += OFFSET_STRIDE)
 
 static inline u32 sec_offset_hash(struct section *sec, unsigned long offset)
index 11eee0b..d979ff1 100644 (file)
@@ -3,6 +3,7 @@
 #define _GNU_SOURCE
 #include <poll.h>
 #include <unistd.h>
+#include <assert.h>
 #include <signal.h>
 #include <pthread.h>
 #include <sys/epoll.h>
@@ -3136,4 +3137,149 @@ TEST(epoll59)
        close(ctx.sfd[0]);
 }
 
+enum {
+       EPOLL60_EVENTS_NR = 10,
+};
+
+struct epoll60_ctx {
+       volatile int stopped;
+       int ready;
+       int waiters;
+       int epfd;
+       int evfd[EPOLL60_EVENTS_NR];
+};
+
+static void *epoll60_wait_thread(void *ctx_)
+{
+       struct epoll60_ctx *ctx = ctx_;
+       struct epoll_event e;
+       sigset_t sigmask;
+       uint64_t v;
+       int ret;
+
+       /* Block SIGUSR1 */
+       sigemptyset(&sigmask);
+       sigaddset(&sigmask, SIGUSR1);
+       sigprocmask(SIG_SETMASK, &sigmask, NULL);
+
+       /* Prepare empty mask for epoll_pwait() */
+       sigemptyset(&sigmask);
+
+       while (!ctx->stopped) {
+               /* Mark we are ready */
+               __atomic_fetch_add(&ctx->ready, 1, __ATOMIC_ACQUIRE);
+
+               /* Start when all are ready */
+               while (__atomic_load_n(&ctx->ready, __ATOMIC_ACQUIRE) &&
+                      !ctx->stopped);
+
+               /* Account this waiter */
+               __atomic_fetch_add(&ctx->waiters, 1, __ATOMIC_ACQUIRE);
+
+               ret = epoll_pwait(ctx->epfd, &e, 1, 2000, &sigmask);
+               if (ret != 1) {
+                       /* We expect only signal delivery on stop */
+                       assert(ret < 0 && errno == EINTR && "Lost wakeup!\n");
+                       assert(ctx->stopped);
+                       break;
+               }
+
+               ret = read(e.data.fd, &v, sizeof(v));
+               /* Since we are on ET mode, thus each thread gets its own fd. */
+               assert(ret == sizeof(v));
+
+               __atomic_fetch_sub(&ctx->waiters, 1, __ATOMIC_RELEASE);
+       }
+
+       return NULL;
+}
+
+static inline unsigned long long msecs(void)
+{
+       struct timespec ts;
+       unsigned long long msecs;
+
+       clock_gettime(CLOCK_REALTIME, &ts);
+       msecs = ts.tv_sec * 1000ull;
+       msecs += ts.tv_nsec / 1000000ull;
+
+       return msecs;
+}
+
+static inline int count_waiters(struct epoll60_ctx *ctx)
+{
+       return __atomic_load_n(&ctx->waiters, __ATOMIC_ACQUIRE);
+}
+
+TEST(epoll60)
+{
+       struct epoll60_ctx ctx = { 0 };
+       pthread_t waiters[ARRAY_SIZE(ctx.evfd)];
+       struct epoll_event e;
+       int i, n, ret;
+
+       signal(SIGUSR1, signal_handler);
+
+       ctx.epfd = epoll_create1(0);
+       ASSERT_GE(ctx.epfd, 0);
+
+       /* Create event fds */
+       for (i = 0; i < ARRAY_SIZE(ctx.evfd); i++) {
+               ctx.evfd[i] = eventfd(0, EFD_NONBLOCK);
+               ASSERT_GE(ctx.evfd[i], 0);
+
+               e.events = EPOLLIN | EPOLLET;
+               e.data.fd = ctx.evfd[i];
+               ASSERT_EQ(epoll_ctl(ctx.epfd, EPOLL_CTL_ADD, ctx.evfd[i], &e), 0);
+       }
+
+       /* Create waiter threads */
+       for (i = 0; i < ARRAY_SIZE(waiters); i++)
+               ASSERT_EQ(pthread_create(&waiters[i], NULL,
+                                        epoll60_wait_thread, &ctx), 0);
+
+       for (i = 0; i < 300; i++) {
+               uint64_t v = 1, ms;
+
+               /* Wait for all to be ready */
+               while (__atomic_load_n(&ctx.ready, __ATOMIC_ACQUIRE) !=
+                      ARRAY_SIZE(ctx.evfd))
+                       ;
+
+               /* Steady, go */
+               __atomic_fetch_sub(&ctx.ready, ARRAY_SIZE(ctx.evfd),
+                                  __ATOMIC_ACQUIRE);
+
+               /* Wait all have gone to kernel */
+               while (count_waiters(&ctx) != ARRAY_SIZE(ctx.evfd))
+                       ;
+
+               /* 1ms should be enough to schedule away */
+               usleep(1000);
+
+               /* Quickly signal all handles at once */
+               for (n = 0; n < ARRAY_SIZE(ctx.evfd); n++) {
+                       ret = write(ctx.evfd[n], &v, sizeof(v));
+                       ASSERT_EQ(ret, sizeof(v));
+               }
+
+               /* Busy loop for 1s and wait for all waiters to wake up */
+               ms = msecs();
+               while (count_waiters(&ctx) && msecs() < ms + 1000)
+                       ;
+
+               ASSERT_EQ(count_waiters(&ctx), 0);
+       }
+       ctx.stopped = 1;
+       /* Stop waiters */
+       for (i = 0; i < ARRAY_SIZE(waiters); i++)
+               ret = pthread_kill(waiters[i], SIGUSR1);
+       for (i = 0; i < ARRAY_SIZE(waiters); i++)
+               pthread_join(waiters[i], NULL);
+
+       for (i = 0; i < ARRAY_SIZE(waiters); i++)
+               close(ctx.evfd[i]);
+       close(ctx.epfd);
+}
+
 TEST_HARNESS_MAIN
index 063ecb2..a4605b5 100755 (executable)
@@ -17,6 +17,7 @@ echo "                -v|--verbose Increase verbosity of test messages"
 echo "         -vv        Alias of -v -v (Show all results in stdout)"
 echo "         -vvv       Alias of -v -v -v (Show all commands immediately)"
 echo "         --fail-unsupported Treat UNSUPPORTED as a failure"
+echo "         --fail-unresolved Treat UNRESOLVED as a failure"
 echo "         -d|--debug Debug mode (trace all shell commands)"
 echo "         -l|--logdir <dir> Save logs on the <dir>"
 echo "                     If <dir> is -, all logs output in console only"
@@ -29,8 +30,25 @@ err_ret=1
 # kselftest skip code is 4
 err_skip=4
 
+# cgroup RT scheduling prevents chrt commands from succeeding, which
+# induces failures in test wakeup tests.  Disable for the duration of
+# the tests.
+
+readonly sched_rt_runtime=/proc/sys/kernel/sched_rt_runtime_us
+
+sched_rt_runtime_orig=$(cat $sched_rt_runtime)
+
+setup() {
+  echo -1 > $sched_rt_runtime
+}
+
+cleanup() {
+  echo $sched_rt_runtime_orig > $sched_rt_runtime
+}
+
 errexit() { # message
   echo "Error: $1" 1>&2
+  cleanup
   exit $err_ret
 }
 
@@ -39,6 +57,8 @@ if [ `id -u` -ne 0 ]; then
   errexit "this must be run by root user"
 fi
 
+setup
+
 # Utilities
 absdir() { # file_path
   (cd `dirname $1`; pwd)
@@ -93,6 +113,10 @@ parse_opts() { # opts
       UNSUPPORTED_RESULT=1
       shift 1
     ;;
+    --fail-unresolved)
+      UNRESOLVED_RESULT=1
+      shift 1
+    ;;
     --logdir|-l)
       LOG_DIR=$2
       shift 2
@@ -157,6 +181,7 @@ KEEP_LOG=0
 DEBUG=0
 VERBOSE=0
 UNSUPPORTED_RESULT=0
+UNRESOLVED_RESULT=0
 STOP_FAILURE=0
 # Parse command-line options
 parse_opts $*
@@ -235,6 +260,7 @@ TOTAL_RESULT=0
 
 INSTANCE=
 CASENO=0
+
 testcase() { # testfile
   CASENO=$((CASENO+1))
   desc=`grep "^#[ \t]*description:" $1 | cut -f2 -d:`
@@ -260,7 +286,7 @@ eval_result() { # sigval
     $UNRESOLVED)
       prlog "  [${color_blue}UNRESOLVED${color_reset}]"
       UNRESOLVED_CASES="$UNRESOLVED_CASES $CASENO"
-      return 1 # this is a kind of bug.. something happened.
+      return $UNRESOLVED_RESULT # depends on use case
     ;;
     $UNTESTED)
       prlog "  [${color_blue}UNTESTED${color_reset}]"
@@ -273,7 +299,7 @@ eval_result() { # sigval
       return $UNSUPPORTED_RESULT # depends on use case
     ;;
     $XFAIL)
-      prlog "  [${color_red}XFAIL${color_reset}]"
+      prlog "  [${color_green}XFAIL${color_reset}]"
       XFAILED_CASES="$XFAILED_CASES $CASENO"
       return 0
     ;;
@@ -406,5 +432,7 @@ prlog "# of unsupported: " `echo $UNSUPPORTED_CASES | wc -w`
 prlog "# of xfailed: " `echo $XFAILED_CASES | wc -w`
 prlog "# of undefined(test bug): " `echo $UNDEFINED_CASES | wc -w`
 
+cleanup
+
 # if no error, return 0
 exit $TOTAL_RESULT
index 712a2dd..b728c0a 100644 (file)
@@ -5,8 +5,34 @@ all:
 
 top_srcdir = ../../../..
 KSFT_KHDR_INSTALL := 1
+
+# For cross-builds to work, UNAME_M has to map to ARCH and arch specific
+# directories and targets in this Makefile. "uname -m" doesn't map to
+# arch specific sub-directory names.
+#
+# UNAME_M variable to used to run the compiles pointing to the right arch
+# directories and build the right targets for these supported architectures.
+#
+# TEST_GEN_PROGS and LIBKVM are set using UNAME_M variable.
+# LINUX_TOOL_ARCH_INCLUDE is set using ARCH variable.
+#
+# x86_64 targets are named to include x86_64 as a suffix and directories
+# for includes are in x86_64 sub-directory. s390x and aarch64 follow the
+# same convention. "uname -m" doesn't result in the correct mapping for
+# s390x and aarch64.
+#
+# No change necessary for x86_64
 UNAME_M := $(shell uname -m)
 
+# Set UNAME_M for arm64 compile/install to work
+ifeq ($(ARCH),arm64)
+       UNAME_M := aarch64
+endif
+# Set UNAME_M s390x compile/install to work
+ifeq ($(ARCH),s390)
+       UNAME_M := s390x
+endif
+
 LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c
 LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c
 LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c
@@ -53,7 +79,7 @@ LIBKVM += $(LIBKVM_$(UNAME_M))
 INSTALL_HDR_PATH = $(top_srcdir)/usr
 LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
 LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include
-LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/x86/include
+LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include
 CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
        -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \
        -I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \
@@ -84,6 +110,7 @@ $(LIBKVM_OBJ): $(OUTPUT)/%.o: %.c
 $(OUTPUT)/libkvm.a: $(LIBKVM_OBJ)
        $(AR) crs $@ $^
 
+x := $(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS))))
 all: $(STATIC_LIBS)
 $(TEST_GEN_PROGS): $(STATIC_LIBS)
 
index d8f4d6b..a034438 100644 (file)
@@ -219,8 +219,8 @@ struct hv_enlightened_vmcs {
 #define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \
                (~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
 
-struct hv_enlightened_vmcs *current_evmcs;
-struct hv_vp_assist_page *current_vp_assist;
+extern struct hv_enlightened_vmcs *current_evmcs;
+extern struct hv_vp_assist_page *current_vp_assist;
 
 int vcpu_enable_evmcs(struct kvm_vm *vm, int vcpu_id);
 
index 6f17f69..4ae104f 100644 (file)
@@ -17,6 +17,9 @@
 
 bool enable_evmcs;
 
+struct hv_enlightened_vmcs *current_evmcs;
+struct hv_vp_assist_page *current_vp_assist;
+
 struct eptPageTableEntry {
        uint64_t readable:1;
        uint64_t writable:1;
index 35505b3..4555f88 100644 (file)
@@ -165,9 +165,10 @@ void *child_thread(void *arg)
                        socklen_t zc_len = sizeof(zc);
                        int res;
 
+                       memset(&zc, 0, sizeof(zc));
                        zc.address = (__u64)((unsigned long)addr);
                        zc.length = chunk_size;
-                       zc.recv_skip_hint = 0;
+
                        res = getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE,
                                         &zc, &zc_len);
                        if (res == -1)
@@ -281,12 +282,14 @@ static void setup_sockaddr(int domain, const char *str_addr,
 static void do_accept(int fdlisten)
 {
        pthread_attr_t attr;
+       int rcvlowat;
 
        pthread_attr_init(&attr);
        pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
 
+       rcvlowat = chunk_size;
        if (setsockopt(fdlisten, SOL_SOCKET, SO_RCVLOWAT,
-                      &chunk_size, sizeof(chunk_size)) == -1) {
+                      &rcvlowat, sizeof(rcvlowat)) == -1) {
                perror("setsockopt SO_RCVLOWAT");
        }
 
index 936e1ca..17a1f53 100755 (executable)
@@ -48,8 +48,11 @@ cleanup() {
        exec 2>/dev/null
        printf "$orig_message_cost" > /proc/sys/net/core/message_cost
        ip0 link del dev wg0
+       ip0 link del dev wg1
        ip1 link del dev wg0
+       ip1 link del dev wg1
        ip2 link del dev wg0
+       ip2 link del dev wg1
        local to_kill="$(ip netns pids $netns0) $(ip netns pids $netns1) $(ip netns pids $netns2)"
        [[ -n $to_kill ]] && kill $to_kill
        pp ip netns del $netns1
@@ -77,18 +80,20 @@ ip0 link set wg0 netns $netns2
 key1="$(pp wg genkey)"
 key2="$(pp wg genkey)"
 key3="$(pp wg genkey)"
+key4="$(pp wg genkey)"
 pub1="$(pp wg pubkey <<<"$key1")"
 pub2="$(pp wg pubkey <<<"$key2")"
 pub3="$(pp wg pubkey <<<"$key3")"
+pub4="$(pp wg pubkey <<<"$key4")"
 psk="$(pp wg genpsk)"
 [[ -n $key1 && -n $key2 && -n $psk ]]
 
 configure_peers() {
        ip1 addr add 192.168.241.1/24 dev wg0
-       ip1 addr add fd00::1/24 dev wg0
+       ip1 addr add fd00::1/112 dev wg0
 
        ip2 addr add 192.168.241.2/24 dev wg0
-       ip2 addr add fd00::2/24 dev wg0
+       ip2 addr add fd00::2/112 dev wg0
 
        n1 wg set wg0 \
                private-key <(echo "$key1") \
@@ -230,9 +235,38 @@ n1 ping -W 1 -c 1 192.168.241.2
 n1 wg set wg0 private-key <(echo "$key3")
 n2 wg set wg0 peer "$pub3" preshared-key <(echo "$psk") allowed-ips 192.168.241.1/32 peer "$pub1" remove
 n1 ping -W 1 -c 1 192.168.241.2
+n2 wg set wg0 peer "$pub3" remove
+
+# Test that we can route wg through wg
+ip1 addr flush dev wg0
+ip2 addr flush dev wg0
+ip1 addr add fd00::5:1/112 dev wg0
+ip2 addr add fd00::5:2/112 dev wg0
+n1 wg set wg0 private-key <(echo "$key1") peer "$pub2" preshared-key <(echo "$psk") allowed-ips fd00::5:2/128 endpoint 127.0.0.1:2
+n2 wg set wg0 private-key <(echo "$key2") listen-port 2 peer "$pub1" preshared-key <(echo "$psk") allowed-ips fd00::5:1/128 endpoint 127.212.121.99:9998
+ip1 link add wg1 type wireguard
+ip2 link add wg1 type wireguard
+ip1 addr add 192.168.241.1/24 dev wg1
+ip1 addr add fd00::1/112 dev wg1
+ip2 addr add 192.168.241.2/24 dev wg1
+ip2 addr add fd00::2/112 dev wg1
+ip1 link set mtu 1340 up dev wg1
+ip2 link set mtu 1340 up dev wg1
+n1 wg set wg1 listen-port 5 private-key <(echo "$key3") peer "$pub4" allowed-ips 192.168.241.2/32,fd00::2/128 endpoint [fd00::5:2]:5
+n2 wg set wg1 listen-port 5 private-key <(echo "$key4") peer "$pub3" allowed-ips 192.168.241.1/32,fd00::1/128 endpoint [fd00::5:1]:5
+tests
+# Try to set up a routing loop between the two namespaces
+ip1 link set netns $netns0 dev wg1
+ip0 addr add 192.168.241.1/24 dev wg1
+ip0 link set up dev wg1
+n0 ping -W 1 -c 1 192.168.241.2
+n1 wg set wg0 peer "$pub2" endpoint 192.168.241.2:7
+ip2 link del wg0
+ip2 link del wg1
+! n0 ping -W 1 -c 10 -f 192.168.241.2 || false # Should not crash kernel
 
+ip0 link del wg1
 ip1 link del wg0
-ip2 link del wg0
 
 # Test using NAT. We now change the topology to this:
 # ┌────────────────────────────────────────┐    ┌────────────────────────────────────────────────┐     ┌────────────────────────────────────────┐
@@ -282,6 +316,20 @@ pp sleep 3
 n2 ping -W 1 -c 1 192.168.241.1
 n1 wg set wg0 peer "$pub2" persistent-keepalive 0
 
+# Test that onion routing works, even when it loops
+n1 wg set wg0 peer "$pub3" allowed-ips 192.168.242.2/32 endpoint 192.168.241.2:5
+ip1 addr add 192.168.242.1/24 dev wg0
+ip2 link add wg1 type wireguard
+ip2 addr add 192.168.242.2/24 dev wg1
+n2 wg set wg1 private-key <(echo "$key3") listen-port 5 peer "$pub1" allowed-ips 192.168.242.1/32
+ip2 link set wg1 up
+n1 ping -W 1 -c 1 192.168.242.2
+ip2 link del wg1
+n1 wg set wg0 peer "$pub3" endpoint 192.168.242.2:5
+! n1 ping -W 1 -c 1 192.168.242.2 || false # Should not crash kernel
+n1 wg set wg0 peer "$pub3" remove
+ip1 addr del 192.168.242.1/24 dev wg0
+
 # Do a wg-quick(8)-style policy routing for the default route, making sure vethc has a v6 address to tease out bugs.
 ip1 -6 addr add fc00::9/96 dev vethc
 ip1 -6 route add default via fc00::1
index 990c510..f52f1e2 100644 (file)
@@ -10,3 +10,4 @@ CONFIG_CMDLINE_BOOL=y
 CONFIG_CMDLINE="console=hvc0 wg.success=hvc1"
 CONFIG_SECTION_MISMATCH_WARN_ONLY=y
 CONFIG_FRAME_WARN=1280
+CONFIG_THREAD_SHIFT=14
index 5909e7e..9803dbb 100644 (file)
@@ -25,7 +25,6 @@ CONFIG_KASAN=y
 CONFIG_KASAN_INLINE=y
 CONFIG_UBSAN=y
 CONFIG_UBSAN_SANITIZE_ALL=y
-CONFIG_UBSAN_NO_ALIGNMENT=y
 CONFIG_UBSAN_NULL=y
 CONFIG_DEBUG_KMEMLEAK=y
 CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE=8192
index d31f267..25c0e47 100644 (file)
@@ -125,12 +125,16 @@ static void __hyp_text kvm_adjust_itstate(struct kvm_vcpu *vcpu)
  */
 void __hyp_text kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr)
 {
+       u32 pc = *vcpu_pc(vcpu);
        bool is_thumb;
 
        is_thumb = !!(*vcpu_cpsr(vcpu) & PSR_AA32_T_BIT);
        if (is_thumb && !is_wide_instr)
-               *vcpu_pc(vcpu) += 2;
+               pc += 2;
        else
-               *vcpu_pc(vcpu) += 4;
+               pc += 4;
+
+       *vcpu_pc(vcpu) = pc;
+
        kvm_adjust_itstate(vcpu);
 }
index 14a162e..ae36471 100644 (file)
@@ -186,6 +186,33 @@ static void kvm_psci_system_reset(struct kvm_vcpu *vcpu)
        kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET);
 }
 
+static void kvm_psci_narrow_to_32bit(struct kvm_vcpu *vcpu)
+{
+       int i;
+
+       /*
+        * Zero the input registers' upper 32 bits. They will be fully
+        * zeroed on exit, so we're fine changing them in place.
+        */
+       for (i = 1; i < 4; i++)
+               vcpu_set_reg(vcpu, i, lower_32_bits(vcpu_get_reg(vcpu, i)));
+}
+
+static unsigned long kvm_psci_check_allowed_function(struct kvm_vcpu *vcpu, u32 fn)
+{
+       switch(fn) {
+       case PSCI_0_2_FN64_CPU_SUSPEND:
+       case PSCI_0_2_FN64_CPU_ON:
+       case PSCI_0_2_FN64_AFFINITY_INFO:
+               /* Disallow these functions for 32bit guests */
+               if (vcpu_mode_is_32bit(vcpu))
+                       return PSCI_RET_NOT_SUPPORTED;
+               break;
+       }
+
+       return 0;
+}
+
 static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
 {
        struct kvm *kvm = vcpu->kvm;
@@ -193,6 +220,10 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
        unsigned long val;
        int ret = 1;
 
+       val = kvm_psci_check_allowed_function(vcpu, psci_fn);
+       if (val)
+               goto out;
+
        switch (psci_fn) {
        case PSCI_0_2_FN_PSCI_VERSION:
                /*
@@ -210,12 +241,16 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
                val = PSCI_RET_SUCCESS;
                break;
        case PSCI_0_2_FN_CPU_ON:
+               kvm_psci_narrow_to_32bit(vcpu);
+               fallthrough;
        case PSCI_0_2_FN64_CPU_ON:
                mutex_lock(&kvm->lock);
                val = kvm_psci_vcpu_on(vcpu);
                mutex_unlock(&kvm->lock);
                break;
        case PSCI_0_2_FN_AFFINITY_INFO:
+               kvm_psci_narrow_to_32bit(vcpu);
+               fallthrough;
        case PSCI_0_2_FN64_AFFINITY_INFO:
                val = kvm_psci_vcpu_affinity_info(vcpu);
                break;
@@ -256,6 +291,7 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
                break;
        }
 
+out:
        smccc_set_retval(vcpu, val, 0, 0, 0);
        return ret;
 }
@@ -273,6 +309,10 @@ static int kvm_psci_1_0_call(struct kvm_vcpu *vcpu)
                break;
        case PSCI_1_0_FN_PSCI_FEATURES:
                feature = smccc_get_arg1(vcpu);
+               val = kvm_psci_check_allowed_function(vcpu, feature);
+               if (val)
+                       break;
+
                switch(feature) {
                case PSCI_0_2_FN_PSCI_VERSION:
                case PSCI_0_2_FN_CPU_SUSPEND:
index a963b9d..32e32d6 100644 (file)
@@ -294,8 +294,15 @@ int vgic_init(struct kvm *kvm)
                }
        }
 
-       if (vgic_has_its(kvm)) {
+       if (vgic_has_its(kvm))
                vgic_lpi_translation_cache_init(kvm);
+
+       /*
+        * If we have GICv4.1 enabled, unconditionnaly request enable the
+        * v4 support so that we get HW-accelerated vSGIs. Otherwise, only
+        * enable it if we present a virtual ITS to the guest.
+        */
+       if (vgic_supports_direct_msis(kvm)) {
                ret = vgic_v4_init(kvm);
                if (ret)
                        goto out;
@@ -348,6 +355,12 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 
+       /*
+        * Retire all pending LPIs on this vcpu anyway as we're
+        * going to destroy it.
+        */
+       vgic_flush_pending_lpis(vcpu);
+
        INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
 }
 
@@ -359,10 +372,10 @@ static void __kvm_vgic_destroy(struct kvm *kvm)
 
        vgic_debug_destroy(kvm);
 
-       kvm_vgic_dist_destroy(kvm);
-
        kvm_for_each_vcpu(i, vcpu, kvm)
                kvm_vgic_vcpu_destroy(vcpu);
+
+       kvm_vgic_dist_destroy(kvm);
 }
 
 void kvm_vgic_destroy(struct kvm *kvm)
index d53d34a..c012a52 100644 (file)
@@ -96,14 +96,21 @@ out_unlock:
         * We "cache" the configuration table entries in our struct vgic_irq's.
         * However we only have those structs for mapped IRQs, so we read in
         * the respective config data from memory here upon mapping the LPI.
+        *
+        * Should any of these fail, behave as if we couldn't create the LPI
+        * by dropping the refcount and returning the error.
         */
        ret = update_lpi_config(kvm, irq, NULL, false);
-       if (ret)
+       if (ret) {
+               vgic_put_irq(kvm, irq);
                return ERR_PTR(ret);
+       }
 
        ret = vgic_v3_lpi_sync_pending_status(kvm, irq);
-       if (ret)
+       if (ret) {
+               vgic_put_irq(kvm, irq);
                return ERR_PTR(ret);
+       }
 
        return irq;
 }
index 5945f06..a016f07 100644 (file)
@@ -409,24 +409,28 @@ static const struct vgic_register_region vgic_v2_dist_registers[] = {
                NULL, vgic_mmio_uaccess_write_v2_group, 1,
                VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_SET,
-               vgic_mmio_read_enable, vgic_mmio_write_senable, NULL, NULL, 1,
+               vgic_mmio_read_enable, vgic_mmio_write_senable,
+               NULL, vgic_uaccess_write_senable, 1,
                VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_CLEAR,
-               vgic_mmio_read_enable, vgic_mmio_write_cenable, NULL, NULL, 1,
+               vgic_mmio_read_enable, vgic_mmio_write_cenable,
+               NULL, vgic_uaccess_write_cenable, 1,
                VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_SET,
-               vgic_mmio_read_pending, vgic_mmio_write_spending, NULL, NULL, 1,
+               vgic_mmio_read_pending, vgic_mmio_write_spending,
+               NULL, vgic_uaccess_write_spending, 1,
                VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_CLEAR,
-               vgic_mmio_read_pending, vgic_mmio_write_cpending, NULL, NULL, 1,
+               vgic_mmio_read_pending, vgic_mmio_write_cpending,
+               NULL, vgic_uaccess_write_cpending, 1,
                VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_SET,
                vgic_mmio_read_active, vgic_mmio_write_sactive,
-               NULL, vgic_mmio_uaccess_write_sactive, 1,
+               vgic_uaccess_read_active, vgic_mmio_uaccess_write_sactive, 1,
                VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_CLEAR,
                vgic_mmio_read_active, vgic_mmio_write_cactive,
-               NULL, vgic_mmio_uaccess_write_cactive, 1,
+               vgic_uaccess_read_active, vgic_mmio_uaccess_write_cactive, 1,
                VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PRI,
                vgic_mmio_read_priority, vgic_mmio_write_priority, NULL, NULL,
index e72dcc4..89a14ec 100644 (file)
@@ -50,7 +50,8 @@ bool vgic_has_its(struct kvm *kvm)
 
 bool vgic_supports_direct_msis(struct kvm *kvm)
 {
-       return kvm_vgic_global_state.has_gicv4 && vgic_has_its(kvm);
+       return (kvm_vgic_global_state.has_gicv4_1 ||
+               (kvm_vgic_global_state.has_gicv4 && vgic_has_its(kvm)));
 }
 
 /*
@@ -538,10 +539,12 @@ static const struct vgic_register_region vgic_v3_dist_registers[] = {
                vgic_mmio_read_group, vgic_mmio_write_group, NULL, NULL, 1,
                VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISENABLER,
-               vgic_mmio_read_enable, vgic_mmio_write_senable, NULL, NULL, 1,
+               vgic_mmio_read_enable, vgic_mmio_write_senable,
+               NULL, vgic_uaccess_write_senable, 1,
                VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICENABLER,
-               vgic_mmio_read_enable, vgic_mmio_write_cenable, NULL, NULL, 1,
+               vgic_mmio_read_enable, vgic_mmio_write_cenable,
+              NULL, vgic_uaccess_write_cenable, 1,
                VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISPENDR,
                vgic_mmio_read_pending, vgic_mmio_write_spending,
@@ -553,11 +556,11 @@ static const struct vgic_register_region vgic_v3_dist_registers[] = {
                VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISACTIVER,
                vgic_mmio_read_active, vgic_mmio_write_sactive,
-               NULL, vgic_mmio_uaccess_write_sactive, 1,
+               vgic_uaccess_read_active, vgic_mmio_uaccess_write_sactive, 1,
                VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICACTIVER,
                vgic_mmio_read_active, vgic_mmio_write_cactive,
-               NULL, vgic_mmio_uaccess_write_cactive,
+               vgic_uaccess_read_active, vgic_mmio_uaccess_write_cactive,
                1, VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IPRIORITYR,
                vgic_mmio_read_priority, vgic_mmio_write_priority, NULL, NULL,
@@ -609,11 +612,13 @@ static const struct vgic_register_region vgic_v3_rd_registers[] = {
        REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_IGROUPR0,
                vgic_mmio_read_group, vgic_mmio_write_group, 4,
                VGIC_ACCESS_32bit),
-       REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_ISENABLER0,
-               vgic_mmio_read_enable, vgic_mmio_write_senable, 4,
+       REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISENABLER0,
+               vgic_mmio_read_enable, vgic_mmio_write_senable,
+               NULL, vgic_uaccess_write_senable, 4,
                VGIC_ACCESS_32bit),
-       REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_ICENABLER0,
-               vgic_mmio_read_enable, vgic_mmio_write_cenable, 4,
+       REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICENABLER0,
+               vgic_mmio_read_enable, vgic_mmio_write_cenable,
+               NULL, vgic_uaccess_write_cenable, 4,
                VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISPENDR0,
                vgic_mmio_read_pending, vgic_mmio_write_spending,
@@ -625,12 +630,12 @@ static const struct vgic_register_region vgic_v3_rd_registers[] = {
                VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISACTIVER0,
                vgic_mmio_read_active, vgic_mmio_write_sactive,
-               NULL, vgic_mmio_uaccess_write_sactive,
-               4, VGIC_ACCESS_32bit),
+               vgic_uaccess_read_active, vgic_mmio_uaccess_write_sactive, 4,
+               VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICACTIVER0,
                vgic_mmio_read_active, vgic_mmio_write_cactive,
-               NULL, vgic_mmio_uaccess_write_cactive,
-               4, VGIC_ACCESS_32bit),
+               vgic_uaccess_read_active, vgic_mmio_uaccess_write_cactive, 4,
+               VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_IPRIORITYR0,
                vgic_mmio_read_priority, vgic_mmio_write_priority, 32,
                VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
index 2199302..b2d73fc 100644 (file)
@@ -184,6 +184,48 @@ void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu,
        }
 }
 
+int vgic_uaccess_write_senable(struct kvm_vcpu *vcpu,
+                              gpa_t addr, unsigned int len,
+                              unsigned long val)
+{
+       u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
+       int i;
+       unsigned long flags;
+
+       for_each_set_bit(i, &val, len * 8) {
+               struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+
+               raw_spin_lock_irqsave(&irq->irq_lock, flags);
+               irq->enabled = true;
+               vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
+
+               vgic_put_irq(vcpu->kvm, irq);
+       }
+
+       return 0;
+}
+
+int vgic_uaccess_write_cenable(struct kvm_vcpu *vcpu,
+                              gpa_t addr, unsigned int len,
+                              unsigned long val)
+{
+       u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
+       int i;
+       unsigned long flags;
+
+       for_each_set_bit(i, &val, len * 8) {
+               struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+
+               raw_spin_lock_irqsave(&irq->irq_lock, flags);
+               irq->enabled = false;
+               raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
+
+               vgic_put_irq(vcpu->kvm, irq);
+       }
+
+       return 0;
+}
+
 unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
                                     gpa_t addr, unsigned int len)
 {
@@ -219,17 +261,6 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
        return value;
 }
 
-/* Must be called with irq->irq_lock held */
-static void vgic_hw_irq_spending(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
-                                bool is_uaccess)
-{
-       if (is_uaccess)
-               return;
-
-       irq->pending_latch = true;
-       vgic_irq_set_phys_active(irq, true);
-}
-
 static bool is_vgic_v2_sgi(struct kvm_vcpu *vcpu, struct vgic_irq *irq)
 {
        return (vgic_irq_is_sgi(irq->intid) &&
@@ -240,7 +271,6 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
                              gpa_t addr, unsigned int len,
                              unsigned long val)
 {
-       bool is_uaccess = !kvm_get_running_vcpu();
        u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
        int i;
        unsigned long flags;
@@ -270,22 +300,48 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
                        continue;
                }
 
+               irq->pending_latch = true;
                if (irq->hw)
-                       vgic_hw_irq_spending(vcpu, irq, is_uaccess);
-               else
-                       irq->pending_latch = true;
+                       vgic_irq_set_phys_active(irq, true);
+
                vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
                vgic_put_irq(vcpu->kvm, irq);
        }
 }
 
-/* Must be called with irq->irq_lock held */
-static void vgic_hw_irq_cpending(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
-                                bool is_uaccess)
+int vgic_uaccess_write_spending(struct kvm_vcpu *vcpu,
+                               gpa_t addr, unsigned int len,
+                               unsigned long val)
 {
-       if (is_uaccess)
-               return;
+       u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
+       int i;
+       unsigned long flags;
+
+       for_each_set_bit(i, &val, len * 8) {
+               struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+
+               raw_spin_lock_irqsave(&irq->irq_lock, flags);
+               irq->pending_latch = true;
 
+               /*
+                * GICv2 SGIs are terribly broken. We can't restore
+                * the source of the interrupt, so just pick the vcpu
+                * itself as the source...
+                */
+               if (is_vgic_v2_sgi(vcpu, irq))
+                       irq->source |= BIT(vcpu->vcpu_id);
+
+               vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
+
+               vgic_put_irq(vcpu->kvm, irq);
+       }
+
+       return 0;
+}
+
+/* Must be called with irq->irq_lock held */
+static void vgic_hw_irq_cpending(struct kvm_vcpu *vcpu, struct vgic_irq *irq)
+{
        irq->pending_latch = false;
 
        /*
@@ -308,7 +364,6 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
                              gpa_t addr, unsigned int len,
                              unsigned long val)
 {
-       bool is_uaccess = !kvm_get_running_vcpu();
        u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
        int i;
        unsigned long flags;
@@ -339,7 +394,7 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
                }
 
                if (irq->hw)
-                       vgic_hw_irq_cpending(vcpu, irq, is_uaccess);
+                       vgic_hw_irq_cpending(vcpu, irq);
                else
                        irq->pending_latch = false;
 
@@ -348,8 +403,68 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
        }
 }
 
-unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu,
-                                   gpa_t addr, unsigned int len)
+int vgic_uaccess_write_cpending(struct kvm_vcpu *vcpu,
+                               gpa_t addr, unsigned int len,
+                               unsigned long val)
+{
+       u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
+       int i;
+       unsigned long flags;
+
+       for_each_set_bit(i, &val, len * 8) {
+               struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+
+               raw_spin_lock_irqsave(&irq->irq_lock, flags);
+               /*
+                * More fun with GICv2 SGIs! If we're clearing one of them
+                * from userspace, which source vcpu to clear? Let's not
+                * even think of it, and blow the whole set.
+                */
+               if (is_vgic_v2_sgi(vcpu, irq))
+                       irq->source = 0;
+
+               irq->pending_latch = false;
+
+               raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
+
+               vgic_put_irq(vcpu->kvm, irq);
+       }
+
+       return 0;
+}
+
+/*
+ * If we are fiddling with an IRQ's active state, we have to make sure the IRQ
+ * is not queued on some running VCPU's LRs, because then the change to the
+ * active state can be overwritten when the VCPU's state is synced coming back
+ * from the guest.
+ *
+ * For shared interrupts as well as GICv3 private interrupts, we have to
+ * stop all the VCPUs because interrupts can be migrated while we don't hold
+ * the IRQ locks and we don't want to be chasing moving targets.
+ *
+ * For GICv2 private interrupts we don't have to do anything because
+ * userspace accesses to the VGIC state already require all VCPUs to be
+ * stopped, and only the VCPU itself can modify its private interrupts
+ * active state, which guarantees that the VCPU is not running.
+ */
+static void vgic_access_active_prepare(struct kvm_vcpu *vcpu, u32 intid)
+{
+       if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 ||
+           intid >= VGIC_NR_PRIVATE_IRQS)
+               kvm_arm_halt_guest(vcpu->kvm);
+}
+
+/* See vgic_access_active_prepare */
+static void vgic_access_active_finish(struct kvm_vcpu *vcpu, u32 intid)
+{
+       if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 ||
+           intid >= VGIC_NR_PRIVATE_IRQS)
+               kvm_arm_resume_guest(vcpu->kvm);
+}
+
+static unsigned long __vgic_mmio_read_active(struct kvm_vcpu *vcpu,
+                                            gpa_t addr, unsigned int len)
 {
        u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
        u32 value = 0;
@@ -359,6 +474,10 @@ unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu,
        for (i = 0; i < len * 8; i++) {
                struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
 
+               /*
+                * Even for HW interrupts, don't evaluate the HW state as
+                * all the guest is interested in is the virtual state.
+                */
                if (irq->active)
                        value |= (1U << i);
 
@@ -368,6 +487,29 @@ unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu,
        return value;
 }
 
+unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu,
+                                   gpa_t addr, unsigned int len)
+{
+       u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
+       u32 val;
+
+       mutex_lock(&vcpu->kvm->lock);
+       vgic_access_active_prepare(vcpu, intid);
+
+       val = __vgic_mmio_read_active(vcpu, addr, len);
+
+       vgic_access_active_finish(vcpu, intid);
+       mutex_unlock(&vcpu->kvm->lock);
+
+       return val;
+}
+
+unsigned long vgic_uaccess_read_active(struct kvm_vcpu *vcpu,
+                                   gpa_t addr, unsigned int len)
+{
+       return __vgic_mmio_read_active(vcpu, addr, len);
+}
+
 /* Must be called with irq->irq_lock held */
 static void vgic_hw_irq_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
                                      bool active, bool is_uaccess)
@@ -426,36 +568,6 @@ static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
                raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
 }
 
-/*
- * If we are fiddling with an IRQ's active state, we have to make sure the IRQ
- * is not queued on some running VCPU's LRs, because then the change to the
- * active state can be overwritten when the VCPU's state is synced coming back
- * from the guest.
- *
- * For shared interrupts, we have to stop all the VCPUs because interrupts can
- * be migrated while we don't hold the IRQ locks and we don't want to be
- * chasing moving targets.
- *
- * For private interrupts we don't have to do anything because userspace
- * accesses to the VGIC state already require all VCPUs to be stopped, and
- * only the VCPU itself can modify its private interrupts active state, which
- * guarantees that the VCPU is not running.
- */
-static void vgic_change_active_prepare(struct kvm_vcpu *vcpu, u32 intid)
-{
-       if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 ||
-           intid > VGIC_NR_PRIVATE_IRQS)
-               kvm_arm_halt_guest(vcpu->kvm);
-}
-
-/* See vgic_change_active_prepare */
-static void vgic_change_active_finish(struct kvm_vcpu *vcpu, u32 intid)
-{
-       if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 ||
-           intid > VGIC_NR_PRIVATE_IRQS)
-               kvm_arm_resume_guest(vcpu->kvm);
-}
-
 static void __vgic_mmio_write_cactive(struct kvm_vcpu *vcpu,
                                      gpa_t addr, unsigned int len,
                                      unsigned long val)
@@ -477,11 +589,11 @@ void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu,
        u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
 
        mutex_lock(&vcpu->kvm->lock);
-       vgic_change_active_prepare(vcpu, intid);
+       vgic_access_active_prepare(vcpu, intid);
 
        __vgic_mmio_write_cactive(vcpu, addr, len, val);
 
-       vgic_change_active_finish(vcpu, intid);
+       vgic_access_active_finish(vcpu, intid);
        mutex_unlock(&vcpu->kvm->lock);
 }
 
@@ -514,11 +626,11 @@ void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
        u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
 
        mutex_lock(&vcpu->kvm->lock);
-       vgic_change_active_prepare(vcpu, intid);
+       vgic_access_active_prepare(vcpu, intid);
 
        __vgic_mmio_write_sactive(vcpu, addr, len, val);
 
-       vgic_change_active_finish(vcpu, intid);
+       vgic_access_active_finish(vcpu, intid);
        mutex_unlock(&vcpu->kvm->lock);
 }
 
index 5af2aef..fefcca2 100644 (file)
@@ -138,6 +138,14 @@ void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu,
                             gpa_t addr, unsigned int len,
                             unsigned long val);
 
+int vgic_uaccess_write_senable(struct kvm_vcpu *vcpu,
+                              gpa_t addr, unsigned int len,
+                              unsigned long val);
+
+int vgic_uaccess_write_cenable(struct kvm_vcpu *vcpu,
+                              gpa_t addr, unsigned int len,
+                              unsigned long val);
+
 unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
                                     gpa_t addr, unsigned int len);
 
@@ -149,9 +157,20 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
                              gpa_t addr, unsigned int len,
                              unsigned long val);
 
+int vgic_uaccess_write_spending(struct kvm_vcpu *vcpu,
+                               gpa_t addr, unsigned int len,
+                               unsigned long val);
+
+int vgic_uaccess_write_cpending(struct kvm_vcpu *vcpu,
+                               gpa_t addr, unsigned int len,
+                               unsigned long val);
+
 unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu,
                                    gpa_t addr, unsigned int len);
 
+unsigned long vgic_uaccess_read_active(struct kvm_vcpu *vcpu,
+                                   gpa_t addr, unsigned int len);
+
 void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu,
                             gpa_t addr, unsigned int len,
                             unsigned long val);