Merge tag 'omap-for-v6.8/defconfig-signed' of git://git.kernel.org/pub/scm/linux...
authorArnd Bergmann <arnd@arndb.de>
Fri, 22 Dec 2023 11:39:14 +0000 (11:39 +0000)
committerArnd Bergmann <arnd@arndb.de>
Fri, 22 Dec 2023 11:39:14 +0000 (11:39 +0000)
Defconfig changes for omaps

Defconfig changes to enable I2C devices for bt200 as loadable modules
for the PMIC, sensors and LEDs.

* tag 'omap-for-v6.8/defconfig-signed' of git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap:
  ARM: omap2plus_defconfig: enable I2C devcies of bt200

Link: https://lore.kernel.org/r/pull-1702037869-295608@atomide.com-2
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
494 files changed:
Documentation/arch/loongarch/introduction.rst
Documentation/arch/x86/boot.rst
Documentation/devicetree/bindings/net/ethernet-controller.yaml
Documentation/devicetree/bindings/ufs/qcom,ufs.yaml
Documentation/devicetree/bindings/usb/microchip,usb5744.yaml
Documentation/devicetree/bindings/usb/qcom,dwc3.yaml
Documentation/devicetree/bindings/usb/usb-hcd.yaml
Documentation/filesystems/erofs.rst
Documentation/process/maintainer-netdev.rst
Documentation/translations/zh_CN/arch/loongarch/introduction.rst
MAINTAINERS
Makefile
arch/arm/configs/multi_v7_defconfig
arch/arm/configs/shmobile_defconfig
arch/arm/xen/enlighten.c
arch/arm64/Makefile
arch/arm64/configs/defconfig
arch/arm64/include/asm/setup.h
arch/arm64/mm/pageattr.c
arch/loongarch/Makefile
arch/loongarch/include/asm/asmmacro.h
arch/loongarch/include/asm/percpu.h
arch/loongarch/include/asm/setup.h
arch/loongarch/kernel/relocate.c
arch/loongarch/kernel/time.c
arch/loongarch/mm/pgtable.c
arch/parisc/Kconfig
arch/parisc/include/asm/alternative.h
arch/parisc/include/asm/assembly.h
arch/parisc/include/asm/bug.h
arch/parisc/include/asm/elf.h
arch/parisc/include/asm/jump_label.h
arch/parisc/include/asm/ldcw.h
arch/parisc/include/asm/processor.h
arch/parisc/include/asm/uaccess.h
arch/parisc/include/uapi/asm/errno.h
arch/parisc/kernel/processor.c
arch/parisc/kernel/sys_parisc.c
arch/parisc/kernel/vmlinux.lds.S
arch/s390/include/asm/processor.h
arch/s390/kernel/ipl.c
arch/s390/kernel/perf_pai_crypto.c
arch/s390/kernel/perf_pai_ext.c
arch/x86/events/intel/core.c
arch/x86/hyperv/hv_init.c
arch/x86/include/asm/acpi.h
arch/x86/include/asm/xen/hypervisor.h
arch/x86/kernel/acpi/boot.c
arch/x86/kernel/cpu/microcode/amd.c
arch/x86/kernel/cpu/microcode/core.c
arch/x86/kernel/cpu/microcode/intel.c
arch/x86/kernel/cpu/microcode/internal.h
arch/x86/kernel/cpu/mshyperv.c
arch/x86/kernel/signal_64.c
block/bdev.c
block/blk-cgroup.c
block/blk-cgroup.h
block/blk-mq.c
block/blk-pm.c
block/blk-throttle.c
drivers/accel/ivpu/ivpu_hw_37xx.c
drivers/accel/ivpu/ivpu_pm.c
drivers/acpi/acpi_video.c
drivers/acpi/device_pm.c
drivers/acpi/processor_idle.c
drivers/acpi/resource.c
drivers/ata/pata_isapnp.c
drivers/block/nbd.c
drivers/block/null_blk/main.c
drivers/dpll/dpll_netlink.c
drivers/firmware/Kconfig
drivers/firmware/qemu_fw_cfg.c
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
drivers/gpu/drm/amd/display/dc/core/dc.c
drivers/gpu/drm/amd/display/dc/core/dc_resource.c
drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
drivers/gpu/drm/amd/display/dc/dc_types.h
drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_stream_encoder.c
drivers/gpu/drm/amd/display/dc/link/link_detection.c
drivers/gpu/drm/amd/display/dmub/dmub_srv.h
drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
drivers/gpu/drm/ast/ast_drv.h
drivers/gpu/drm/ast/ast_mode.c
drivers/gpu/drm/ci/xfails/requirements.txt
drivers/gpu/drm/drm_panel_orientation_quirks.c
drivers/gpu/drm/i915/display/intel_dp_mst.c
drivers/gpu/drm/i915/gt/intel_gt.c
drivers/gpu/drm/i915/i915_driver.c
drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_8_0_sc8280xp.h
drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
drivers/gpu/drm/msm/dp/dp_display.c
drivers/gpu/drm/msm/dp/dp_drm.c
drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c
drivers/gpu/drm/msm/msm_drv.c
drivers/gpu/drm/nouveau/include/nvkm/core/event.h
drivers/gpu/drm/nouveau/nouveau_display.c
drivers/gpu/drm/nouveau/nvkm/core/event.c
drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c
drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c
drivers/gpu/drm/panel/panel-simple.c
drivers/gpu/drm/rockchip/rockchip_drm_vop.c
drivers/hid/hid-apple.c
drivers/hid/hid-asus.c
drivers/hid/hid-core.c
drivers/hid/hid-debug.c
drivers/hid/hid-glorious.c
drivers/hid/hid-ids.h
drivers/hid/hid-logitech-dj.c
drivers/hid/hid-mcp2221.c
drivers/hid/hid-multitouch.c
drivers/hid/hid-quirks.c
drivers/i2c/busses/i2c-designware-common.c
drivers/i2c/busses/i2c-ocores.c
drivers/i2c/busses/i2c-pxa.c
drivers/irqchip/irq-gic-v3-its.c
drivers/md/bcache/bcache.h
drivers/md/bcache/btree.c
drivers/md/bcache/super.c
drivers/md/bcache/sysfs.c
drivers/md/bcache/writeback.c
drivers/md/dm-bufio.c
drivers/md/dm-crypt.c
drivers/md/dm-delay.c
drivers/md/dm-verity-fec.c
drivers/md/dm-verity-target.c
drivers/md/dm-verity.h
drivers/md/md.c
drivers/net/bonding/bond_main.c
drivers/net/ethernet/amd/pds_core/adminq.c
drivers/net/ethernet/amd/pds_core/core.h
drivers/net/ethernet/amd/pds_core/dev.c
drivers/net/ethernet/amd/pds_core/devlink.c
drivers/net/ethernet/amd/xgbe/xgbe-drv.c
drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
drivers/net/ethernet/broadcom/tg3.c
drivers/net/ethernet/broadcom/tg3.h
drivers/net/ethernet/cortina/gemini.c
drivers/net/ethernet/cortina/gemini.h
drivers/net/ethernet/google/gve/gve_main.c
drivers/net/ethernet/google/gve/gve_rx.c
drivers/net/ethernet/google/gve/gve_tx.c
drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
drivers/net/ethernet/intel/ice/ice_ddp.c
drivers/net/ethernet/intel/ice/ice_dpll.c
drivers/net/ethernet/intel/ice/ice_dpll.h
drivers/net/ethernet/intel/ice/ice_main.c
drivers/net/ethernet/intel/ice/ice_ptp.c
drivers/net/ethernet/intel/ice/ice_ptp.h
drivers/net/ethernet/intel/ice/ice_ptp_hw.c
drivers/net/ethernet/intel/ice/ice_ptp_hw.h
drivers/net/ethernet/intel/ice/ice_txrx.c
drivers/net/ethernet/intel/ice/ice_txrx.h
drivers/net/ethernet/marvell/mvneta.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
drivers/net/ethernet/mellanox/mlx5/core/eq.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c
drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
drivers/net/ethernet/realtek/r8169_main.c
drivers/net/ethernet/stmicro/stmmac/Kconfig
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
drivers/net/ethernet/ti/icssg/icssg_prueth.c
drivers/net/ethernet/wangxun/libwx/wx_hw.c
drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
drivers/net/ethernet/xilinx/xilinx_axienet_main.c
drivers/net/hyperv/netvsc_drv.c
drivers/net/ipa/reg/gsi_reg-v5.0.c
drivers/net/ipvlan/ipvlan_core.c
drivers/net/macvlan.c
drivers/net/netkit.c
drivers/net/ppp/ppp_synctty.c
drivers/net/usb/aqc111.c
drivers/net/usb/ax88179_178a.c
drivers/net/usb/qmi_wwan.c
drivers/net/veth.c
drivers/net/vrf.c
drivers/net/wireguard/device.c
drivers/net/wireguard/receive.c
drivers/net/wireguard/send.c
drivers/nfc/virtual_ncidev.c
drivers/nvme/host/auth.c
drivers/nvme/host/core.c
drivers/nvme/host/fabrics.c
drivers/nvme/host/fc.c
drivers/nvme/host/rdma.c
drivers/nvme/host/tcp.c
drivers/nvme/target/Kconfig
drivers/nvme/target/configfs.c
drivers/nvme/target/fabrics-cmd.c
drivers/nvme/target/tcp.c
drivers/parisc/power.c
drivers/phy/Kconfig
drivers/phy/Makefile
drivers/phy/realtek/Kconfig [deleted file]
drivers/phy/realtek/Makefile [deleted file]
drivers/phy/realtek/phy-rtk-usb2.c [deleted file]
drivers/phy/realtek/phy-rtk-usb3.c [deleted file]
drivers/platform/x86/amd/pmc/pmc.c
drivers/platform/x86/hp/hp-bioscfg/bioscfg.c
drivers/platform/x86/ideapad-laptop.c
drivers/platform/x86/intel/telemetry/core.c
drivers/ptp/ptp_chardev.c
drivers/ptp/ptp_clock.c
drivers/ptp/ptp_private.h
drivers/ptp/ptp_sysfs.c
drivers/s390/block/dasd.c
drivers/s390/block/dasd_int.h
drivers/s390/net/Kconfig
drivers/s390/net/ism_drv.c
drivers/scsi/qla2xxx/qla_os.c
drivers/scsi/scsi_debug.c
drivers/scsi/sd.c
drivers/thunderbolt/switch.c
drivers/thunderbolt/tb.c
drivers/ufs/core/ufs-mcq.c
drivers/usb/cdns3/cdnsp-ring.c
drivers/usb/core/config.c
drivers/usb/core/hub.c
drivers/usb/dwc2/hcd_intr.c
drivers/usb/dwc3/core.c
drivers/usb/dwc3/drd.c
drivers/usb/dwc3/dwc3-qcom.c
drivers/usb/dwc3/dwc3-rtk.c
drivers/usb/host/xhci-mtk-sch.c
drivers/usb/host/xhci-mtk.h
drivers/usb/host/xhci-plat.c
drivers/usb/misc/onboard_usb_hub.c
drivers/usb/misc/onboard_usb_hub.h
drivers/usb/misc/usb-ljca.c
drivers/usb/serial/option.c
drivers/usb/typec/tcpm/tcpm.c
drivers/usb/typec/tipd/core.c
drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
drivers/vhost/vdpa.c
drivers/virtio/virtio_pci_common.c
drivers/virtio/virtio_pci_modern_dev.c
drivers/xen/events/events_2l.c
drivers/xen/events/events_base.c
drivers/xen/events/events_internal.h
drivers/xen/pcpu.c
drivers/xen/privcmd.c
drivers/xen/swiotlb-xen.c
drivers/xen/xen-front-pgdir-shbuf.c
fs/afs/dynroot.c
fs/afs/internal.h
fs/afs/server_list.c
fs/afs/super.c
fs/afs/vl_rotate.c
fs/autofs/inode.c
fs/bcachefs/backpointers.c
fs/bcachefs/bcachefs.h
fs/bcachefs/btree_iter.c
fs/bcachefs/btree_key_cache.c
fs/bcachefs/btree_key_cache_types.h [new file with mode: 0644]
fs/bcachefs/btree_trans_commit.c
fs/bcachefs/btree_types.h
fs/bcachefs/btree_update_interior.c
fs/bcachefs/btree_update_interior.h
fs/bcachefs/data_update.c
fs/bcachefs/disk_groups.c
fs/bcachefs/ec.c
fs/bcachefs/fs-io-pagecache.c
fs/bcachefs/fs-io-pagecache.h
fs/bcachefs/fs.c
fs/bcachefs/fsck.c
fs/bcachefs/inode.c
fs/bcachefs/io_write.c
fs/bcachefs/journal.c
fs/bcachefs/journal.h
fs/bcachefs/journal_io.c
fs/bcachefs/journal_reclaim.c
fs/bcachefs/journal_types.h
fs/bcachefs/six.c
fs/bcachefs/subvolume_types.h
fs/bcachefs/trace.h
fs/bcachefs/xattr.c
fs/btrfs/ctree.c
fs/btrfs/delayed-ref.c
fs/btrfs/extent-tree.c
fs/btrfs/extent-tree.h
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/qgroup.c
fs/btrfs/raid-stripe-tree.c
fs/btrfs/scrub.c
fs/btrfs/volumes.c
fs/btrfs/zoned.c
fs/ecryptfs/inode.c
fs/erofs/Kconfig
fs/erofs/data.c
fs/erofs/inode.c
fs/inode.c
fs/libfs.c
fs/nfsd/cache.h
fs/nfsd/nfs4state.c
fs/nfsd/nfscache.c
fs/nfsd/nfssvc.c
fs/overlayfs/inode.c
fs/overlayfs/overlayfs.h
fs/overlayfs/params.c
fs/overlayfs/util.c
fs/smb/client/cifs_spnego.c
fs/smb/client/cifsglob.h
fs/smb/client/cifspdu.h
fs/smb/client/cifsproto.h
fs/smb/client/cifssmb.c
fs/smb/client/connect.c
fs/smb/client/inode.c
fs/smb/client/readdir.c
fs/smb/client/sess.c
fs/smb/client/smb1ops.c
fs/smb/client/smb2inode.c
fs/smb/client/smb2ops.c
fs/smb/client/smb2transport.c
fs/stat.c
fs/tracefs/event_inode.c
fs/tracefs/inode.c
fs/xfs/Kconfig
fs/xfs/libxfs/xfs_alloc.c
fs/xfs/libxfs/xfs_defer.c
fs/xfs/libxfs/xfs_defer.h
fs/xfs/libxfs/xfs_inode_buf.c
fs/xfs/xfs_dquot.c
fs/xfs/xfs_dquot_item_recover.c
fs/xfs/xfs_inode.h
fs/xfs/xfs_inode_item_recover.c
fs/xfs/xfs_ioctl.c
fs/xfs/xfs_iops.c
fs/xfs/xfs_log.c
fs/xfs/xfs_log_recover.c
fs/xfs/xfs_reflink.c
include/acpi/acpi_bus.h
include/asm-generic/qspinlock.h
include/linux/blk-pm.h
include/linux/bpf.h
include/linux/bpf_verifier.h
include/linux/cpuhotplug.h
include/linux/export-internal.h
include/linux/hid.h
include/linux/hrtimer.h
include/linux/mdio.h
include/linux/netdevice.h
include/linux/pagemap.h
include/linux/perf_event.h
include/linux/stackleak.h
include/linux/usb/phy.h
include/linux/virtio_pci_modern.h
include/net/netfilter/nf_tables.h
include/net/netkit.h
include/net/tc_act/tc_ct.h
include/trace/events/rxrpc.h
include/uapi/linux/btrfs_tree.h
include/uapi/linux/fcntl.h
include/uapi/linux/virtio_pci.h
include/xen/events.h
io_uring/fdinfo.c
io_uring/fs.c
io_uring/rsrc.c
io_uring/sqpoll.c
kernel/audit_watch.c
kernel/bpf/core.c
kernel/bpf/verifier.c
kernel/cgroup/cgroup.c
kernel/cpu.c
kernel/events/core.c
kernel/futex/core.c
kernel/locking/lockdep.c
kernel/sched/fair.c
kernel/sys.c
kernel/time/hrtimer.c
lib/errname.c
lib/iov_iter.c
lib/zstd/common/fse_decompress.c
mm/damon/core.c
mm/damon/sysfs-schemes.c
mm/damon/sysfs.c
mm/filemap.c
mm/huge_memory.c
mm/ksm.c
mm/memcontrol.c
mm/page-writeback.c
mm/userfaultfd.c
mm/util.c
net/bridge/netfilter/nf_conntrack_bridge.c
net/core/dev.c
net/core/filter.c
net/core/gso_test.c
net/ipv4/inet_diag.c
net/ipv4/inet_hashtables.c
net/ipv4/raw_diag.c
net/ipv4/route.c
net/ipv4/tcp_diag.c
net/ipv4/udp_diag.c
net/mptcp/mptcp_diag.c
net/mptcp/pm_netlink.c
net/mptcp/protocol.c
net/mptcp/sockopt.c
net/ncsi/ncsi-aen.c
net/netfilter/ipset/ip_set_core.c
net/netfilter/nf_tables_api.c
net/netfilter/nft_byteorder.c
net/netfilter/nft_meta.c
net/netfilter/nft_set_rbtree.c
net/packet/diag.c
net/rxrpc/conn_client.c
net/rxrpc/input.c
net/sched/act_ct.c
net/sctp/diag.c
net/smc/af_smc.c
net/smc/smc_diag.c
net/tipc/diag.c
net/tipc/netlink_compat.c
net/tls/tls_sw.c
net/unix/af_unix.c
net/unix/diag.c
net/vmw_vsock/diag.c
net/xdp/xsk_diag.c
scripts/Makefile.lib
scripts/checkstack.pl
scripts/gcc-plugins/latent_entropy_plugin.c
scripts/gcc-plugins/randomize_layout_plugin.c
scripts/kconfig/symbol.c
scripts/mod/modpost.c
sound/pci/hda/cs35l56_hda_i2c.c
sound/pci/hda/hda_intel.c
sound/pci/hda/patch_realtek.c
tools/arch/parisc/include/uapi/asm/errno.h
tools/hv/hv_kvp_daemon.c
tools/hv/hv_set_ifconfig.sh
tools/net/ynl/Makefile.deps
tools/net/ynl/generated/devlink-user.c
tools/net/ynl/ynl-gen-c.py
tools/power/pm-graph/sleepgraph.py
tools/power/x86/turbostat/turbostat.c
tools/testing/selftests/arm64/fp/za-fork.c
tools/testing/selftests/bpf/prog_tests/tc_redirect.c
tools/testing/selftests/bpf/prog_tests/verifier.c
tools/testing/selftests/bpf/progs/bpf_loop_bench.c
tools/testing/selftests/bpf/progs/cb_refs.c
tools/testing/selftests/bpf/progs/exceptions_fail.c
tools/testing/selftests/bpf/progs/strobemeta.h
tools/testing/selftests/bpf/progs/verifier_cfg.c
tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/verifier_loops1.c
tools/testing/selftests/bpf/progs/verifier_precision.c
tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
tools/testing/selftests/bpf/verifier/calls.c
tools/testing/selftests/bpf/verifier/ld_imm64.c
tools/testing/selftests/bpf/xskxceiver.c
tools/testing/selftests/mm/.gitignore
tools/testing/selftests/mm/pagemap_ioctl.c
tools/testing/selftests/mm/run_vmtests.sh
tools/testing/selftests/net/mptcp/mptcp_join.sh
tools/testing/selftests/net/rtnetlink.sh
tools/testing/vsock/vsock_test.c

index 8c568cf..5e6db78 100644 (file)
@@ -375,9 +375,9 @@ Developer web site of Loongson and LoongArch (Software and Documentation):
 
 Documentation of LoongArch ISA:
 
-  https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.02-CN.pdf (in Chinese)
+  https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.10-CN.pdf (in Chinese)
 
-  https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.02-EN.pdf (in English)
+  https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.10-EN.pdf (in English)
 
 Documentation of LoongArch ELF psABI:
 
index f5d2f24..22cc7a0 100644 (file)
@@ -77,7 +77,7 @@ Protocol 2.14 BURNT BY INCORRECT COMMIT
 Protocol 2.15  (Kernel 5.5) Added the kernel_info and kernel_info.setup_type_max.
 =============  ============================================================
 
-.. note::
+  .. note::
      The protocol version number should be changed only if the setup header
      is changed. There is no need to update the version number if boot_params
      or kernel_info are changed. Additionally, it is recommended to use
index 9f6a5cc..d14d123 100644 (file)
@@ -275,12 +275,12 @@ allOf:
       properties:
         rx-internal-delay-ps:
           description:
-            RGMII Receive Clock Delay defined in pico seconds.This is used for
+            RGMII Receive Clock Delay defined in pico seconds. This is used for
             controllers that have configurable RX internal delays. If this
             property is present then the MAC applies the RX delay.
         tx-internal-delay-ps:
           description:
-            RGMII Transmit Clock Delay defined in pico seconds.This is used for
+            RGMII Transmit Clock Delay defined in pico seconds. This is used for
             controllers that have configurable TX internal delays. If this
             property is present then the MAC applies the TX delay.
 
index 462ead5..2cf3d01 100644 (file)
@@ -36,6 +36,7 @@ properties:
           - qcom,sm8350-ufshc
           - qcom,sm8450-ufshc
           - qcom,sm8550-ufshc
+          - qcom,sm8650-ufshc
       - const: qcom,ufshc
       - const: jedec,ufs-2.0
 
@@ -122,6 +123,7 @@ allOf:
               - qcom,sm8350-ufshc
               - qcom,sm8450-ufshc
               - qcom,sm8550-ufshc
+              - qcom,sm8650-ufshc
     then:
       properties:
         clocks:
index ff3a170..6d4cfd9 100644 (file)
@@ -36,7 +36,11 @@ properties:
 
   vdd-supply:
     description:
-      VDD power supply to the hub
+      3V3 power supply to the hub
+
+  vdd2-supply:
+    description:
+      1V2 power supply to the hub
 
   peer-hub:
     $ref: /schemas/types.yaml#/definitions/phandle
@@ -62,6 +66,7 @@ allOf:
       properties:
         reset-gpios: false
         vdd-supply: false
+        vdd2-supply: false
         peer-hub: false
         i2c-bus: false
     else:
index e889158..915c820 100644 (file)
@@ -521,8 +521,8 @@ examples:
 
             interrupts = <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
                          <GIC_SPI 486 IRQ_TYPE_LEVEL_HIGH>,
-                         <GIC_SPI 488 IRQ_TYPE_LEVEL_HIGH>,
-                         <GIC_SPI 489 IRQ_TYPE_LEVEL_HIGH>;
+                         <GIC_SPI 488 IRQ_TYPE_EDGE_BOTH>,
+                         <GIC_SPI 489 IRQ_TYPE_EDGE_BOTH>;
             interrupt-names = "hs_phy_irq", "ss_phy_irq",
                           "dm_hs_phy_irq", "dp_hs_phy_irq";
 
index 692dd60..45a19d4 100644 (file)
@@ -41,7 +41,7 @@ examples:
   - |
     usb {
         phys = <&usb2_phy1>, <&usb3_phy1>;
-        phy-names = "usb";
+        phy-names = "usb2", "usb3";
         #address-cells = <1>;
         #size-cells = <0>;
 
index 57c6ae2..cc4626d 100644 (file)
@@ -91,6 +91,10 @@ compatibility checking tool (fsck.erofs), and a debugging tool (dump.erofs):
 
 - git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git
 
+For more information, please also refer to the documentation site:
+
+- https://erofs.docs.kernel.org
+
 Bugs and patches are welcome, please kindly help us and send to the following
 linux-erofs mailing list:
 
index 7feacc2..84ee60f 100644 (file)
@@ -193,9 +193,23 @@ Review timelines
 Generally speaking, the patches get triaged quickly (in less than
 48h). But be patient, if your patch is active in patchwork (i.e. it's
 listed on the project's patch list) the chances it was missed are close to zero.
-Asking the maintainer for status updates on your
-patch is a good way to ensure your patch is ignored or pushed to the
-bottom of the priority list.
+
+The high volume of development on netdev makes reviewers move on
+from discussions relatively quickly. New comments and replies
+are very unlikely to arrive after a week of silence. If a patch
+is no longer active in patchwork and the thread went idle for more
+than a week - clarify the next steps and/or post the next version.
+
+For RFC postings specifically, if nobody responded in a week - reviewers
+either missed the posting or have no strong opinions. If the code is ready,
+repost as a PATCH.
+
+Emails saying just "ping" or "bump" are considered rude. If you can't figure
+out the status of the patch from patchwork or where the discussion has
+landed - describe your best guess and ask if it's correct. For example::
+
+  I don't understand what the next steps are. Person X seems to be unhappy
+  with A, should I do B and repost the patches?
 
 .. _Changes requested:
 
index 59d6bf3..bf463c5 100644 (file)
@@ -338,9 +338,9 @@ Loongson与LoongArch的开发者网站(软件与文档资源):
 
 LoongArch指令集架构的文档:
 
-  https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.02-CN.pdf (中文版)
+  https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.10-CN.pdf (中文版)
 
-  https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.02-EN.pdf (英文版)
+  https://github.com/loongson/LoongArch-Documentation/releases/latest/download/LoongArch-Vol1-v1.10-EN.pdf (英文版)
 
 LoongArch的ELF psABI文档:
 
index 97f51d5..012df8c 100644 (file)
@@ -7855,6 +7855,7 @@ R:        Yue Hu <huyue2@coolpad.com>
 R:     Jeffle Xu <jefflexu@linux.alibaba.com>
 L:     linux-erofs@lists.ozlabs.org
 S:     Maintained
+W:     https://erofs.docs.kernel.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs.git
 F:     Documentation/ABI/testing/sysfs-fs-erofs
 F:     Documentation/filesystems/erofs.rst
@@ -8950,7 +8951,6 @@ S:        Maintained
 F:     scripts/get_maintainer.pl
 
 GFS2 FILE SYSTEM
-M:     Bob Peterson <rpeterso@redhat.com>
 M:     Andreas Gruenbacher <agruenba@redhat.com>
 L:     gfs2@lists.linux.dev
 S:     Supported
@@ -11025,7 +11025,6 @@ F:      drivers/net/wireless/intel/iwlwifi/
 
 INTEL WMI SLIM BOOTLOADER (SBL) FIRMWARE UPDATE DRIVER
 M:     Jithu Joseph <jithu.joseph@intel.com>
-R:     Maurice Ma <maurice.ma@intel.com>
 S:     Maintained
 W:     https://slimbootloader.github.io/security/firmware-update.html
 F:     drivers/platform/x86/intel/wmi/sbl-fw-update.c
@@ -13779,7 +13778,6 @@ F:      drivers/net/ethernet/mellanox/mlxfw/
 MELLANOX HARDWARE PLATFORM SUPPORT
 M:     Hans de Goede <hdegoede@redhat.com>
 M:     Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
-M:     Mark Gross <markgross@kernel.org>
 M:     Vadim Pasternak <vadimp@nvidia.com>
 L:     platform-driver-x86@vger.kernel.org
 S:     Supported
@@ -14388,7 +14386,6 @@ F:      drivers/platform/surface/surface_gpe.c
 MICROSOFT SURFACE HARDWARE PLATFORM SUPPORT
 M:     Hans de Goede <hdegoede@redhat.com>
 M:     Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
-M:     Mark Gross <markgross@kernel.org>
 M:     Maximilian Luz <luzmaximilian@gmail.com>
 L:     platform-driver-x86@vger.kernel.org
 S:     Maintained
@@ -14995,6 +14992,7 @@ M:      Jakub Kicinski <kuba@kernel.org>
 M:     Paolo Abeni <pabeni@redhat.com>
 L:     netdev@vger.kernel.org
 S:     Maintained
+P:     Documentation/process/maintainer-netdev.rst
 Q:     https://patchwork.kernel.org/project/netdevbpf/list/
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
@@ -15046,6 +15044,7 @@ M:      Jakub Kicinski <kuba@kernel.org>
 M:     Paolo Abeni <pabeni@redhat.com>
 L:     netdev@vger.kernel.org
 S:     Maintained
+P:     Documentation/process/maintainer-netdev.rst
 Q:     https://patchwork.kernel.org/project/netdevbpf/list/
 B:     mailto:netdev@vger.kernel.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git
@@ -15056,6 +15055,7 @@ F:      Documentation/networking/
 F:     Documentation/process/maintainer-netdev.rst
 F:     Documentation/userspace-api/netlink/
 F:     include/linux/in.h
+F:     include/linux/indirect_call_wrapper.h
 F:     include/linux/net.h
 F:     include/linux/netdevice.h
 F:     include/net/
@@ -21769,7 +21769,9 @@ F:      Documentation/devicetree/bindings/counter/ti-eqep.yaml
 F:     drivers/counter/ti-eqep.c
 
 TI ETHERNET SWITCH DRIVER (CPSW)
-R:     Grygorii Strashko <grygorii.strashko@ti.com>
+R:     Siddharth Vadapalli <s-vadapalli@ti.com>
+R:     Ravi Gunasekaran <r-gunasekaran@ti.com>
+R:     Roger Quadros <rogerq@kernel.org>
 L:     linux-omap@vger.kernel.org
 L:     netdev@vger.kernel.org
 S:     Maintained
@@ -21793,6 +21795,15 @@ F:     Documentation/devicetree/bindings/media/i2c/ti,ds90*
 F:     drivers/media/i2c/ds90*
 F:     include/media/i2c/ds90*
 
+TI ICSSG ETHERNET DRIVER (ICSSG)
+R:     MD Danish Anwar <danishanwar@ti.com>
+R:     Roger Quadros <rogerq@kernel.org>
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+L:     netdev@vger.kernel.org
+S:     Maintained
+F:     Documentation/devicetree/bindings/net/ti,icss*.yaml
+F:     drivers/net/ethernet/ti/icssg/*
+
 TI J721E CSI2RX DRIVER
 M:     Jai Luthra <j-luthra@ti.com>
 L:     linux-media@vger.kernel.org
@@ -22068,6 +22079,7 @@ F:      drivers/watchdog/tqmx86_wdt.c
 TRACING
 M:     Steven Rostedt <rostedt@goodmis.org>
 M:     Masami Hiramatsu <mhiramat@kernel.org>
+R:     Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 L:     linux-kernel@vger.kernel.org
 L:     linux-trace-kernel@vger.kernel.org
 S:     Maintained
@@ -23654,7 +23666,6 @@ F:      drivers/platform/x86/x86-android-tablets/
 X86 PLATFORM DRIVERS
 M:     Hans de Goede <hdegoede@redhat.com>
 M:     Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
-M:     Mark Gross <markgross@kernel.org>
 L:     platform-driver-x86@vger.kernel.org
 S:     Maintained
 Q:     https://patchwork.kernel.org/project/platform-driver-x86/list/
@@ -23692,6 +23703,20 @@ F:     arch/x86/kernel/dumpstack.c
 F:     arch/x86/kernel/stacktrace.c
 F:     arch/x86/kernel/unwind_*.c
 
+X86 TRUST DOMAIN EXTENSIONS (TDX)
+M:     Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+R:     Dave Hansen <dave.hansen@linux.intel.com>
+L:     x86@kernel.org
+L:     linux-coco@lists.linux.dev
+S:     Supported
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/tdx
+F:     arch/x86/boot/compressed/tdx*
+F:     arch/x86/coco/tdx/
+F:     arch/x86/include/asm/shared/tdx.h
+F:     arch/x86/include/asm/tdx.h
+F:     arch/x86/virt/vmx/tdx/
+F:     drivers/virt/coco/tdx-guest
+
 X86 VDSO
 M:     Andy Lutomirski <luto@kernel.org>
 L:     linux-kernel@vger.kernel.org
@@ -23872,8 +23897,7 @@ T:      git git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git
 P:     Documentation/filesystems/xfs-maintainer-entry-profile.rst
 F:     Documentation/ABI/testing/sysfs-fs-xfs
 F:     Documentation/admin-guide/xfs.rst
-F:     Documentation/filesystems/xfs-delayed-logging-design.rst
-F:     Documentation/filesystems/xfs-self-describing-metadata.rst
+F:     Documentation/filesystems/xfs-*
 F:     fs/xfs/
 F:     include/uapi/linux/dqblk_xfs.h
 F:     include/uapi/linux/fsmap.h
index ede0bd2..99db546 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 6
 PATCHLEVEL = 7
 SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc3
 NAME = Hurr durr I'ma ninja sloth
 
 # *DOCUMENTATION*
index 10fd74b..a4260c9 100644 (file)
@@ -272,6 +272,7 @@ CONFIG_KS8851=y
 CONFIG_LAN966X_SWITCH=m
 CONFIG_R8169=y
 CONFIG_SH_ETH=y
+CONFIG_RAVB=y
 CONFIG_SMSC911X=y
 CONFIG_SNI_AVE=y
 CONFIG_STMMAC_ETH=y
@@ -1191,6 +1192,7 @@ CONFIG_PWM_TEGRA=y
 CONFIG_PWM_VT8500=y
 CONFIG_KEYSTONE_IRQ=y
 CONFIG_RESET_MCHP_SPARX5=y
+CONFIG_RESET_SCMI=y
 CONFIG_PHY_SUN4I_USB=y
 CONFIG_PHY_SUN9I_USB=y
 CONFIG_PHY_BRCM_USB=m
index dfdea29..e2ea369 100644 (file)
@@ -75,6 +75,7 @@ CONFIG_SERIAL_8250=y
 # CONFIG_SERIAL_8250_16550A_VARIANTS is not set
 CONFIG_SERIAL_8250_CONSOLE=y
 # CONFIG_SERIAL_8250_PCI is not set
+# CONFIG_SERIAL_8250_EXAR is not set
 CONFIG_SERIAL_8250_DW=y
 CONFIG_SERIAL_8250_EM=y
 # CONFIG_SERIAL_8250_PERICOM is not set
index 9afdc4c..a395b6c 100644 (file)
@@ -484,7 +484,8 @@ static int __init xen_guest_init(void)
         * for secondary CPUs as they are brought up.
         * For uniformity we use VCPUOP_register_vcpu_info even on cpu0.
         */
-       xen_vcpu_info = alloc_percpu(struct vcpu_info);
+       xen_vcpu_info = __alloc_percpu(sizeof(struct vcpu_info),
+                                      1 << fls(sizeof(struct vcpu_info) - 1));
        if (xen_vcpu_info == NULL)
                return -ENOMEM;
 
index 4bd85cc..9a2d372 100644 (file)
@@ -158,7 +158,7 @@ endif
 
 all:   $(notdir $(KBUILD_IMAGE))
 
-
+vmlinuz.efi: Image
 Image vmlinuz.efi: vmlinux
        $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
 
index b60aa1f..5ad2b84 100644 (file)
@@ -1192,6 +1192,7 @@ CONFIG_COMMON_CLK_FSL_SAI=y
 CONFIG_COMMON_CLK_S2MPS11=y
 CONFIG_COMMON_CLK_PWM=y
 CONFIG_COMMON_CLK_RS9_PCIE=y
+CONFIG_COMMON_CLK_VC3=y
 CONFIG_COMMON_CLK_VC5=y
 CONFIG_COMMON_CLK_BD718XX=m
 CONFIG_CLK_RASPBERRYPI=m
index f4af547..2e4d7da 100644 (file)
@@ -21,9 +21,22 @@ static inline bool arch_parse_debug_rodata(char *arg)
        extern bool rodata_enabled;
        extern bool rodata_full;
 
-       if (arg && !strcmp(arg, "full")) {
+       if (!arg)
+               return false;
+
+       if (!strcmp(arg, "full")) {
+               rodata_enabled = rodata_full = true;
+               return true;
+       }
+
+       if (!strcmp(arg, "off")) {
+               rodata_enabled = rodata_full = false;
+               return true;
+       }
+
+       if (!strcmp(arg, "on")) {
                rodata_enabled = true;
-               rodata_full = true;
+               rodata_full = false;
                return true;
        }
 
index 8e2017b..924843f 100644 (file)
@@ -29,8 +29,8 @@ bool can_set_direct_map(void)
         *
         * KFENCE pool requires page-granular mapping if initialized late.
         */
-       return (rodata_enabled && rodata_full) || debug_pagealloc_enabled() ||
-               arm64_kfence_can_set_direct_map();
+       return rodata_full || debug_pagealloc_enabled() ||
+              arm64_kfence_can_set_direct_map();
 }
 
 static int change_page_range(pte_t *ptep, unsigned long addr, void *data)
@@ -105,8 +105,7 @@ static int change_memory_common(unsigned long addr, int numpages,
         * If we are manipulating read-only permissions, apply the same
         * change to the linear mapping of the pages that back this VM area.
         */
-       if (rodata_enabled &&
-           rodata_full && (pgprot_val(set_mask) == PTE_RDONLY ||
+       if (rodata_full && (pgprot_val(set_mask) == PTE_RDONLY ||
                            pgprot_val(clear_mask) == PTE_RDONLY)) {
                for (i = 0; i < area->nr_pages; i++) {
                        __change_memory_common((u64)page_address(area->pages[i]),
index 9eeb0c0..204b94b 100644 (file)
@@ -68,6 +68,7 @@ LDFLAGS_vmlinux                       += -static -n -nostdlib
 ifdef CONFIG_AS_HAS_EXPLICIT_RELOCS
 cflags-y                       += $(call cc-option,-mexplicit-relocs)
 KBUILD_CFLAGS_KERNEL           += $(call cc-option,-mdirect-extern-access)
+KBUILD_CFLAGS_KERNEL           += $(call cc-option,-fdirect-access-external-data)
 KBUILD_AFLAGS_MODULE           += $(call cc-option,-fno-direct-access-external-data)
 KBUILD_CFLAGS_MODULE           += $(call cc-option,-fno-direct-access-external-data)
 KBUILD_AFLAGS_MODULE           += $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax)
@@ -142,6 +143,8 @@ vdso-install-y += arch/loongarch/vdso/vdso.so.dbg
 
 all:   $(notdir $(KBUILD_IMAGE))
 
+vmlinuz.efi: vmlinux.efi
+
 vmlinux.elf vmlinux.efi vmlinuz.efi: vmlinux
        $(Q)$(MAKE) $(build)=$(boot) $(bootvars-y) $(boot)/$@
 
index c9544f3..655db7d 100644 (file)
        lu32i.d \reg, 0
        lu52i.d \reg, \reg, 0
        .pushsection ".la_abs", "aw", %progbits
-       768:
-       .dword  768b-766b
+       .dword  766b
        .dword  \sym
        .popsection
 #endif
index ed5da02..9b36ac0 100644 (file)
@@ -40,13 +40,13 @@ static __always_inline unsigned long __percpu_##op(void *ptr,               \
        switch (size) {                                                 \
        case 4:                                                         \
                __asm__ __volatile__(                                   \
-               "am"#asm_op".w" " %[ret], %[val], %[ptr]        \n"             \
+               "am"#asm_op".w" " %[ret], %[val], %[ptr]        \n"     \
                : [ret] "=&r" (ret), [ptr] "+ZB"(*(u32 *)ptr)           \
                : [val] "r" (val));                                     \
                break;                                                  \
        case 8:                                                         \
                __asm__ __volatile__(                                   \
-               "am"#asm_op".d" " %[ret], %[val], %[ptr]        \n"             \
+               "am"#asm_op".d" " %[ret], %[val], %[ptr]        \n"     \
                : [ret] "=&r" (ret), [ptr] "+ZB"(*(u64 *)ptr)           \
                : [val] "r" (val));                                     \
                break;                                                  \
@@ -63,7 +63,7 @@ PERCPU_OP(and, and, &)
 PERCPU_OP(or, or, |)
 #undef PERCPU_OP
 
-static __always_inline unsigned long __percpu_read(void *ptr, int size)
+static __always_inline unsigned long __percpu_read(void __percpu *ptr, int size)
 {
        unsigned long ret;
 
@@ -100,7 +100,7 @@ static __always_inline unsigned long __percpu_read(void *ptr, int size)
        return ret;
 }
 
-static __always_inline void __percpu_write(void *ptr, unsigned long val, int size)
+static __always_inline void __percpu_write(void __percpu *ptr, unsigned long val, int size)
 {
        switch (size) {
        case 1:
@@ -132,8 +132,7 @@ static __always_inline void __percpu_write(void *ptr, unsigned long val, int siz
        }
 }
 
-static __always_inline unsigned long __percpu_xchg(void *ptr, unsigned long val,
-                                                  int size)
+static __always_inline unsigned long __percpu_xchg(void *ptr, unsigned long val, int size)
 {
        switch (size) {
        case 1:
index a0bc159..ee52fb1 100644 (file)
@@ -25,7 +25,7 @@ extern void set_merr_handler(unsigned long offset, void *addr, unsigned long len
 #ifdef CONFIG_RELOCATABLE
 
 struct rela_la_abs {
-       long offset;
+       long pc;
        long symvalue;
 };
 
index 6c3eff9..1acfa70 100644 (file)
@@ -52,7 +52,7 @@ static inline void __init relocate_absolute(long random_offset)
        for (p = begin; (void *)p < end; p++) {
                long v = p->symvalue;
                uint32_t lu12iw, ori, lu32id, lu52id;
-               union loongarch_instruction *insn = (void *)p - p->offset;
+               union loongarch_instruction *insn = (void *)p->pc;
 
                lu12iw = (v >> 12) & 0xfffff;
                ori    = v & 0xfff;
@@ -102,6 +102,14 @@ static inline __init unsigned long get_random_boot(void)
        return hash;
 }
 
+static int __init nokaslr(char *p)
+{
+       pr_info("KASLR is disabled.\n");
+
+       return 0; /* Print a notice and silence the boot warning */
+}
+early_param("nokaslr", nokaslr);
+
 static inline __init bool kaslr_disabled(void)
 {
        char *str;
index 3064af9..e7015f7 100644 (file)
@@ -58,14 +58,16 @@ static int constant_set_state_oneshot(struct clock_event_device *evt)
        return 0;
 }
 
-static int constant_set_state_oneshot_stopped(struct clock_event_device *evt)
+static int constant_set_state_periodic(struct clock_event_device *evt)
 {
+       unsigned long period;
        unsigned long timer_config;
 
        raw_spin_lock(&state_lock);
 
-       timer_config = csr_read64(LOONGARCH_CSR_TCFG);
-       timer_config &= ~CSR_TCFG_EN;
+       period = const_clock_freq / HZ;
+       timer_config = period & CSR_TCFG_VAL;
+       timer_config |= (CSR_TCFG_PERIOD | CSR_TCFG_EN);
        csr_write64(timer_config, LOONGARCH_CSR_TCFG);
 
        raw_spin_unlock(&state_lock);
@@ -73,16 +75,14 @@ static int constant_set_state_oneshot_stopped(struct clock_event_device *evt)
        return 0;
 }
 
-static int constant_set_state_periodic(struct clock_event_device *evt)
+static int constant_set_state_shutdown(struct clock_event_device *evt)
 {
-       unsigned long period;
        unsigned long timer_config;
 
        raw_spin_lock(&state_lock);
 
-       period = const_clock_freq / HZ;
-       timer_config = period & CSR_TCFG_VAL;
-       timer_config |= (CSR_TCFG_PERIOD | CSR_TCFG_EN);
+       timer_config = csr_read64(LOONGARCH_CSR_TCFG);
+       timer_config &= ~CSR_TCFG_EN;
        csr_write64(timer_config, LOONGARCH_CSR_TCFG);
 
        raw_spin_unlock(&state_lock);
@@ -90,11 +90,6 @@ static int constant_set_state_periodic(struct clock_event_device *evt)
        return 0;
 }
 
-static int constant_set_state_shutdown(struct clock_event_device *evt)
-{
-       return 0;
-}
-
 static int constant_timer_next_event(unsigned long delta, struct clock_event_device *evt)
 {
        unsigned long timer_config;
@@ -161,7 +156,7 @@ int constant_clockevent_init(void)
        cd->rating = 320;
        cd->cpumask = cpumask_of(cpu);
        cd->set_state_oneshot = constant_set_state_oneshot;
-       cd->set_state_oneshot_stopped = constant_set_state_oneshot_stopped;
+       cd->set_state_oneshot_stopped = constant_set_state_shutdown;
        cd->set_state_periodic = constant_set_state_periodic;
        cd->set_state_shutdown = constant_set_state_shutdown;
        cd->set_next_event = constant_timer_next_event;
index 71d0539..2aae72e 100644 (file)
@@ -13,13 +13,13 @@ struct page *dmw_virt_to_page(unsigned long kaddr)
 {
        return pfn_to_page(virt_to_pfn(kaddr));
 }
-EXPORT_SYMBOL_GPL(dmw_virt_to_page);
+EXPORT_SYMBOL(dmw_virt_to_page);
 
 struct page *tlb_virt_to_page(unsigned long kaddr)
 {
        return pfn_to_page(pte_pfn(*virt_to_kpte(kaddr)));
 }
-EXPORT_SYMBOL_GPL(tlb_virt_to_page);
+EXPORT_SYMBOL(tlb_virt_to_page);
 
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
index fd69dfa..d14ccc9 100644 (file)
@@ -115,9 +115,12 @@ config ARCH_HAS_ILOG2_U64
        default n
 
 config GENERIC_BUG
-       bool
-       default y
+       def_bool y
        depends on BUG
+       select GENERIC_BUG_RELATIVE_POINTERS if 64BIT
+
+config GENERIC_BUG_RELATIVE_POINTERS
+       bool
 
 config GENERIC_HWEIGHT
        bool
@@ -140,11 +143,11 @@ config ARCH_MMAP_RND_COMPAT_BITS_MIN
        default 8
 
 config ARCH_MMAP_RND_BITS_MAX
-       default 24 if 64BIT
-       default 17
+       default 18 if 64BIT
+       default 13
 
 config ARCH_MMAP_RND_COMPAT_BITS_MAX
-       default 17
+       default 13
 
 # unless you want to implement ACPI on PA-RISC ... ;-)
 config PM
index 1ed45fd..1eb488f 100644 (file)
@@ -34,7 +34,8 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end,
 
 /* Alternative SMP implementation. */
 #define ALTERNATIVE(cond, replacement)         "!0:"   \
-       ".section .altinstructions, \"aw\"      !"      \
+       ".section .altinstructions, \"a\"       !"      \
+       ".align 4                               !"      \
        ".word (0b-4-.)                         !"      \
        ".hword 1, " __stringify(cond) "        !"      \
        ".word " __stringify(replacement) "     !"      \
@@ -44,7 +45,8 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end,
 
 /* to replace one single instructions by a new instruction */
 #define ALTERNATIVE(from, to, cond, replacement)\
-       .section .altinstructions, "aw" !       \
+       .section .altinstructions, "a"  !       \
+       .align 4                        !       \
        .word (from - .)                !       \
        .hword (to - from)/4, cond      !       \
        .word replacement               !       \
@@ -52,7 +54,8 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end,
 
 /* to replace multiple instructions by new code */
 #define ALTERNATIVE_CODE(from, num_instructions, cond, new_instr_ptr)\
-       .section .altinstructions, "aw" !       \
+       .section .altinstructions, "a"  !       \
+       .align 4                        !       \
        .word (from - .)                !       \
        .hword -num_instructions, cond  !       \
        .word (new_instr_ptr - .)       !       \
index 75677b5..74d17d7 100644 (file)
         */
 #define ASM_EXCEPTIONTABLE_ENTRY(fault_addr, except_addr)      \
        .section __ex_table,"aw"                        !       \
+       .align 4                                        !       \
        .word (fault_addr - .), (except_addr - .)       !       \
        .previous
 
index 4b6d60b..1641ff9 100644 (file)
 #define        PARISC_BUG_BREAK_ASM    "break 0x1f, 0x1fff"
 #define        PARISC_BUG_BREAK_INSN   0x03ffe01f  /* PARISC_BUG_BREAK_ASM */
 
-#if defined(CONFIG_64BIT)
-#define ASM_WORD_INSN          ".dword\t"
+#ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
+# define __BUG_REL(val) ".word " __stringify(val) " - ."
 #else
-#define ASM_WORD_INSN          ".word\t"
+# define __BUG_REL(val) ".word " __stringify(val)
 #endif
 
+
 #ifdef CONFIG_DEBUG_BUGVERBOSE
 #define BUG()                                                          \
        do {                                                            \
                asm volatile("\n"                                       \
                             "1:\t" PARISC_BUG_BREAK_ASM "\n"           \
-                            "\t.pushsection __bug_table,\"aw\"\n"      \
-                            "2:\t" ASM_WORD_INSN "1b, %c0\n"           \
-                            "\t.short %c1, %c2\n"                      \
-                            "\t.org 2b+%c3\n"                          \
+                            "\t.pushsection __bug_table,\"a\"\n"       \
+                            "\t.align 4\n"                             \
+                            "2:\t" __BUG_REL(1b) "\n"                  \
+                            "\t" __BUG_REL(%c0)  "\n"                  \
+                            "\t.short %1, %2\n"                        \
+                            "\t.blockz %3-2*4-2*2\n"                   \
                             "\t.popsection"                            \
                             : : "i" (__FILE__), "i" (__LINE__),        \
-                            "i" (0), "i" (sizeof(struct bug_entry)) ); \
+                            "i" (0), "i" (sizeof(struct bug_entry)) ); \
                unreachable();                                          \
        } while(0)
 
        do {                                                            \
                asm volatile("\n"                                       \
                             "1:\t" PARISC_BUG_BREAK_ASM "\n"           \
-                            "\t.pushsection __bug_table,\"aw\"\n"      \
-                            "2:\t" ASM_WORD_INSN "1b, %c0\n"           \
-                            "\t.short %c1, %c2\n"                      \
-                            "\t.org 2b+%c3\n"                          \
+                            "\t.pushsection __bug_table,\"a\"\n"       \
+                            "\t.align 4\n"                             \
+                            "2:\t" __BUG_REL(1b) "\n"                  \
+                            "\t" __BUG_REL(%c0)  "\n"                  \
+                            "\t.short %1, %2\n"                        \
+                            "\t.blockz %3-2*4-2*2\n"                   \
                             "\t.popsection"                            \
                             : : "i" (__FILE__), "i" (__LINE__),        \
                             "i" (BUGFLAG_WARNING|(flags)),             \
        do {                                                            \
                asm volatile("\n"                                       \
                             "1:\t" PARISC_BUG_BREAK_ASM "\n"           \
-                            "\t.pushsection __bug_table,\"aw\"\n"      \
-                            "2:\t" ASM_WORD_INSN "1b\n"                \
-                            "\t.short %c0\n"                           \
-                            "\t.org 2b+%c1\n"                          \
+                            "\t.pushsection __bug_table,\"a\"\n"       \
+                            "\t.align %2\n"                            \
+                            "2:\t" __BUG_REL(1b) "\n"                  \
+                            "\t.short %0\n"                            \
+                            "\t.blockz %1-4-2\n"                       \
                             "\t.popsection"                            \
                             : : "i" (BUGFLAG_WARNING|(flags)),         \
                             "i" (sizeof(struct bug_entry)) );          \
index 140eaa9..2d73d3c 100644 (file)
@@ -349,15 +349,7 @@ struct pt_regs;    /* forward declaration... */
 
 #define ELF_HWCAP      0
 
-/* Masks for stack and mmap randomization */
-#define BRK_RND_MASK   (is_32bit_task() ? 0x07ffUL : 0x3ffffUL)
-#define MMAP_RND_MASK  (is_32bit_task() ? 0x1fffUL : 0x3ffffUL)
-#define STACK_RND_MASK MMAP_RND_MASK
-
-struct mm_struct;
-extern unsigned long arch_randomize_brk(struct mm_struct *);
-#define arch_randomize_brk arch_randomize_brk
-
+#define STACK_RND_MASK 0x7ff   /* 8MB of VA */
 
 #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
 struct linux_binprm;
index af2a598..9442879 100644 (file)
@@ -15,10 +15,12 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool bran
        asm_volatile_goto("1:\n\t"
                 "nop\n\t"
                 ".pushsection __jump_table,  \"aw\"\n\t"
+                ".align %1\n\t"
                 ".word 1b - ., %l[l_yes] - .\n\t"
                 __stringify(ASM_ULONG_INSN) " %c0 - .\n\t"
                 ".popsection\n\t"
-                : :  "i" (&((char *)key)[branch]) :  : l_yes);
+                : : "i" (&((char *)key)[branch]), "i" (sizeof(long))
+                : : l_yes);
 
        return false;
 l_yes:
@@ -30,10 +32,12 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key, bool
        asm_volatile_goto("1:\n\t"
                 "b,n %l[l_yes]\n\t"
                 ".pushsection __jump_table,  \"aw\"\n\t"
+                ".align %1\n\t"
                 ".word 1b - ., %l[l_yes] - .\n\t"
                 __stringify(ASM_ULONG_INSN) " %c0 - .\n\t"
                 ".popsection\n\t"
-                : :  "i" (&((char *)key)[branch]) :  : l_yes);
+                : : "i" (&((char *)key)[branch]), "i" (sizeof(long))
+                : : l_yes);
 
        return false;
 l_yes:
index ee9e071..47ebc4c 100644 (file)
@@ -55,7 +55,7 @@
 })
 
 #ifdef CONFIG_SMP
-# define __lock_aligned __section(".data..lock_aligned")
+# define __lock_aligned __section(".data..lock_aligned") __aligned(16)
 #endif
 
 #endif /* __PARISC_LDCW_H */
index c05d121..982aca2 100644 (file)
@@ -47,6 +47,8 @@
 
 #ifndef __ASSEMBLY__
 
+struct rlimit;
+unsigned long mmap_upper_limit(struct rlimit *rlim_stack);
 unsigned long calc_max_stack_size(unsigned long stack_max);
 
 /*
index 2bf660e..4165079 100644 (file)
@@ -41,6 +41,7 @@ struct exception_table_entry {
 
 #define ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr )\
        ".section __ex_table,\"aw\"\n"                     \
+       ".align 4\n"                                       \
        ".word (" #fault_addr " - .), (" #except_addr " - .)\n\t" \
        ".previous\n"
 
index 87245c5..8d94739 100644 (file)
@@ -75,7 +75,6 @@
 
 /* We now return you to your regularly scheduled HPUX. */
 
-#define ENOSYM         215     /* symbol does not exist in executable */
 #define        ENOTSOCK        216     /* Socket operation on non-socket */
 #define        EDESTADDRREQ    217     /* Destination address required */
 #define        EMSGSIZE        218     /* Message too long */
 #define        ETIMEDOUT       238     /* Connection timed out */
 #define        ECONNREFUSED    239     /* Connection refused */
 #define        EREFUSED        ECONNREFUSED    /* for HP's NFS apparently */
-#define        EREMOTERELEASE  240     /* Remote peer released connection */
 #define        EHOSTDOWN       241     /* Host is down */
 #define        EHOSTUNREACH    242     /* No route to host */
 
index 29e2750..e95a977 100644 (file)
@@ -383,7 +383,7 @@ show_cpuinfo (struct seq_file *m, void *v)
        char cpu_name[60], *p;
 
        /* strip PA path from CPU name to not confuse lscpu */
-       strlcpy(cpu_name, per_cpu(cpu_data, 0).dev->name, sizeof(cpu_name));
+       strscpy(cpu_name, per_cpu(cpu_data, 0).dev->name, sizeof(cpu_name));
        p = strrchr(cpu_name, '[');
        if (p)
                *(--p) = 0;
index ab896ef..98af719 100644 (file)
@@ -77,7 +77,7 @@ unsigned long calc_max_stack_size(unsigned long stack_max)
  * indicating that "current" should be used instead of a passed-in
  * value from the exec bprm as done with arch_pick_mmap_layout().
  */
-static unsigned long mmap_upper_limit(struct rlimit *rlim_stack)
+unsigned long mmap_upper_limit(struct rlimit *rlim_stack)
 {
        unsigned long stack_base;
 
index 58694d1..548051b 100644 (file)
@@ -130,6 +130,7 @@ SECTIONS
        RO_DATA(8)
 
        /* unwind info */
+       . = ALIGN(4);
        .PARISC.unwind : {
                __start___unwind = .;
                *(.PARISC.unwind)
index dc17896..c15eadb 100644 (file)
@@ -228,7 +228,6 @@ typedef struct thread_struct thread_struct;
        execve_tail();                                                  \
 } while (0)
 
-/* Forward declaration, a strange C thing */
 struct task_struct;
 struct mm_struct;
 struct seq_file;
index cc364fc..ba75f6b 100644 (file)
@@ -666,6 +666,7 @@ static int __init ipl_init(void)
                                                &ipl_ccw_attr_group_lpar);
                break;
        case IPL_TYPE_ECKD:
+       case IPL_TYPE_ECKD_DUMP:
                rc = sysfs_create_group(&ipl_kset->kobj, &ipl_eckd_attr_group);
                break;
        case IPL_TYPE_FCP:
index 77fd24e..39a91b0 100644 (file)
@@ -279,12 +279,6 @@ static int paicrypt_event_init(struct perf_event *event)
        if (IS_ERR(cpump))
                return PTR_ERR(cpump);
 
-       /* Event initialization sets last_tag to 0. When later on the events
-        * are deleted and re-added, do not reset the event count value to zero.
-        * Events are added, deleted and re-added when 2 or more events
-        * are active at the same time.
-        */
-       event->hw.last_tag = 0;
        event->destroy = paicrypt_event_destroy;
 
        if (a->sample_period) {
@@ -318,6 +312,11 @@ static void paicrypt_start(struct perf_event *event, int flags)
 {
        u64 sum;
 
+       /* Event initialization sets last_tag to 0. When later on the events
+        * are deleted and re-added, do not reset the event count value to zero.
+        * Events are added, deleted and re-added when 2 or more events
+        * are active at the same time.
+        */
        if (!event->hw.last_tag) {
                event->hw.last_tag = 1;
                sum = paicrypt_getall(event);           /* Get current value */
index 8ba0f1a..e7013a2 100644 (file)
@@ -260,7 +260,6 @@ static int paiext_event_init(struct perf_event *event)
        rc = paiext_alloc(a, event);
        if (rc)
                return rc;
-       event->hw.last_tag = 0;
        event->destroy = paiext_event_destroy;
 
        if (a->sample_period) {
index a08f794..ce1c777 100644 (file)
@@ -4660,7 +4660,7 @@ static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu)
        if (pmu->intel_cap.pebs_output_pt_available)
                pmu->pmu.capabilities |= PERF_PMU_CAP_AUX_OUTPUT;
        else
-               pmu->pmu.capabilities |= ~PERF_PMU_CAP_AUX_OUTPUT;
+               pmu->pmu.capabilities &= ~PERF_PMU_CAP_AUX_OUTPUT;
 
        intel_pmu_check_event_constraints(pmu->event_constraints,
                                          pmu->num_counters,
index 21556ad..8f3a4d1 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/io.h>
 #include <asm/apic.h>
 #include <asm/desc.h>
+#include <asm/e820/api.h>
 #include <asm/sev.h>
 #include <asm/ibt.h>
 #include <asm/hypervisor.h>
@@ -286,15 +287,31 @@ static int hv_cpu_die(unsigned int cpu)
 
 static int __init hv_pci_init(void)
 {
-       int gen2vm = efi_enabled(EFI_BOOT);
+       bool gen2vm = efi_enabled(EFI_BOOT);
 
        /*
-        * For Generation-2 VM, we exit from pci_arch_init() by returning 0.
-        * The purpose is to suppress the harmless warning:
+        * A Generation-2 VM doesn't support legacy PCI/PCIe, so both
+        * raw_pci_ops and raw_pci_ext_ops are NULL, and pci_subsys_init() ->
+        * pcibios_init() doesn't call pcibios_resource_survey() ->
+        * e820__reserve_resources_late(); as a result, any emulated persistent
+        * memory of E820_TYPE_PRAM (12) via the kernel parameter
+        * memmap=nn[KMG]!ss is not added into iomem_resource and hence can't be
+        * detected by register_e820_pmem(). Fix this by directly calling
+        * e820__reserve_resources_late() here: e820__reserve_resources_late()
+        * depends on e820__reserve_resources(), which has been called earlier
+        * from setup_arch(). Note: e820__reserve_resources_late() also adds
+        * any memory of E820_TYPE_PMEM (7) into iomem_resource, and
+        * acpi_nfit_register_region() -> acpi_nfit_insert_resource() ->
+        * region_intersects() returns REGION_INTERSECTS, so the memory of
+        * E820_TYPE_PMEM won't get added twice.
+        *
+        * We return 0 here so that pci_arch_init() won't print the warning:
         * "PCI: Fatal: No config space access function found"
         */
-       if (gen2vm)
+       if (gen2vm) {
+               e820__reserve_resources_late();
                return 0;
+       }
 
        /* For Generation-1 VM, we'll proceed in pci_arch_init().  */
        return 1;
index c8a7fc2..f896eed 100644 (file)
@@ -16,6 +16,9 @@
 #include <asm/x86_init.h>
 #include <asm/cpufeature.h>
 #include <asm/irq_vectors.h>
+#include <asm/xen/hypervisor.h>
+
+#include <xen/xen.h>
 
 #ifdef CONFIG_ACPI_APEI
 # include <asm/pgtable_types.h>
@@ -127,6 +130,17 @@ static inline void arch_acpi_set_proc_cap_bits(u32 *cap)
        if (!cpu_has(c, X86_FEATURE_MWAIT) ||
            boot_option_idle_override == IDLE_NOMWAIT)
                *cap &= ~(ACPI_PROC_CAP_C_C1_FFH | ACPI_PROC_CAP_C_C2C3_FFH);
+
+       if (xen_initial_domain()) {
+               /*
+                * When Linux is running as Xen dom0, the hypervisor is the
+                * entity in charge of the processor power management, and so
+                * Xen needs to check the OS capabilities reported in the
+                * processor capabilities buffer matches what the hypervisor
+                * driver supports.
+                */
+               xen_sanitize_proc_cap_bits(cap);
+       }
 }
 
 static inline bool acpi_has_cpu_in_madt(void)
index 7048dfa..a908825 100644 (file)
@@ -100,4 +100,13 @@ static inline void leave_lazy(enum xen_lazy_mode mode)
 
 enum xen_lazy_mode xen_get_lazy_mode(void);
 
+#if defined(CONFIG_XEN_DOM0) && defined(CONFIG_ACPI)
+void xen_sanitize_proc_cap_bits(uint32_t *buf);
+#else
+static inline void xen_sanitize_proc_cap_bits(uint32_t *buf)
+{
+       BUG();
+}
+#endif
+
 #endif /* _ASM_X86_XEN_HYPERVISOR_H */
index d0918a7..1a0dd80 100644 (file)
@@ -63,6 +63,7 @@ int acpi_fix_pin2_polarity __initdata;
 
 #ifdef CONFIG_X86_LOCAL_APIC
 static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
+static bool has_lapic_cpus __initdata;
 static bool acpi_support_online_capable;
 #endif
 
@@ -232,6 +233,14 @@ acpi_parse_x2apic(union acpi_subtable_headers *header, const unsigned long end)
        if (!acpi_is_processor_usable(processor->lapic_flags))
                return 0;
 
+       /*
+        * According to https://uefi.org/specs/ACPI/6.5/05_ACPI_Software_Programming_Model.html#processor-local-x2apic-structure
+        * when MADT provides both valid LAPIC and x2APIC entries, the APIC ID
+        * in x2APIC must be equal or greater than 0xff.
+        */
+       if (has_lapic_cpus && apic_id < 0xff)
+               return 0;
+
        /*
         * We need to register disabled CPU as well to permit
         * counting disabled CPUs. This allows us to size
@@ -1114,10 +1123,7 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
 
 static int __init acpi_parse_madt_lapic_entries(void)
 {
-       int count;
-       int x2count = 0;
-       int ret;
-       struct acpi_subtable_proc madt_proc[2];
+       int count, x2count = 0;
 
        if (!boot_cpu_has(X86_FEATURE_APIC))
                return -ENODEV;
@@ -1126,21 +1132,11 @@ static int __init acpi_parse_madt_lapic_entries(void)
                                      acpi_parse_sapic, MAX_LOCAL_APIC);
 
        if (!count) {
-               memset(madt_proc, 0, sizeof(madt_proc));
-               madt_proc[0].id = ACPI_MADT_TYPE_LOCAL_APIC;
-               madt_proc[0].handler = acpi_parse_lapic;
-               madt_proc[1].id = ACPI_MADT_TYPE_LOCAL_X2APIC;
-               madt_proc[1].handler = acpi_parse_x2apic;
-               ret = acpi_table_parse_entries_array(ACPI_SIG_MADT,
-                               sizeof(struct acpi_table_madt),
-                               madt_proc, ARRAY_SIZE(madt_proc), MAX_LOCAL_APIC);
-               if (ret < 0) {
-                       pr_err("Error parsing LAPIC/X2APIC entries\n");
-                       return ret;
-               }
-
-               count = madt_proc[0].count;
-               x2count = madt_proc[1].count;
+               count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC,
+                                       acpi_parse_lapic, MAX_LOCAL_APIC);
+               has_lapic_cpus = count > 0;
+               x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC,
+                                       acpi_parse_x2apic, MAX_LOCAL_APIC);
        }
        if (!count && !x2count) {
                pr_err("No LAPIC entries present\n");
index 9373ec0..13b45b9 100644 (file)
@@ -104,8 +104,6 @@ struct cont_desc {
        size_t               size;
 };
 
-static u32 ucode_new_rev;
-
 /*
  * Microcode patch container file is prepended to the initrd in cpio
  * format. See Documentation/arch/x86/microcode.rst
@@ -442,12 +440,11 @@ static int __apply_microcode_amd(struct microcode_amd *mc)
  *
  * Returns true if container found (sets @desc), false otherwise.
  */
-static bool early_apply_microcode(u32 cpuid_1_eax, void *ucode, size_t size)
+static bool early_apply_microcode(u32 cpuid_1_eax, u32 old_rev, void *ucode, size_t size)
 {
        struct cont_desc desc = { 0 };
        struct microcode_amd *mc;
        bool ret = false;
-       u32 rev, dummy;
 
        desc.cpuid_1_eax = cpuid_1_eax;
 
@@ -457,22 +454,15 @@ static bool early_apply_microcode(u32 cpuid_1_eax, void *ucode, size_t size)
        if (!mc)
                return ret;
 
-       native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
-
        /*
         * Allow application of the same revision to pick up SMT-specific
         * changes even if the revision of the other SMT thread is already
         * up-to-date.
         */
-       if (rev > mc->hdr.patch_id)
+       if (old_rev > mc->hdr.patch_id)
                return ret;
 
-       if (!__apply_microcode_amd(mc)) {
-               ucode_new_rev = mc->hdr.patch_id;
-               ret = true;
-       }
-
-       return ret;
+       return !__apply_microcode_amd(mc);
 }
 
 static bool get_builtin_microcode(struct cpio_data *cp, unsigned int family)
@@ -506,9 +496,12 @@ static void __init find_blobs_in_containers(unsigned int cpuid_1_eax, struct cpi
        *ret = cp;
 }
 
-void __init load_ucode_amd_bsp(unsigned int cpuid_1_eax)
+void __init load_ucode_amd_bsp(struct early_load_data *ed, unsigned int cpuid_1_eax)
 {
        struct cpio_data cp = { };
+       u32 dummy;
+
+       native_rdmsr(MSR_AMD64_PATCH_LEVEL, ed->old_rev, dummy);
 
        /* Needed in load_microcode_amd() */
        ucode_cpu_info[0].cpu_sig.sig = cpuid_1_eax;
@@ -517,7 +510,8 @@ void __init load_ucode_amd_bsp(unsigned int cpuid_1_eax)
        if (!(cp.data && cp.size))
                return;
 
-       early_apply_microcode(cpuid_1_eax, cp.data, cp.size);
+       if (early_apply_microcode(cpuid_1_eax, ed->old_rev, cp.data, cp.size))
+               native_rdmsr(MSR_AMD64_PATCH_LEVEL, ed->new_rev, dummy);
 }
 
 static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size);
@@ -625,10 +619,8 @@ void reload_ucode_amd(unsigned int cpu)
        rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
 
        if (rev < mc->hdr.patch_id) {
-               if (!__apply_microcode_amd(mc)) {
-                       ucode_new_rev = mc->hdr.patch_id;
-                       pr_info("reload patch_level=0x%08x\n", ucode_new_rev);
-               }
+               if (!__apply_microcode_amd(mc))
+                       pr_info_once("reload revision: 0x%08x\n", mc->hdr.patch_id);
        }
 }
 
@@ -649,8 +641,6 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
        if (p && (p->patch_id == csig->rev))
                uci->mc = p->data;
 
-       pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev);
-
        return 0;
 }
 
@@ -691,8 +681,6 @@ static enum ucode_state apply_microcode_amd(int cpu)
        rev = mc_amd->hdr.patch_id;
        ret = UCODE_UPDATED;
 
-       pr_info("CPU%d: new patch_level=0x%08x\n", cpu, rev);
-
 out:
        uci->cpu_sig.rev = rev;
        c->microcode     = rev;
@@ -935,11 +923,6 @@ struct microcode_ops * __init init_amd_microcode(void)
                pr_warn("AMD CPU family 0x%x not supported\n", c->x86);
                return NULL;
        }
-
-       if (ucode_new_rev)
-               pr_info_once("microcode updated early to new patch_level=0x%08x\n",
-                            ucode_new_rev);
-
        return &microcode_amd_ops;
 }
 
index 666d25b..232026a 100644 (file)
@@ -41,8 +41,6 @@
 
 #include "internal.h"
 
-#define DRIVER_VERSION "2.2"
-
 static struct microcode_ops    *microcode_ops;
 bool dis_ucode_ldr = true;
 
@@ -77,6 +75,8 @@ static u32 final_levels[] = {
        0, /* T-101 terminator */
 };
 
+struct early_load_data early_data;
+
 /*
  * Check the current patch level on this CPU.
  *
@@ -155,9 +155,9 @@ void __init load_ucode_bsp(void)
                return;
 
        if (intel)
-               load_ucode_intel_bsp();
+               load_ucode_intel_bsp(&early_data);
        else
-               load_ucode_amd_bsp(cpuid_1_eax);
+               load_ucode_amd_bsp(&early_data, cpuid_1_eax);
 }
 
 void load_ucode_ap(void)
@@ -828,6 +828,11 @@ static int __init microcode_init(void)
        if (!microcode_ops)
                return -ENODEV;
 
+       pr_info_once("Current revision: 0x%08x\n", (early_data.new_rev ?: early_data.old_rev));
+
+       if (early_data.new_rev)
+               pr_info_once("Updated early from: 0x%08x\n", early_data.old_rev);
+
        microcode_pdev = platform_device_register_simple("microcode", -1, NULL, 0);
        if (IS_ERR(microcode_pdev))
                return PTR_ERR(microcode_pdev);
@@ -846,8 +851,6 @@ static int __init microcode_init(void)
        cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/microcode:online",
                          mc_cpu_online, mc_cpu_down_prep);
 
-       pr_info("Microcode Update Driver: v%s.", DRIVER_VERSION);
-
        return 0;
 
  out_pdev:
index 6024feb..070426b 100644 (file)
@@ -339,16 +339,9 @@ static enum ucode_state __apply_microcode(struct ucode_cpu_info *uci,
 static enum ucode_state apply_microcode_early(struct ucode_cpu_info *uci)
 {
        struct microcode_intel *mc = uci->mc;
-       enum ucode_state ret;
-       u32 cur_rev, date;
+       u32 cur_rev;
 
-       ret = __apply_microcode(uci, mc, &cur_rev);
-       if (ret == UCODE_UPDATED) {
-               date = mc->hdr.date;
-               pr_info_once("updated early: 0x%x -> 0x%x, date = %04x-%02x-%02x\n",
-                            cur_rev, mc->hdr.rev, date & 0xffff, date >> 24, (date >> 16) & 0xff);
-       }
-       return ret;
+       return __apply_microcode(uci, mc, &cur_rev);
 }
 
 static __init bool load_builtin_intel_microcode(struct cpio_data *cp)
@@ -413,13 +406,17 @@ static int __init save_builtin_microcode(void)
 early_initcall(save_builtin_microcode);
 
 /* Load microcode on BSP from initrd or builtin blobs */
-void __init load_ucode_intel_bsp(void)
+void __init load_ucode_intel_bsp(struct early_load_data *ed)
 {
        struct ucode_cpu_info uci;
 
+       ed->old_rev = intel_get_microcode_revision();
+
        uci.mc = get_microcode_blob(&uci, false);
        if (uci.mc && apply_microcode_early(&uci) == UCODE_UPDATED)
                ucode_patch_va = UCODE_BSP_LOADED;
+
+       ed->new_rev = uci.cpu_sig.rev;
 }
 
 void load_ucode_intel_ap(void)
index f8047b1..21776c5 100644 (file)
@@ -37,6 +37,12 @@ struct microcode_ops {
                                use_nmi         : 1;
 };
 
+struct early_load_data {
+       u32 old_rev;
+       u32 new_rev;
+};
+
+extern struct early_load_data early_data;
 extern struct ucode_cpu_info ucode_cpu_info[];
 struct cpio_data find_microcode_in_initrd(const char *path);
 
@@ -92,14 +98,14 @@ extern bool dis_ucode_ldr;
 extern bool force_minrev;
 
 #ifdef CONFIG_CPU_SUP_AMD
-void load_ucode_amd_bsp(unsigned int family);
+void load_ucode_amd_bsp(struct early_load_data *ed, unsigned int family);
 void load_ucode_amd_ap(unsigned int family);
 int save_microcode_in_initrd_amd(unsigned int family);
 void reload_ucode_amd(unsigned int cpu);
 struct microcode_ops *init_amd_microcode(void);
 void exit_amd_microcode(void);
 #else /* CONFIG_CPU_SUP_AMD */
-static inline void load_ucode_amd_bsp(unsigned int family) { }
+static inline void load_ucode_amd_bsp(struct early_load_data *ed, unsigned int family) { }
 static inline void load_ucode_amd_ap(unsigned int family) { }
 static inline int save_microcode_in_initrd_amd(unsigned int family) { return -EINVAL; }
 static inline void reload_ucode_amd(unsigned int cpu) { }
@@ -108,12 +114,12 @@ static inline void exit_amd_microcode(void) { }
 #endif /* !CONFIG_CPU_SUP_AMD */
 
 #ifdef CONFIG_CPU_SUP_INTEL
-void load_ucode_intel_bsp(void);
+void load_ucode_intel_bsp(struct early_load_data *ed);
 void load_ucode_intel_ap(void);
 void reload_ucode_intel(void);
 struct microcode_ops *init_intel_microcode(void);
 #else /* CONFIG_CPU_SUP_INTEL */
-static inline void load_ucode_intel_bsp(void) { }
+static inline void load_ucode_intel_bsp(struct early_load_data *ed) { }
 static inline void load_ucode_intel_ap(void) { }
 static inline void reload_ucode_intel(void) { }
 static inline struct microcode_ops *init_intel_microcode(void) { return NULL; }
index e6bba12..01fa06d 100644 (file)
@@ -262,11 +262,14 @@ static uint32_t  __init ms_hyperv_platform(void)
 static int hv_nmi_unknown(unsigned int val, struct pt_regs *regs)
 {
        static atomic_t nmi_cpu = ATOMIC_INIT(-1);
+       unsigned int old_cpu, this_cpu;
 
        if (!unknown_nmi_panic)
                return NMI_DONE;
 
-       if (atomic_cmpxchg(&nmi_cpu, -1, raw_smp_processor_id()) != -1)
+       old_cpu = -1;
+       this_cpu = raw_smp_processor_id();
+       if (!atomic_try_cmpxchg(&nmi_cpu, &old_cpu, this_cpu))
                return NMI_HANDLED;
 
        return NMI_DONE;
index cacf2ed..23d8aaf 100644 (file)
@@ -175,9 +175,6 @@ int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
        frame = get_sigframe(ksig, regs, sizeof(struct rt_sigframe), &fp);
        uc_flags = frame_uc_flags(regs);
 
-       if (setup_signal_shadow_stack(ksig))
-               return -EFAULT;
-
        if (!user_access_begin(frame, sizeof(*frame)))
                return -EFAULT;
 
@@ -198,6 +195,9 @@ int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
                        return -EFAULT;
        }
 
+       if (setup_signal_shadow_stack(ksig))
+               return -EFAULT;
+
        /* Set up registers for signal handler */
        regs->di = ksig->sig;
        /* In case the signal handler was declared without prototypes */
index e4cfb7a..750aec1 100644 (file)
@@ -425,6 +425,8 @@ void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors)
 
 void bdev_add(struct block_device *bdev, dev_t dev)
 {
+       if (bdev_stable_writes(bdev))
+               mapping_set_stable_writes(bdev->bd_inode->i_mapping);
        bdev->bd_dev = dev;
        bdev->bd_inode->i_rdev = dev;
        bdev->bd_inode->i_ino = dev;
index 4a42ea2..4b48c2c 100644 (file)
@@ -577,6 +577,7 @@ static void blkg_destroy_all(struct gendisk *disk)
        struct request_queue *q = disk->queue;
        struct blkcg_gq *blkg, *n;
        int count = BLKG_DESTROY_BATCH_SIZE;
+       int i;
 
 restart:
        spin_lock_irq(&q->queue_lock);
@@ -602,6 +603,18 @@ restart:
                }
        }
 
+       /*
+        * Mark policy deactivated since policy offline has been done, and
+        * the free is scheduled, so future blkcg_deactivate_policy() can
+        * be bypassed
+        */
+       for (i = 0; i < BLKCG_MAX_POLS; i++) {
+               struct blkcg_policy *pol = blkcg_policy[i];
+
+               if (pol)
+                       __clear_bit(pol->plid, q->blkcg_pols);
+       }
+
        q->root_blkg = NULL;
        spin_unlock_irq(&q->queue_lock);
 }
index 624c03c..fd48243 100644 (file)
@@ -249,8 +249,6 @@ static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
 {
        struct blkcg_gq *blkg;
 
-       WARN_ON_ONCE(!rcu_read_lock_held());
-
        if (blkcg == &blkcg_root)
                return q->root_blkg;
 
index e2d1118..900c1be 100644 (file)
@@ -2858,11 +2858,8 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
        };
        struct request *rq;
 
-       if (unlikely(bio_queue_enter(bio)))
-               return NULL;
-
        if (blk_mq_attempt_bio_merge(q, bio, nsegs))
-               goto queue_exit;
+               return NULL;
 
        rq_qos_throttle(q, bio);
 
@@ -2878,35 +2875,23 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
        rq_qos_cleanup(q, bio);
        if (bio->bi_opf & REQ_NOWAIT)
                bio_wouldblock_error(bio);
-queue_exit:
-       blk_queue_exit(q);
        return NULL;
 }
 
-static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
-               struct blk_plug *plug, struct bio **bio, unsigned int nsegs)
+/* return true if this @rq can be used for @bio */
+static bool blk_mq_can_use_cached_rq(struct request *rq, struct blk_plug *plug,
+               struct bio *bio)
 {
-       struct request *rq;
-       enum hctx_type type, hctx_type;
+       enum hctx_type type = blk_mq_get_hctx_type(bio->bi_opf);
+       enum hctx_type hctx_type = rq->mq_hctx->type;
 
-       if (!plug)
-               return NULL;
-       rq = rq_list_peek(&plug->cached_rq);
-       if (!rq || rq->q != q)
-               return NULL;
+       WARN_ON_ONCE(rq_list_peek(&plug->cached_rq) != rq);
 
-       if (blk_mq_attempt_bio_merge(q, *bio, nsegs)) {
-               *bio = NULL;
-               return NULL;
-       }
-
-       type = blk_mq_get_hctx_type((*bio)->bi_opf);
-       hctx_type = rq->mq_hctx->type;
        if (type != hctx_type &&
            !(type == HCTX_TYPE_READ && hctx_type == HCTX_TYPE_DEFAULT))
-               return NULL;
-       if (op_is_flush(rq->cmd_flags) != op_is_flush((*bio)->bi_opf))
-               return NULL;
+               return false;
+       if (op_is_flush(rq->cmd_flags) != op_is_flush(bio->bi_opf))
+               return false;
 
        /*
         * If any qos ->throttle() end up blocking, we will have flushed the
@@ -2914,12 +2899,12 @@ static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
         * before we throttle.
         */
        plug->cached_rq = rq_list_next(rq);
-       rq_qos_throttle(q, *bio);
+       rq_qos_throttle(rq->q, bio);
 
        blk_mq_rq_time_init(rq, 0);
-       rq->cmd_flags = (*bio)->bi_opf;
+       rq->cmd_flags = bio->bi_opf;
        INIT_LIST_HEAD(&rq->queuelist);
-       return rq;
+       return true;
 }
 
 static void bio_set_ioprio(struct bio *bio)
@@ -2949,7 +2934,7 @@ void blk_mq_submit_bio(struct bio *bio)
        struct blk_plug *plug = blk_mq_plug(bio);
        const int is_sync = op_is_sync(bio->bi_opf);
        struct blk_mq_hw_ctx *hctx;
-       struct request *rq;
+       struct request *rq = NULL;
        unsigned int nr_segs = 1;
        blk_status_t ret;
 
@@ -2960,20 +2945,36 @@ void blk_mq_submit_bio(struct bio *bio)
                        return;
        }
 
-       if (!bio_integrity_prep(bio))
-               return;
-
        bio_set_ioprio(bio);
 
-       rq = blk_mq_get_cached_request(q, plug, &bio, nr_segs);
-       if (!rq) {
-               if (!bio)
+       if (plug) {
+               rq = rq_list_peek(&plug->cached_rq);
+               if (rq && rq->q != q)
+                       rq = NULL;
+       }
+       if (rq) {
+               if (!bio_integrity_prep(bio))
                        return;
-               rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
-               if (unlikely(!rq))
+               if (blk_mq_attempt_bio_merge(q, bio, nr_segs))
                        return;
+               if (blk_mq_can_use_cached_rq(rq, plug, bio))
+                       goto done;
+               percpu_ref_get(&q->q_usage_counter);
+       } else {
+               if (unlikely(bio_queue_enter(bio)))
+                       return;
+               if (!bio_integrity_prep(bio))
+                       goto fail;
+       }
+
+       rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
+       if (unlikely(!rq)) {
+fail:
+               blk_queue_exit(q);
+               return;
        }
 
+done:
        trace_block_getrq(bio);
 
        rq_qos_track(q, rq, bio);
index 6b72b2e..42e8420 100644 (file)
@@ -163,38 +163,15 @@ EXPORT_SYMBOL(blk_pre_runtime_resume);
  * @q: the queue of the device
  *
  * Description:
- *    For historical reasons, this routine merely calls blk_set_runtime_active()
- *    to do the real work of restarting the queue.  It does this regardless of
- *    whether the device's runtime-resume succeeded; even if it failed the
+ *    Restart the queue of a runtime suspended device. It does this regardless
+ *    of whether the device's runtime-resume succeeded; even if it failed the
  *    driver or error handler will need to communicate with the device.
  *
  *    This function should be called near the end of the device's
- *    runtime_resume callback.
+ *    runtime_resume callback to correct queue runtime PM status and re-enable
+ *    peeking requests from the queue.
  */
 void blk_post_runtime_resume(struct request_queue *q)
-{
-       blk_set_runtime_active(q);
-}
-EXPORT_SYMBOL(blk_post_runtime_resume);
-
-/**
- * blk_set_runtime_active - Force runtime status of the queue to be active
- * @q: the queue of the device
- *
- * If the device is left runtime suspended during system suspend the resume
- * hook typically resumes the device and corrects runtime status
- * accordingly. However, that does not affect the queue runtime PM status
- * which is still "suspended". This prevents processing requests from the
- * queue.
- *
- * This function can be used in driver's resume hook to correct queue
- * runtime PM status and re-enable peeking requests from the queue. It
- * should be called before first request is added to the queue.
- *
- * This function is also called by blk_post_runtime_resume() for
- * runtime resumes.  It does everything necessary to restart the queue.
- */
-void blk_set_runtime_active(struct request_queue *q)
 {
        int old_status;
 
@@ -211,4 +188,4 @@ void blk_set_runtime_active(struct request_queue *q)
        if (old_status != RPM_ACTIVE)
                blk_clear_pm_only(q);
 }
-EXPORT_SYMBOL(blk_set_runtime_active);
+EXPORT_SYMBOL(blk_post_runtime_resume);
index 13e4377..16f5766 100644 (file)
@@ -1320,6 +1320,7 @@ static void tg_conf_updated(struct throtl_grp *tg, bool global)
                   tg_bps_limit(tg, READ), tg_bps_limit(tg, WRITE),
                   tg_iops_limit(tg, READ), tg_iops_limit(tg, WRITE));
 
+       rcu_read_lock();
        /*
         * Update has_rules[] flags for the updated tg's subtree.  A tg is
         * considered to have rules if either the tg itself or any of its
@@ -1347,6 +1348,7 @@ static void tg_conf_updated(struct throtl_grp *tg, bool global)
                this_tg->latency_target = max(this_tg->latency_target,
                                parent_tg->latency_target);
        }
+       rcu_read_unlock();
 
        /*
         * We're already holding queue_lock and know @tg is valid.  Let's
index 5c0246b..4ccf199 100644 (file)
@@ -502,6 +502,16 @@ static int ivpu_boot_pwr_domain_enable(struct ivpu_device *vdev)
        return ret;
 }
 
+static int ivpu_boot_pwr_domain_disable(struct ivpu_device *vdev)
+{
+       ivpu_boot_dpu_active_drive(vdev, false);
+       ivpu_boot_pwr_island_isolation_drive(vdev, true);
+       ivpu_boot_pwr_island_trickle_drive(vdev, false);
+       ivpu_boot_pwr_island_drive(vdev, false);
+
+       return ivpu_boot_wait_for_pwr_island_status(vdev, 0x0);
+}
+
 static void ivpu_boot_no_snoop_enable(struct ivpu_device *vdev)
 {
        u32 val = REGV_RD32(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES);
@@ -600,25 +610,17 @@ static int ivpu_hw_37xx_info_init(struct ivpu_device *vdev)
 
 static int ivpu_hw_37xx_reset(struct ivpu_device *vdev)
 {
-       int ret;
-       u32 val;
-
-       if (IVPU_WA(punit_disabled))
-               return 0;
+       int ret = 0;
 
-       ret = REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US);
-       if (ret) {
-               ivpu_err(vdev, "Timed out waiting for TRIGGER bit\n");
-               return ret;
+       if (ivpu_boot_pwr_domain_disable(vdev)) {
+               ivpu_err(vdev, "Failed to disable power domain\n");
+               ret = -EIO;
        }
 
-       val = REGB_RD32(VPU_37XX_BUTTRESS_VPU_IP_RESET);
-       val = REG_SET_FLD(VPU_37XX_BUTTRESS_VPU_IP_RESET, TRIGGER, val);
-       REGB_WR32(VPU_37XX_BUTTRESS_VPU_IP_RESET, val);
-
-       ret = REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US);
-       if (ret)
-               ivpu_err(vdev, "Timed out waiting for RESET completion\n");
+       if (ivpu_pll_disable(vdev)) {
+               ivpu_err(vdev, "Failed to disable PLL\n");
+               ret = -EIO;
+       }
 
        return ret;
 }
@@ -651,10 +653,6 @@ static int ivpu_hw_37xx_power_up(struct ivpu_device *vdev)
 {
        int ret;
 
-       ret = ivpu_hw_37xx_reset(vdev);
-       if (ret)
-               ivpu_warn(vdev, "Failed to reset HW: %d\n", ret);
-
        ret = ivpu_hw_37xx_d0i3_disable(vdev);
        if (ret)
                ivpu_warn(vdev, "Failed to disable D0I3: %d\n", ret);
@@ -722,11 +720,11 @@ static int ivpu_hw_37xx_power_down(struct ivpu_device *vdev)
 {
        int ret = 0;
 
-       if (!ivpu_hw_37xx_is_idle(vdev) && ivpu_hw_37xx_reset(vdev))
-               ivpu_err(vdev, "Failed to reset the VPU\n");
+       if (!ivpu_hw_37xx_is_idle(vdev))
+               ivpu_warn(vdev, "VPU not idle during power down\n");
 
-       if (ivpu_pll_disable(vdev)) {
-               ivpu_err(vdev, "Failed to disable PLL\n");
+       if (ivpu_hw_37xx_reset(vdev)) {
+               ivpu_err(vdev, "Failed to reset VPU\n");
                ret = -EIO;
        }
 
index 0ace218..e9b16cb 100644 (file)
@@ -250,9 +250,6 @@ int ivpu_rpm_get_if_active(struct ivpu_device *vdev)
 {
        int ret;
 
-       ivpu_dbg(vdev, RPM, "rpm_get_if_active count %d\n",
-                atomic_read(&vdev->drm.dev->power.usage_count));
-
        ret = pm_runtime_get_if_active(vdev->drm.dev, false);
        drm_WARN_ON(&vdev->drm, ret < 0);
 
index 0b7a01f..d321ca7 100644 (file)
@@ -2031,7 +2031,7 @@ static int acpi_video_bus_add(struct acpi_device *device)
         * HP ZBook Fury 16 G10 requires ACPI video's child devices have _PS0
         * evaluated to have functional panel brightness control.
         */
-       acpi_device_fix_up_power_extended(device);
+       acpi_device_fix_up_power_children(device);
 
        pr_info("%s [%s] (multi-head: %s  rom: %s  post: %s)\n",
               ACPI_VIDEO_DEVICE_NAME, acpi_device_bid(device),
index f007116..3b4d048 100644 (file)
@@ -397,6 +397,19 @@ void acpi_device_fix_up_power_extended(struct acpi_device *adev)
 }
 EXPORT_SYMBOL_GPL(acpi_device_fix_up_power_extended);
 
+/**
+ * acpi_device_fix_up_power_children - Force a device's children into D0.
+ * @adev: Parent device object whose children's power state is to be fixed up.
+ *
+ * Call acpi_device_fix_up_power() for @adev's children so long as they
+ * are reported as present and enabled.
+ */
+void acpi_device_fix_up_power_children(struct acpi_device *adev)
+{
+       acpi_dev_for_each_child(adev, fix_up_power_if_applicable, NULL);
+}
+EXPORT_SYMBOL_GPL(acpi_device_fix_up_power_children);
+
 int acpi_device_update_power(struct acpi_device *device, int *state_p)
 {
        int state;
index 3a34a8c..55437f5 100644 (file)
@@ -592,7 +592,7 @@ static int acpi_idle_play_dead(struct cpuidle_device *dev, int index)
        while (1) {
 
                if (cx->entry_method == ACPI_CSTATE_HALT)
-                       safe_halt();
+                       raw_safe_halt();
                else if (cx->entry_method == ACPI_CSTATE_SYSTEMIO) {
                        io_idle(cx->address);
                } else
index 15a3bdb..9bd9f79 100644 (file)
@@ -447,6 +447,13 @@ static const struct dmi_system_id irq1_level_low_skip_override[] = {
                        DMI_MATCH(DMI_BOARD_NAME, "B1402CBA"),
                },
        },
+       {
+               /* Asus ExpertBook B1402CVA */
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+                       DMI_MATCH(DMI_BOARD_NAME, "B1402CVA"),
+               },
+       },
        {
                /* Asus ExpertBook B1502CBA */
                .matches = {
index 25a63d0..0f77e04 100644 (file)
@@ -82,6 +82,9 @@ static int isapnp_init_one(struct pnp_dev *idev, const struct pnp_device_id *dev
        if (pnp_port_valid(idev, 1)) {
                ctl_addr = devm_ioport_map(&idev->dev,
                                           pnp_port_start(idev, 1), 1);
+               if (!ctl_addr)
+                       return -ENOMEM;
+
                ap->ioaddr.altstatus_addr = ctl_addr;
                ap->ioaddr.ctl_addr = ctl_addr;
                ap->ops = &isapnp_port_ops;
index 855fdf5..b6414e1 100644 (file)
@@ -67,6 +67,7 @@ struct nbd_sock {
 struct recv_thread_args {
        struct work_struct work;
        struct nbd_device *nbd;
+       struct nbd_sock *nsock;
        int index;
 };
 
@@ -395,6 +396,22 @@ static u32 req_to_nbd_cmd_type(struct request *req)
        }
 }
 
+static struct nbd_config *nbd_get_config_unlocked(struct nbd_device *nbd)
+{
+       if (refcount_inc_not_zero(&nbd->config_refs)) {
+               /*
+                * Add smp_mb__after_atomic to ensure that reading nbd->config_refs
+                * and reading nbd->config is ordered. The pair is the barrier in
+                * nbd_alloc_and_init_config(), avoid nbd->config_refs is set
+                * before nbd->config.
+                */
+               smp_mb__after_atomic();
+               return nbd->config;
+       }
+
+       return NULL;
+}
+
 static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req)
 {
        struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
@@ -409,13 +426,13 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req)
                return BLK_EH_DONE;
        }
 
-       if (!refcount_inc_not_zero(&nbd->config_refs)) {
+       config = nbd_get_config_unlocked(nbd);
+       if (!config) {
                cmd->status = BLK_STS_TIMEOUT;
                __clear_bit(NBD_CMD_INFLIGHT, &cmd->flags);
                mutex_unlock(&cmd->lock);
                goto done;
        }
-       config = nbd->config;
 
        if (config->num_connections > 1 ||
            (config->num_connections == 1 && nbd->tag_set.timeout)) {
@@ -489,15 +506,9 @@ done:
        return BLK_EH_DONE;
 }
 
-/*
- *  Send or receive packet. Return a positive value on success and
- *  negtive value on failue, and never return 0.
- */
-static int sock_xmit(struct nbd_device *nbd, int index, int send,
-                    struct iov_iter *iter, int msg_flags, int *sent)
+static int __sock_xmit(struct nbd_device *nbd, struct socket *sock, int send,
+                      struct iov_iter *iter, int msg_flags, int *sent)
 {
-       struct nbd_config *config = nbd->config;
-       struct socket *sock = config->socks[index]->sock;
        int result;
        struct msghdr msg;
        unsigned int noreclaim_flag;
@@ -540,6 +551,19 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send,
        return result;
 }
 
+/*
+ *  Send or receive packet. Return a positive value on success and
+ *  negtive value on failure, and never return 0.
+ */
+static int sock_xmit(struct nbd_device *nbd, int index, int send,
+                    struct iov_iter *iter, int msg_flags, int *sent)
+{
+       struct nbd_config *config = nbd->config;
+       struct socket *sock = config->socks[index]->sock;
+
+       return __sock_xmit(nbd, sock, send, iter, msg_flags, sent);
+}
+
 /*
  * Different settings for sk->sk_sndtimeo can result in different return values
  * if there is a signal pending when we enter sendmsg, because reasons?
@@ -696,7 +720,7 @@ out:
        return 0;
 }
 
-static int nbd_read_reply(struct nbd_device *nbd, int index,
+static int nbd_read_reply(struct nbd_device *nbd, struct socket *sock,
                          struct nbd_reply *reply)
 {
        struct kvec iov = {.iov_base = reply, .iov_len = sizeof(*reply)};
@@ -705,7 +729,7 @@ static int nbd_read_reply(struct nbd_device *nbd, int index,
 
        reply->magic = 0;
        iov_iter_kvec(&to, ITER_DEST, &iov, 1, sizeof(*reply));
-       result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
+       result = __sock_xmit(nbd, sock, 0, &to, MSG_WAITALL, NULL);
        if (result < 0) {
                if (!nbd_disconnected(nbd->config))
                        dev_err(disk_to_dev(nbd->disk),
@@ -829,14 +853,14 @@ static void recv_work(struct work_struct *work)
        struct nbd_device *nbd = args->nbd;
        struct nbd_config *config = nbd->config;
        struct request_queue *q = nbd->disk->queue;
-       struct nbd_sock *nsock;
+       struct nbd_sock *nsock = args->nsock;
        struct nbd_cmd *cmd;
        struct request *rq;
 
        while (1) {
                struct nbd_reply reply;
 
-               if (nbd_read_reply(nbd, args->index, &reply))
+               if (nbd_read_reply(nbd, nsock->sock, &reply))
                        break;
 
                /*
@@ -871,7 +895,6 @@ static void recv_work(struct work_struct *work)
                percpu_ref_put(&q->q_usage_counter);
        }
 
-       nsock = config->socks[args->index];
        mutex_lock(&nsock->tx_lock);
        nbd_mark_nsock_dead(nbd, nsock, 1);
        mutex_unlock(&nsock->tx_lock);
@@ -977,12 +1000,12 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
        struct nbd_sock *nsock;
        int ret;
 
-       if (!refcount_inc_not_zero(&nbd->config_refs)) {
+       config = nbd_get_config_unlocked(nbd);
+       if (!config) {
                dev_err_ratelimited(disk_to_dev(nbd->disk),
                                    "Socks array is empty\n");
                return -EINVAL;
        }
-       config = nbd->config;
 
        if (index >= config->num_connections) {
                dev_err_ratelimited(disk_to_dev(nbd->disk),
@@ -1215,6 +1238,7 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
                INIT_WORK(&args->work, recv_work);
                args->index = i;
                args->nbd = nbd;
+               args->nsock = nsock;
                nsock->cookie++;
                mutex_unlock(&nsock->tx_lock);
                sockfd_put(old);
@@ -1397,6 +1421,7 @@ static int nbd_start_device(struct nbd_device *nbd)
                refcount_inc(&nbd->config_refs);
                INIT_WORK(&args->work, recv_work);
                args->nbd = nbd;
+               args->nsock = config->socks[i];
                args->index = i;
                queue_work(nbd->recv_workq, &args->work);
        }
@@ -1530,17 +1555,20 @@ static int nbd_ioctl(struct block_device *bdev, blk_mode_t mode,
        return error;
 }
 
-static struct nbd_config *nbd_alloc_config(void)
+static int nbd_alloc_and_init_config(struct nbd_device *nbd)
 {
        struct nbd_config *config;
 
+       if (WARN_ON(nbd->config))
+               return -EINVAL;
+
        if (!try_module_get(THIS_MODULE))
-               return ERR_PTR(-ENODEV);
+               return -ENODEV;
 
        config = kzalloc(sizeof(struct nbd_config), GFP_NOFS);
        if (!config) {
                module_put(THIS_MODULE);
-               return ERR_PTR(-ENOMEM);
+               return -ENOMEM;
        }
 
        atomic_set(&config->recv_threads, 0);
@@ -1548,12 +1576,24 @@ static struct nbd_config *nbd_alloc_config(void)
        init_waitqueue_head(&config->conn_wait);
        config->blksize_bits = NBD_DEF_BLKSIZE_BITS;
        atomic_set(&config->live_connections, 0);
-       return config;
+
+       nbd->config = config;
+       /*
+        * Order refcount_set(&nbd->config_refs, 1) and nbd->config assignment,
+        * its pair is the barrier in nbd_get_config_unlocked().
+        * So nbd_get_config_unlocked() won't see nbd->config as null after
+        * refcount_inc_not_zero() succeed.
+        */
+       smp_mb__before_atomic();
+       refcount_set(&nbd->config_refs, 1);
+
+       return 0;
 }
 
 static int nbd_open(struct gendisk *disk, blk_mode_t mode)
 {
        struct nbd_device *nbd;
+       struct nbd_config *config;
        int ret = 0;
 
        mutex_lock(&nbd_index_mutex);
@@ -1566,27 +1606,25 @@ static int nbd_open(struct gendisk *disk, blk_mode_t mode)
                ret = -ENXIO;
                goto out;
        }
-       if (!refcount_inc_not_zero(&nbd->config_refs)) {
-               struct nbd_config *config;
 
+       config = nbd_get_config_unlocked(nbd);
+       if (!config) {
                mutex_lock(&nbd->config_lock);
                if (refcount_inc_not_zero(&nbd->config_refs)) {
                        mutex_unlock(&nbd->config_lock);
                        goto out;
                }
-               config = nbd_alloc_config();
-               if (IS_ERR(config)) {
-                       ret = PTR_ERR(config);
+               ret = nbd_alloc_and_init_config(nbd);
+               if (ret) {
                        mutex_unlock(&nbd->config_lock);
                        goto out;
                }
-               nbd->config = config;
-               refcount_set(&nbd->config_refs, 1);
+
                refcount_inc(&nbd->refs);
                mutex_unlock(&nbd->config_lock);
                if (max_part)
                        set_bit(GD_NEED_PART_SCAN, &disk->state);
-       } else if (nbd_disconnected(nbd->config)) {
+       } else if (nbd_disconnected(config)) {
                if (max_part)
                        set_bit(GD_NEED_PART_SCAN, &disk->state);
        }
@@ -1990,22 +2028,17 @@ again:
                pr_err("nbd%d already in use\n", index);
                return -EBUSY;
        }
-       if (WARN_ON(nbd->config)) {
-               mutex_unlock(&nbd->config_lock);
-               nbd_put(nbd);
-               return -EINVAL;
-       }
-       config = nbd_alloc_config();
-       if (IS_ERR(config)) {
+
+       ret = nbd_alloc_and_init_config(nbd);
+       if (ret) {
                mutex_unlock(&nbd->config_lock);
                nbd_put(nbd);
                pr_err("couldn't allocate config\n");
-               return PTR_ERR(config);
+               return ret;
        }
-       nbd->config = config;
-       refcount_set(&nbd->config_refs, 1);
-       set_bit(NBD_RT_BOUND, &config->runtime_flags);
 
+       config = nbd->config;
+       set_bit(NBD_RT_BOUND, &config->runtime_flags);
        ret = nbd_genl_size_set(info, nbd);
        if (ret)
                goto out;
@@ -2208,7 +2241,8 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
        }
        mutex_unlock(&nbd_index_mutex);
 
-       if (!refcount_inc_not_zero(&nbd->config_refs)) {
+       config = nbd_get_config_unlocked(nbd);
+       if (!config) {
                dev_err(nbd_to_dev(nbd),
                        "not configured, cannot reconfigure\n");
                nbd_put(nbd);
@@ -2216,7 +2250,6 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
        }
 
        mutex_lock(&nbd->config_lock);
-       config = nbd->config;
        if (!test_bit(NBD_RT_BOUND, &config->runtime_flags) ||
            !nbd->pid) {
                dev_err(nbd_to_dev(nbd),
index 22a3cf7..3021d58 100644 (file)
@@ -1464,19 +1464,13 @@ blk_status_t null_process_cmd(struct nullb_cmd *cmd, enum req_op op,
        return BLK_STS_OK;
 }
 
-static blk_status_t null_handle_cmd(struct nullb_cmd *cmd, sector_t sector,
-                                   sector_t nr_sectors, enum req_op op)
+static void null_handle_cmd(struct nullb_cmd *cmd, sector_t sector,
+                           sector_t nr_sectors, enum req_op op)
 {
        struct nullb_device *dev = cmd->nq->dev;
        struct nullb *nullb = dev->nullb;
        blk_status_t sts;
 
-       if (test_bit(NULLB_DEV_FL_THROTTLED, &dev->flags)) {
-               sts = null_handle_throttled(cmd);
-               if (sts != BLK_STS_OK)
-                       return sts;
-       }
-
        if (op == REQ_OP_FLUSH) {
                cmd->error = errno_to_blk_status(null_handle_flush(nullb));
                goto out;
@@ -1493,7 +1487,6 @@ static blk_status_t null_handle_cmd(struct nullb_cmd *cmd, sector_t sector,
 
 out:
        nullb_complete_cmd(cmd);
-       return BLK_STS_OK;
 }
 
 static enum hrtimer_restart nullb_bwtimer_fn(struct hrtimer *timer)
@@ -1724,8 +1717,6 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
        cmd->fake_timeout = should_timeout_request(rq) ||
                blk_should_fake_timeout(rq->q);
 
-       blk_mq_start_request(rq);
-
        if (should_requeue_request(rq)) {
                /*
                 * Alternate between hitting the core BUSY path, and the
@@ -1738,6 +1729,15 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
                return BLK_STS_OK;
        }
 
+       if (test_bit(NULLB_DEV_FL_THROTTLED, &nq->dev->flags)) {
+               blk_status_t sts = null_handle_throttled(cmd);
+
+               if (sts != BLK_STS_OK)
+                       return sts;
+       }
+
+       blk_mq_start_request(rq);
+
        if (is_poll) {
                spin_lock(&nq->poll_lock);
                list_add_tail(&rq->queuelist, &nq->poll_list);
@@ -1747,7 +1747,8 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
        if (cmd->fake_timeout)
                return BLK_STS_OK;
 
-       return null_handle_cmd(cmd, sector, nr_sectors, req_op(rq));
+       null_handle_cmd(cmd, sector, nr_sectors, req_op(rq));
+       return BLK_STS_OK;
 }
 
 static void null_queue_rqs(struct request **rqlist)
index a6dc399..442a0eb 100644 (file)
@@ -1093,9 +1093,10 @@ int dpll_nl_pin_id_get_doit(struct sk_buff *skb, struct genl_info *info)
                return -ENOMEM;
        hdr = genlmsg_put_reply(msg, info, &dpll_nl_family, 0,
                                DPLL_CMD_PIN_ID_GET);
-       if (!hdr)
+       if (!hdr) {
+               nlmsg_free(msg);
                return -EMSGSIZE;
-
+       }
        pin = dpll_pin_find_from_nlattr(info);
        if (!IS_ERR(pin)) {
                ret = dpll_msg_add_pin_handle(msg, pin);
@@ -1123,8 +1124,10 @@ int dpll_nl_pin_get_doit(struct sk_buff *skb, struct genl_info *info)
                return -ENOMEM;
        hdr = genlmsg_put_reply(msg, info, &dpll_nl_family, 0,
                                DPLL_CMD_PIN_GET);
-       if (!hdr)
+       if (!hdr) {
+               nlmsg_free(msg);
                return -EMSGSIZE;
+       }
        ret = dpll_cmd_pin_get_one(msg, pin, info->extack);
        if (ret) {
                nlmsg_free(msg);
@@ -1256,8 +1259,10 @@ int dpll_nl_device_id_get_doit(struct sk_buff *skb, struct genl_info *info)
                return -ENOMEM;
        hdr = genlmsg_put_reply(msg, info, &dpll_nl_family, 0,
                                DPLL_CMD_DEVICE_ID_GET);
-       if (!hdr)
+       if (!hdr) {
+               nlmsg_free(msg);
                return -EMSGSIZE;
+       }
 
        dpll = dpll_device_find_from_nlattr(info);
        if (!IS_ERR(dpll)) {
@@ -1284,8 +1289,10 @@ int dpll_nl_device_get_doit(struct sk_buff *skb, struct genl_info *info)
                return -ENOMEM;
        hdr = genlmsg_put_reply(msg, info, &dpll_nl_family, 0,
                                DPLL_CMD_DEVICE_GET);
-       if (!hdr)
+       if (!hdr) {
+               nlmsg_free(msg);
                return -EMSGSIZE;
+       }
 
        ret = dpll_device_get_one(dpll, msg, info->extack);
        if (ret) {
index 74d00b0..4a98a85 100644 (file)
@@ -131,7 +131,7 @@ config RASPBERRYPI_FIRMWARE
 
 config FW_CFG_SYSFS
        tristate "QEMU fw_cfg device support in sysfs"
-       depends on SYSFS && (ARM || ARM64 || PARISC || PPC_PMAC || SPARC || X86)
+       depends on SYSFS && (ARM || ARM64 || PARISC || PPC_PMAC || RISCV || SPARC || X86)
        depends on HAS_IOPORT_MAP
        default n
        help
index a69399a..1448f61 100644 (file)
@@ -211,7 +211,7 @@ static void fw_cfg_io_cleanup(void)
 
 /* arch-specific ctrl & data register offsets are not available in ACPI, DT */
 #if !(defined(FW_CFG_CTRL_OFF) && defined(FW_CFG_DATA_OFF))
-# if (defined(CONFIG_ARM) || defined(CONFIG_ARM64))
+# if (defined(CONFIG_ARM) || defined(CONFIG_ARM64) || defined(CONFIG_RISCV))
 #  define FW_CFG_CTRL_OFF 0x08
 #  define FW_CFG_DATA_OFF 0x00
 #  define FW_CFG_DMA_OFF 0x10
index afec099..9d92ca1 100644 (file)
@@ -248,6 +248,7 @@ extern int amdgpu_umsch_mm;
 extern int amdgpu_seamless;
 
 extern int amdgpu_user_partt_mode;
+extern int amdgpu_agp;
 
 #define AMDGPU_VM_MAX_NUM_CTX                  4096
 #define AMDGPU_SG_THRESHOLD                    (256*1024*1024)
index df3ecfa..e50be65 100644 (file)
@@ -207,7 +207,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
        }
 
        for (i = 0; i < p->nchunks; i++) {
-               struct drm_amdgpu_cs_chunk __user **chunk_ptr = NULL;
+               struct drm_amdgpu_cs_chunk __user *chunk_ptr = NULL;
                struct drm_amdgpu_cs_chunk user_chunk;
                uint32_t __user *cdata;
 
index 3095a3a..8f24cab 100644 (file)
@@ -207,6 +207,7 @@ int amdgpu_user_partt_mode = AMDGPU_AUTO_COMPUTE_PARTITION_MODE;
 int amdgpu_umsch_mm;
 int amdgpu_seamless = -1; /* auto */
 uint amdgpu_debug_mask;
+int amdgpu_agp = -1; /* auto */
 
 static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work);
 
@@ -961,6 +962,15 @@ module_param_named(seamless, amdgpu_seamless, int, 0444);
 MODULE_PARM_DESC(debug_mask, "debug options for amdgpu, disabled by default");
 module_param_named(debug_mask, amdgpu_debug_mask, uint, 0444);
 
+/**
+ * DOC: agp (int)
+ * Enable the AGP aperture.  This provides an aperture in the GPU's internal
+ * address space for direct access to system memory.  Note that these accesses
+ * are non-snooped, so they are only used for access to uncached memory.
+ */
+MODULE_PARM_DESC(agp, "AGP (-1 = auto (default), 0 = disable, 1 = enable)");
+module_param_named(agp, amdgpu_agp, int, 0444);
+
 /* These devices are not supported by amdgpu.
  * They are supported by the mach64, r128, radeon drivers
  */
index 32b701c..a21045d 100644 (file)
@@ -1473,6 +1473,11 @@ int psp_xgmi_get_topology_info(struct psp_context *psp,
                                topology->nodes[i].num_links = (requires_reflection && topology->nodes[i].num_links) ?
                                                                topology->nodes[i].num_links : node_num_links;
                        }
+                       /* popluate the connected port num info if supported and available */
+                       if (ta_port_num_support && topology->nodes[i].num_links) {
+                               memcpy(topology->nodes[i].port_num, link_extend_info_output->nodes[i].port_num,
+                                      sizeof(struct xgmi_connected_port_num) * TA_XGMI__MAX_PORT_NUM);
+                       }
 
                        /* reflect the topology information for bi-directionality */
                        if (requires_reflection && topology->nodes[i].num_hops)
index 5d36ad3..c4d9cbd 100644 (file)
@@ -150,6 +150,7 @@ struct psp_xgmi_node_info {
        uint8_t                                 is_sharing_enabled;
        enum ta_xgmi_assigned_sdma_engine       sdma_engine;
        uint8_t                                 num_links;
+       struct xgmi_connected_port_num          port_num[TA_XGMI__MAX_PORT_NUM];
 };
 
 struct psp_xgmi_topology_info {
index 84e5987..a3dc68e 100644 (file)
@@ -1188,7 +1188,7 @@ static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev,
                        }
 
                        if (block_obj->hw_ops->query_ras_error_count)
-                               block_obj->hw_ops->query_ras_error_count(adev, &err_data);
+                               block_obj->hw_ops->query_ras_error_count(adev, err_data);
 
                        if ((info->head.block == AMDGPU_RAS_BLOCK__SDMA) ||
                            (info->head.block == AMDGPU_RAS_BLOCK__GFX) ||
index 65949cc..07d9303 100644 (file)
@@ -398,6 +398,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
  * amdgpu_uvd_entity_init - init entity
  *
  * @adev: amdgpu_device pointer
+ * @ring: amdgpu_ring pointer to check
  *
  * Initialize the entity used for handle management in the kernel driver.
  */
index 0954447..59acf42 100644 (file)
@@ -230,6 +230,7 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
  * amdgpu_vce_entity_init - init entity
  *
  * @adev: amdgpu_device pointer
+ * @ring: amdgpu_ring pointer to check
  *
  * Initialize the entity used for handle management in the kernel driver.
  */
index 0ec7b06..a5a05c1 100644 (file)
@@ -675,7 +675,7 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev,
        amdgpu_gmc_set_agp_default(adev, mc);
        amdgpu_gmc_vram_location(adev, &adev->gmc, base);
        amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT);
-       if (!amdgpu_sriov_vf(adev))
+       if (!amdgpu_sriov_vf(adev) && (amdgpu_agp == 1))
                amdgpu_gmc_agp_location(adev, mc);
 
        /* base offset of vram pages */
index 6dce9b2..23d7b54 100644 (file)
@@ -640,8 +640,9 @@ static void gmc_v11_0_vram_gtt_location(struct amdgpu_device *adev,
        amdgpu_gmc_set_agp_default(adev, mc);
        amdgpu_gmc_vram_location(adev, &adev->gmc, base);
        amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_HIGH);
-       if (!amdgpu_sriov_vf(adev) ||
-           (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(11, 5, 0)))
+       if (!amdgpu_sriov_vf(adev) &&
+           (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(11, 5, 0)) &&
+           (amdgpu_agp == 1))
                amdgpu_gmc_agp_location(adev, mc);
 
        /* base offset of vram pages */
index bde25eb..2ac5820 100644 (file)
@@ -1630,7 +1630,7 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
        } else {
                amdgpu_gmc_vram_location(adev, mc, base);
                amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT);
-               if (!amdgpu_sriov_vf(adev))
+               if (!amdgpu_sriov_vf(adev) && (amdgpu_agp == 1))
                        amdgpu_gmc_agp_location(adev, mc);
        }
        /* base offset of vram pages */
@@ -2170,8 +2170,6 @@ static int gmc_v9_0_sw_fini(void *handle)
 
        if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3))
                amdgpu_gmc_sysfs_fini(adev);
-       adev->gmc.num_mem_partitions = 0;
-       kfree(adev->gmc.mem_partitions);
 
        amdgpu_gmc_ras_fini(adev);
        amdgpu_gem_force_release(adev);
@@ -2185,6 +2183,9 @@ static int gmc_v9_0_sw_fini(void *handle)
        amdgpu_bo_free_kernel(&adev->gmc.pdb0_bo, NULL, &adev->gmc.ptr_pdb0);
        amdgpu_bo_fini(adev);
 
+       adev->gmc.num_mem_partitions = 0;
+       kfree(adev->gmc.mem_partitions);
+
        return 0;
 }
 
index ea14261..9b01467 100644 (file)
@@ -130,6 +130,9 @@ static void mmhub_v1_8_init_system_aperture_regs(struct amdgpu_device *adev)
        uint64_t value;
        int i;
 
+       if (amdgpu_sriov_vf(adev))
+               return;
+
        inst_mask = adev->aid_mask;
        for_each_inst(i, inst_mask) {
                /* Program the AGP BAR */
@@ -139,9 +142,6 @@ static void mmhub_v1_8_init_system_aperture_regs(struct amdgpu_device *adev)
                WREG32_SOC15(MMHUB, i, regMC_VM_AGP_TOP,
                             adev->gmc.agp_end >> 24);
 
-               if (amdgpu_sriov_vf(adev))
-                       return;
-
                /* Program the system aperture low logical page number. */
                WREG32_SOC15(MMHUB, i, regMC_VM_SYSTEM_APERTURE_LOW_ADDR,
                        min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
index 6f99f67..ee97814 100644 (file)
@@ -2079,7 +2079,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev)
        struct dmub_srv_create_params create_params;
        struct dmub_srv_region_params region_params;
        struct dmub_srv_region_info region_info;
-       struct dmub_srv_fb_params fb_params;
+       struct dmub_srv_memory_params memory_params;
        struct dmub_srv_fb_info *fb_info;
        struct dmub_srv *dmub_srv;
        const struct dmcub_firmware_header_v1_0 *hdr;
@@ -2182,6 +2182,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev)
                adev->dm.dmub_fw->data +
                le32_to_cpu(hdr->header.ucode_array_offset_bytes) +
                PSP_HEADER_BYTES;
+       region_params.is_mailbox_in_inbox = false;
 
        status = dmub_srv_calc_region_info(dmub_srv, &region_params,
                                           &region_info);
@@ -2205,10 +2206,10 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev)
                return r;
 
        /* Rebase the regions on the framebuffer address. */
-       memset(&fb_params, 0, sizeof(fb_params));
-       fb_params.cpu_addr = adev->dm.dmub_bo_cpu_addr;
-       fb_params.gpu_addr = adev->dm.dmub_bo_gpu_addr;
-       fb_params.region_info = &region_info;
+       memset(&memory_params, 0, sizeof(memory_params));
+       memory_params.cpu_fb_addr = adev->dm.dmub_bo_cpu_addr;
+       memory_params.gpu_fb_addr = adev->dm.dmub_bo_gpu_addr;
+       memory_params.region_info = &region_info;
 
        adev->dm.dmub_fb_info =
                kzalloc(sizeof(*adev->dm.dmub_fb_info), GFP_KERNEL);
@@ -2220,7 +2221,7 @@ static int dm_dmub_sw_init(struct amdgpu_device *adev)
                return -ENOMEM;
        }
 
-       status = dmub_srv_calc_fb_info(dmub_srv, &fb_params, fb_info);
+       status = dmub_srv_calc_mem_info(dmub_srv, &memory_params, fb_info);
        if (status != DMUB_STATUS_OK) {
                DRM_ERROR("Error calculating DMUB FB info: %d\n", status);
                return -EINVAL;
@@ -7481,6 +7482,9 @@ static int amdgpu_dm_i2c_xfer(struct i2c_adapter *i2c_adap,
        int i;
        int result = -EIO;
 
+       if (!ddc_service->ddc_pin || !ddc_service->ddc_pin->hw_info.hw_supported)
+               return result;
+
        cmd.payloads = kcalloc(num, sizeof(struct i2c_payload), GFP_KERNEL);
 
        if (!cmd.payloads)
@@ -9603,14 +9607,14 @@ static bool should_reset_plane(struct drm_atomic_state *state,
        struct drm_plane *other;
        struct drm_plane_state *old_other_state, *new_other_state;
        struct drm_crtc_state *new_crtc_state;
+       struct amdgpu_device *adev = drm_to_adev(plane->dev);
        int i;
 
        /*
-        * TODO: Remove this hack once the checks below are sufficient
-        * enough to determine when we need to reset all the planes on
-        * the stream.
+        * TODO: Remove this hack for all asics once it proves that the
+        * fast updates works fine on DCN3.2+.
         */
-       if (state->allow_modeset)
+       if (adev->ip_versions[DCE_HWIP][0] < IP_VERSION(3, 2, 0) && state->allow_modeset)
                return true;
 
        /* Exit early if we know that we're adding or removing the plane. */
index ed784cf..c7a29bb 100644 (file)
@@ -536,11 +536,8 @@ bool dm_helpers_dp_read_dpcd(
 
        struct amdgpu_dm_connector *aconnector = link->priv;
 
-       if (!aconnector) {
-               drm_dbg_dp(aconnector->base.dev,
-                          "Failed to find connector for link!\n");
+       if (!aconnector)
                return false;
-       }
 
        return drm_dp_dpcd_read(&aconnector->dm_dp_aux.aux, address, data,
                                size) == size;
index d3b13d3..11da0ee 100644 (file)
@@ -1604,31 +1604,31 @@ enum dc_status dm_dp_mst_is_port_support_mode(
        unsigned int upper_link_bw_in_kbps = 0, down_link_bw_in_kbps = 0;
        unsigned int max_compressed_bw_in_kbps = 0;
        struct dc_dsc_bw_range bw_range = {0};
-       struct drm_dp_mst_topology_mgr *mst_mgr;
+       uint16_t full_pbn = aconnector->mst_output_port->full_pbn;
 
        /*
-        * check if the mode could be supported if DSC pass-through is supported
-        * AND check if there enough bandwidth available to support the mode
-        * with DSC enabled.
+        * Consider the case with the depth of the mst topology tree is equal or less than 2
+        * A. When dsc bitstream can be transmitted along the entire path
+        *    1. dsc is possible between source and branch/leaf device (common dsc params is possible), AND
+        *    2. dsc passthrough supported at MST branch, or
+        *    3. dsc decoding supported at leaf MST device
+        *    Use maximum dsc compression as bw constraint
+        * B. When dsc bitstream cannot be transmitted along the entire path
+        *    Use native bw as bw constraint
         */
        if (is_dsc_common_config_possible(stream, &bw_range) &&
-           aconnector->mst_output_port->passthrough_aux) {
-               mst_mgr = aconnector->mst_output_port->mgr;
-               mutex_lock(&mst_mgr->lock);
-
+          (aconnector->mst_output_port->passthrough_aux ||
+           aconnector->dsc_aux == &aconnector->mst_output_port->aux)) {
                cur_link_settings = stream->link->verified_link_cap;
 
                upper_link_bw_in_kbps = dc_link_bandwidth_kbps(aconnector->dc_link,
-                                                              &cur_link_settings
-                                                              );
-               down_link_bw_in_kbps = kbps_from_pbn(aconnector->mst_output_port->full_pbn);
+                                                              &cur_link_settings);
+               down_link_bw_in_kbps = kbps_from_pbn(full_pbn);
 
                /* pick the bottleneck */
                end_to_end_bw_in_kbps = min(upper_link_bw_in_kbps,
                                            down_link_bw_in_kbps);
 
-               mutex_unlock(&mst_mgr->lock);
-
                /*
                 * use the maximum dsc compression bandwidth as the required
                 * bandwidth for the mode
@@ -1643,8 +1643,7 @@ enum dc_status dm_dp_mst_is_port_support_mode(
                /* check if mode could be supported within full_pbn */
                bpp = convert_dc_color_depth_into_bpc(stream->timing.display_color_depth) * 3;
                pbn = drm_dp_calc_pbn_mode(stream->timing.pix_clk_100hz / 10, bpp, false);
-
-               if (pbn > aconnector->mst_output_port->full_pbn)
+               if (pbn > full_pbn)
                        return DC_FAIL_BANDWIDTH_VALIDATE;
        }
 
index 0fa4fcd..507a7cf 100644 (file)
@@ -820,22 +820,22 @@ static void dcn35_set_idle_state(struct clk_mgr *clk_mgr_base, bool allow_idle)
 
        if (dc->config.disable_ips == DMUB_IPS_ENABLE ||
                dc->config.disable_ips == DMUB_IPS_DISABLE_DYNAMIC) {
-               val |= DMUB_IPS1_ALLOW_MASK;
-               val |= DMUB_IPS2_ALLOW_MASK;
-       } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS1) {
                val = val & ~DMUB_IPS1_ALLOW_MASK;
                val = val & ~DMUB_IPS2_ALLOW_MASK;
-       } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS2) {
-               val |= DMUB_IPS1_ALLOW_MASK;
-               val = val & ~DMUB_IPS2_ALLOW_MASK;
-       } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS2_Z10) {
+       } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS1) {
                val |= DMUB_IPS1_ALLOW_MASK;
                val |= DMUB_IPS2_ALLOW_MASK;
+       } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS2) {
+               val = val & ~DMUB_IPS1_ALLOW_MASK;
+               val |= DMUB_IPS2_ALLOW_MASK;
+       } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS2_Z10) {
+               val = val & ~DMUB_IPS1_ALLOW_MASK;
+               val = val & ~DMUB_IPS2_ALLOW_MASK;
        }
 
        if (!allow_idle) {
-               val = val & ~DMUB_IPS1_ALLOW_MASK;
-               val = val & ~DMUB_IPS2_ALLOW_MASK;
+               val |= DMUB_IPS1_ALLOW_MASK;
+               val |= DMUB_IPS2_ALLOW_MASK;
        }
 
        dcn35_smu_write_ips_scratch(clk_mgr, val);
index 7b9bf5c..76b47f1 100644 (file)
@@ -3178,7 +3178,7 @@ static bool update_planes_and_stream_state(struct dc *dc,
                        struct pipe_ctx *otg_master = resource_get_otg_master_for_stream(&context->res_ctx,
                                        context->streams[i]);
 
-                       if (otg_master->stream->test_pattern.type != DP_TEST_PATTERN_VIDEO_MODE)
+                       if (otg_master && otg_master->stream->test_pattern.type != DP_TEST_PATTERN_VIDEO_MODE)
                                resource_build_test_pattern_params(&context->res_ctx, otg_master);
                }
        }
@@ -4934,8 +4934,8 @@ bool dc_dmub_is_ips_idle_state(struct dc *dc)
        if (dc->hwss.get_idle_state)
                idle_state = dc->hwss.get_idle_state(dc);
 
-       if ((idle_state & DMUB_IPS1_ALLOW_MASK) ||
-               (idle_state & DMUB_IPS2_ALLOW_MASK))
+       if (!(idle_state & DMUB_IPS1_ALLOW_MASK) ||
+               !(idle_state & DMUB_IPS2_ALLOW_MASK))
                return true;
 
        return false;
index 1d48278..a1f1d10 100644 (file)
@@ -5190,6 +5190,9 @@ bool dc_resource_acquire_secondary_pipe_for_mpc_odm_legacy(
        sec_next = sec_pipe->next_odm_pipe;
        sec_prev = sec_pipe->prev_odm_pipe;
 
+       if (pri_pipe == NULL)
+               return false;
+
        *sec_pipe = *pri_pipe;
 
        sec_pipe->top_pipe = sec_top;
index e4c0072..0e07699 100644 (file)
@@ -1202,11 +1202,11 @@ void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
                allow_state = dc->hwss.get_idle_state(dc);
                dc->hwss.set_idle_state(dc, false);
 
-               if (allow_state & DMUB_IPS2_ALLOW_MASK) {
+               if (!(allow_state & DMUB_IPS2_ALLOW_MASK)) {
                        // Wait for evaluation time
                        udelay(dc->debug.ips2_eval_delay_us);
                        commit_state = dc->hwss.get_idle_state(dc);
-                       if (commit_state & DMUB_IPS2_COMMIT_MASK) {
+                       if (!(commit_state & DMUB_IPS2_COMMIT_MASK)) {
                                // Tell PMFW to exit low power state
                                dc->clk_mgr->funcs->exit_low_power_state(dc->clk_mgr);
 
@@ -1216,7 +1216,7 @@ void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
 
                                for (i = 0; i < max_num_polls; ++i) {
                                        commit_state = dc->hwss.get_idle_state(dc);
-                                       if (!(commit_state & DMUB_IPS2_COMMIT_MASK))
+                                       if (commit_state & DMUB_IPS2_COMMIT_MASK)
                                                break;
 
                                        udelay(1);
@@ -1235,10 +1235,10 @@ void dc_dmub_srv_exit_low_power_state(const struct dc *dc)
                }
 
                dc_dmub_srv_notify_idle(dc, false);
-               if (allow_state & DMUB_IPS1_ALLOW_MASK) {
+               if (!(allow_state & DMUB_IPS1_ALLOW_MASK)) {
                        for (i = 0; i < max_num_polls; ++i) {
                                commit_state = dc->hwss.get_idle_state(dc);
-                               if (!(commit_state & DMUB_IPS1_COMMIT_MASK))
+                               if (commit_state & DMUB_IPS1_COMMIT_MASK)
                                        break;
 
                                udelay(1);
index cea666e..fcb825e 100644 (file)
@@ -177,6 +177,7 @@ struct dc_panel_patch {
        unsigned int disable_fams;
        unsigned int skip_avmute;
        unsigned int mst_start_top_delay;
+       unsigned int remove_sink_ext_caps;
 };
 
 struct dc_edid_caps {
index 001f9eb..62a8f0b 100644 (file)
@@ -261,12 +261,6 @@ static void enc35_stream_encoder_enable(
                        /* invalid mode ! */
                        ASSERT_CRITICAL(false);
                }
-
-               REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_CLK_EN, 1);
-               REG_UPDATE(DIG_FE_EN_CNTL, DIG_FE_ENABLE, 1);
-       } else {
-               REG_UPDATE(DIG_FE_EN_CNTL, DIG_FE_ENABLE, 0);
-               REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_CLK_EN, 0);
        }
 }
 
@@ -436,6 +430,8 @@ static void enc35_disable_fifo(struct stream_encoder *enc)
        struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
 
        REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, 0);
+       REG_UPDATE(DIG_FE_EN_CNTL, DIG_FE_ENABLE, 0);
+       REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_CLK_EN, 0);
 }
 
 static void enc35_enable_fifo(struct stream_encoder *enc)
@@ -443,6 +439,8 @@ static void enc35_enable_fifo(struct stream_encoder *enc)
        struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
 
        REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_READ_START_LEVEL, 0x7);
+       REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_CLK_EN, 1);
+       REG_UPDATE(DIG_FE_EN_CNTL, DIG_FE_ENABLE, 1);
 
        enc35_reset_fifo(enc, true);
        enc35_reset_fifo(enc, false);
index d6f0f85..f2fe523 100644 (file)
@@ -1088,6 +1088,9 @@ static bool detect_link_and_local_sink(struct dc_link *link,
                if (sink->edid_caps.panel_patch.skip_scdc_overwrite)
                        link->ctx->dc->debug.hdmi20_disable = true;
 
+               if (sink->edid_caps.panel_patch.remove_sink_ext_caps)
+                       link->dpcd_sink_ext_caps.raw = 0;
+
                if (dc_is_hdmi_signal(link->connector_signal))
                        read_scdc_caps(link->ddc, link->local_sink);
 
index 9665ada..df63aa8 100644 (file)
@@ -195,6 +195,7 @@ struct dmub_srv_region_params {
        uint32_t vbios_size;
        const uint8_t *fw_inst_const;
        const uint8_t *fw_bss_data;
+       bool is_mailbox_in_inbox;
 };
 
 /**
@@ -214,20 +215,25 @@ struct dmub_srv_region_params {
  */
 struct dmub_srv_region_info {
        uint32_t fb_size;
+       uint32_t inbox_size;
        uint8_t num_regions;
        struct dmub_region regions[DMUB_WINDOW_TOTAL];
 };
 
 /**
- * struct dmub_srv_fb_params - parameters used for driver fb setup
+ * struct dmub_srv_memory_params - parameters used for driver fb setup
  * @region_info: region info calculated by dmub service
- * @cpu_addr: base cpu address for the framebuffer
- * @gpu_addr: base gpu virtual address for the framebuffer
+ * @cpu_fb_addr: base cpu address for the framebuffer
+ * @cpu_inbox_addr: base cpu address for the gart
+ * @gpu_fb_addr: base gpu virtual address for the framebuffer
+ * @gpu_inbox_addr: base gpu virtual address for the gart
  */
-struct dmub_srv_fb_params {
+struct dmub_srv_memory_params {
        const struct dmub_srv_region_info *region_info;
-       void *cpu_addr;
-       uint64_t gpu_addr;
+       void *cpu_fb_addr;
+       void *cpu_inbox_addr;
+       uint64_t gpu_fb_addr;
+       uint64_t gpu_inbox_addr;
 };
 
 /**
@@ -563,8 +569,8 @@ dmub_srv_calc_region_info(struct dmub_srv *dmub,
  *   DMUB_STATUS_OK - success
  *   DMUB_STATUS_INVALID - unspecified error
  */
-enum dmub_status dmub_srv_calc_fb_info(struct dmub_srv *dmub,
-                                      const struct dmub_srv_fb_params *params,
+enum dmub_status dmub_srv_calc_mem_info(struct dmub_srv *dmub,
+                                      const struct dmub_srv_memory_params *params,
                                       struct dmub_srv_fb_info *out);
 
 /**
index e43e8d4..22fc4ba 100644 (file)
@@ -434,7 +434,7 @@ dmub_srv_calc_region_info(struct dmub_srv *dmub,
        uint32_t fw_state_size = DMUB_FW_STATE_SIZE;
        uint32_t trace_buffer_size = DMUB_TRACE_BUFFER_SIZE;
        uint32_t scratch_mem_size = DMUB_SCRATCH_MEM_SIZE;
-
+       uint32_t previous_top = 0;
        if (!dmub->sw_init)
                return DMUB_STATUS_INVALID;
 
@@ -459,8 +459,15 @@ dmub_srv_calc_region_info(struct dmub_srv *dmub,
        bios->base = dmub_align(stack->top, 256);
        bios->top = bios->base + params->vbios_size;
 
-       mail->base = dmub_align(bios->top, 256);
-       mail->top = mail->base + DMUB_MAILBOX_SIZE;
+       if (params->is_mailbox_in_inbox) {
+               mail->base = 0;
+               mail->top = mail->base + DMUB_MAILBOX_SIZE;
+               previous_top = bios->top;
+       } else {
+               mail->base = dmub_align(bios->top, 256);
+               mail->top = mail->base + DMUB_MAILBOX_SIZE;
+               previous_top = mail->top;
+       }
 
        fw_info = dmub_get_fw_meta_info(params);
 
@@ -479,7 +486,7 @@ dmub_srv_calc_region_info(struct dmub_srv *dmub,
                        dmub->fw_version = fw_info->fw_version;
        }
 
-       trace_buff->base = dmub_align(mail->top, 256);
+       trace_buff->base = dmub_align(previous_top, 256);
        trace_buff->top = trace_buff->base + dmub_align(trace_buffer_size, 64);
 
        fw_state->base = dmub_align(trace_buff->top, 256);
@@ -490,11 +497,14 @@ dmub_srv_calc_region_info(struct dmub_srv *dmub,
 
        out->fb_size = dmub_align(scratch_mem->top, 4096);
 
+       if (params->is_mailbox_in_inbox)
+               out->inbox_size = dmub_align(mail->top, 4096);
+
        return DMUB_STATUS_OK;
 }
 
-enum dmub_status dmub_srv_calc_fb_info(struct dmub_srv *dmub,
-                                      const struct dmub_srv_fb_params *params,
+enum dmub_status dmub_srv_calc_mem_info(struct dmub_srv *dmub,
+                                      const struct dmub_srv_memory_params *params,
                                       struct dmub_srv_fb_info *out)
 {
        uint8_t *cpu_base;
@@ -509,8 +519,8 @@ enum dmub_status dmub_srv_calc_fb_info(struct dmub_srv *dmub,
        if (params->region_info->num_regions != DMUB_NUM_WINDOWS)
                return DMUB_STATUS_INVALID;
 
-       cpu_base = (uint8_t *)params->cpu_addr;
-       gpu_base = params->gpu_addr;
+       cpu_base = (uint8_t *)params->cpu_fb_addr;
+       gpu_base = params->gpu_fb_addr;
 
        for (i = 0; i < DMUB_NUM_WINDOWS; ++i) {
                const struct dmub_region *reg =
@@ -518,6 +528,12 @@ enum dmub_status dmub_srv_calc_fb_info(struct dmub_srv *dmub,
 
                out->fb[i].cpu_addr = cpu_base + reg->base;
                out->fb[i].gpu_addr = gpu_base + reg->base;
+
+               if (i == DMUB_WINDOW_4_MAILBOX && params->cpu_inbox_addr != 0) {
+                       out->fb[i].cpu_addr = (uint8_t *)params->cpu_inbox_addr + reg->base;
+                       out->fb[i].gpu_addr = params->gpu_inbox_addr + reg->base;
+               }
+
                out->fb[i].size = reg->top - reg->base;
        }
 
@@ -707,9 +723,16 @@ enum dmub_status dmub_srv_sync_inbox1(struct dmub_srv *dmub)
                return DMUB_STATUS_INVALID;
 
        if (dmub->hw_funcs.get_inbox1_rptr && dmub->hw_funcs.get_inbox1_wptr) {
-               dmub->inbox1_rb.rptr = dmub->hw_funcs.get_inbox1_rptr(dmub);
-               dmub->inbox1_rb.wrpt = dmub->hw_funcs.get_inbox1_wptr(dmub);
-               dmub->inbox1_last_wptr = dmub->inbox1_rb.wrpt;
+               uint32_t rptr = dmub->hw_funcs.get_inbox1_rptr(dmub);
+               uint32_t wptr = dmub->hw_funcs.get_inbox1_wptr(dmub);
+
+               if (rptr > dmub->inbox1_rb.capacity || wptr > dmub->inbox1_rb.capacity) {
+                       return DMUB_STATUS_HW_FAILURE;
+               } else {
+                       dmub->inbox1_rb.rptr = rptr;
+                       dmub->inbox1_rb.wrpt = wptr;
+                       dmub->inbox1_last_wptr = dmub->inbox1_rb.wrpt;
+               }
        }
 
        return DMUB_STATUS_OK;
@@ -743,6 +766,11 @@ enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub,
        if (!dmub->hw_init)
                return DMUB_STATUS_INVALID;
 
+       if (dmub->inbox1_rb.rptr > dmub->inbox1_rb.capacity ||
+           dmub->inbox1_rb.wrpt > dmub->inbox1_rb.capacity) {
+               return DMUB_STATUS_HW_FAILURE;
+       }
+
        if (dmub_rb_push_front(&dmub->inbox1_rb, cmd))
                return DMUB_STATUS_OK;
 
index dab35d8..fef2d29 100644 (file)
@@ -123,7 +123,7 @@ typedef enum {
   VOLTAGE_GUARDBAND_COUNT
 } GFX_GUARDBAND_e;
 
-#define SMU_METRICS_TABLE_VERSION 0x8
+#define SMU_METRICS_TABLE_VERSION 0x9
 
 typedef struct __attribute__((packed, aligned(4))) {
   uint32_t AccumulationCounter;
@@ -211,6 +211,14 @@ typedef struct __attribute__((packed, aligned(4))) {
   //XGMI Data tranfser size
   uint64_t XgmiReadDataSizeAcc[8];//in KByte
   uint64_t XgmiWriteDataSizeAcc[8];//in KByte
+
+  //PCIE BW Data and error count
+  uint32_t PcieBandwidth[4];
+  uint32_t PCIeL0ToRecoveryCountAcc;      // The Pcie counter itself is accumulated
+  uint32_t PCIenReplayAAcc;               // The Pcie counter itself is accumulated
+  uint32_t PCIenReplayARolloverCountAcc;  // The Pcie counter itself is accumulated
+  uint32_t PCIeNAKSentCountAcc;           // The Pcie counter itself is accumulated
+  uint32_t PCIeNAKReceivedCountAcc;       // The Pcie counter itself is accumulated
 } MetricsTable_t;
 
 #define SMU_VF_METRICS_TABLE_VERSION 0x3
index 891605d..0e5a77c 100644 (file)
@@ -1454,7 +1454,7 @@ static int smu_v13_0_6_register_irq_handler(struct smu_context *smu)
 
 static int smu_v13_0_6_notify_unload(struct smu_context *smu)
 {
-       if (smu->smc_fw_version <= 0x553500)
+       if (amdgpu_in_reset(smu->adev))
                return 0;
 
        dev_dbg(smu->adev->dev, "Notify PMFW about driver unload");
@@ -2095,6 +2095,14 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table
                        smu_v13_0_6_get_current_pcie_link_speed(smu);
                gpu_metrics->pcie_bandwidth_acc =
                                SMUQ10_ROUND(metrics->PcieBandwidthAcc[0]);
+               gpu_metrics->pcie_bandwidth_inst =
+                               SMUQ10_ROUND(metrics->PcieBandwidth[0]);
+               gpu_metrics->pcie_l0_to_recov_count_acc =
+                               metrics->PCIeL0ToRecoveryCountAcc;
+               gpu_metrics->pcie_replay_count_acc =
+                               metrics->PCIenReplayAAcc;
+               gpu_metrics->pcie_replay_rover_count_acc =
+                               metrics->PCIenReplayARolloverCountAcc;
        }
 
        gpu_metrics->system_clock_counter = ktime_get_boottime_ns();
index 2aee323..772f3b0 100644 (file)
@@ -174,6 +174,17 @@ to_ast_sil164_connector(struct drm_connector *connector)
        return container_of(connector, struct ast_sil164_connector, base);
 }
 
+struct ast_bmc_connector {
+       struct drm_connector base;
+       struct drm_connector *physical_connector;
+};
+
+static inline struct ast_bmc_connector *
+to_ast_bmc_connector(struct drm_connector *connector)
+{
+       return container_of(connector, struct ast_bmc_connector, base);
+}
+
 /*
  * Device
  */
@@ -218,7 +229,7 @@ struct ast_device {
                } astdp;
                struct {
                        struct drm_encoder encoder;
-                       struct drm_connector connector;
+                       struct ast_bmc_connector bmc_connector;
                } bmc;
        } output;
 
index cb96149..c20534d 100644 (file)
@@ -1767,6 +1767,30 @@ static const struct drm_encoder_funcs ast_bmc_encoder_funcs = {
        .destroy = drm_encoder_cleanup,
 };
 
+static int ast_bmc_connector_helper_detect_ctx(struct drm_connector *connector,
+                                              struct drm_modeset_acquire_ctx *ctx,
+                                              bool force)
+{
+       struct ast_bmc_connector *bmc_connector = to_ast_bmc_connector(connector);
+       struct drm_connector *physical_connector = bmc_connector->physical_connector;
+
+       /*
+        * Most user-space compositors cannot handle more than one connected
+        * connector per CRTC. Hence, we only mark the BMC as connected if the
+        * physical connector is disconnected. If the physical connector's status
+        * is connected or unknown, the BMC remains disconnected. This has no
+        * effect on the output of the BMC.
+        *
+        * FIXME: Remove this logic once user-space compositors can handle more
+        *        than one connector per CRTC. The BMC should always be connected.
+        */
+
+       if (physical_connector && physical_connector->status == connector_status_disconnected)
+               return connector_status_connected;
+
+       return connector_status_disconnected;
+}
+
 static int ast_bmc_connector_helper_get_modes(struct drm_connector *connector)
 {
        return drm_add_modes_noedid(connector, 4096, 4096);
@@ -1774,6 +1798,7 @@ static int ast_bmc_connector_helper_get_modes(struct drm_connector *connector)
 
 static const struct drm_connector_helper_funcs ast_bmc_connector_helper_funcs = {
        .get_modes = ast_bmc_connector_helper_get_modes,
+       .detect_ctx = ast_bmc_connector_helper_detect_ctx,
 };
 
 static const struct drm_connector_funcs ast_bmc_connector_funcs = {
@@ -1784,12 +1809,33 @@ static const struct drm_connector_funcs ast_bmc_connector_funcs = {
        .atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
 };
 
-static int ast_bmc_output_init(struct ast_device *ast)
+static int ast_bmc_connector_init(struct drm_device *dev,
+                                 struct ast_bmc_connector *bmc_connector,
+                                 struct drm_connector *physical_connector)
+{
+       struct drm_connector *connector = &bmc_connector->base;
+       int ret;
+
+       ret = drm_connector_init(dev, connector, &ast_bmc_connector_funcs,
+                                DRM_MODE_CONNECTOR_VIRTUAL);
+       if (ret)
+               return ret;
+
+       drm_connector_helper_add(connector, &ast_bmc_connector_helper_funcs);
+
+       bmc_connector->physical_connector = physical_connector;
+
+       return 0;
+}
+
+static int ast_bmc_output_init(struct ast_device *ast,
+                              struct drm_connector *physical_connector)
 {
        struct drm_device *dev = &ast->base;
        struct drm_crtc *crtc = &ast->crtc;
        struct drm_encoder *encoder = &ast->output.bmc.encoder;
-       struct drm_connector *connector = &ast->output.bmc.connector;
+       struct ast_bmc_connector *bmc_connector = &ast->output.bmc.bmc_connector;
+       struct drm_connector *connector = &bmc_connector->base;
        int ret;
 
        ret = drm_encoder_init(dev, encoder,
@@ -1799,13 +1845,10 @@ static int ast_bmc_output_init(struct ast_device *ast)
                return ret;
        encoder->possible_crtcs = drm_crtc_mask(crtc);
 
-       ret = drm_connector_init(dev, connector, &ast_bmc_connector_funcs,
-                                DRM_MODE_CONNECTOR_VIRTUAL);
+       ret = ast_bmc_connector_init(dev, bmc_connector, physical_connector);
        if (ret)
                return ret;
 
-       drm_connector_helper_add(connector, &ast_bmc_connector_helper_funcs);
-
        ret = drm_connector_attach_encoder(connector, encoder);
        if (ret)
                return ret;
@@ -1864,6 +1907,7 @@ static const struct drm_mode_config_funcs ast_mode_config_funcs = {
 int ast_mode_config_init(struct ast_device *ast)
 {
        struct drm_device *dev = &ast->base;
+       struct drm_connector *physical_connector = NULL;
        int ret;
 
        ret = drmm_mode_config_init(dev);
@@ -1904,23 +1948,27 @@ int ast_mode_config_init(struct ast_device *ast)
                ret = ast_vga_output_init(ast);
                if (ret)
                        return ret;
+               physical_connector = &ast->output.vga.vga_connector.base;
        }
        if (ast->tx_chip_types & AST_TX_SIL164_BIT) {
                ret = ast_sil164_output_init(ast);
                if (ret)
                        return ret;
+               physical_connector = &ast->output.sil164.sil164_connector.base;
        }
        if (ast->tx_chip_types & AST_TX_DP501_BIT) {
                ret = ast_dp501_output_init(ast);
                if (ret)
                        return ret;
+               physical_connector = &ast->output.dp501.connector;
        }
        if (ast->tx_chip_types & AST_TX_ASTDP_BIT) {
                ret = ast_astdp_output_init(ast);
                if (ret)
                        return ret;
+               physical_connector = &ast->output.astdp.connector;
        }
-       ret = ast_bmc_output_init(ast);
+       ret = ast_bmc_output_init(ast, physical_connector);
        if (ret)
                return ret;
 
index d8856d1..e9994c9 100644 (file)
@@ -5,7 +5,7 @@ termcolor==2.3.0
 certifi==2023.7.22
 charset-normalizer==3.2.0
 idna==3.4
-pip==23.2.1
+pip==23.3
 python-gitlab==3.15.0
 requests==2.31.0
 requests-toolbelt==1.0.0
@@ -13,5 +13,5 @@ ruamel.yaml==0.17.32
 ruamel.yaml.clib==0.2.7
 setuptools==68.0.0
 tenacity==8.2.3
-urllib3==2.0.4
-wheel==0.41.1
\ No newline at end of file
+urllib3==2.0.7
+wheel==0.41.1
index d5c1529..3d92f66 100644 (file)
@@ -336,6 +336,12 @@ static const struct dmi_system_id orientation_data[] = {
                  DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "IdeaPad Duet 3 10IGL5"),
                },
                .driver_data = (void *)&lcd1200x1920_rightside_up,
+       }, {    /* Lenovo Legion Go 8APU1 */
+               .matches = {
+                 DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+                 DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Legion Go 8APU1"),
+               },
+               .driver_data = (void *)&lcd1600x2560_leftside_up,
        }, {    /* Lenovo Yoga Book X90F / X90L */
                .matches = {
                  DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Intel Corporation"),
index 7b4628f..851b312 100644 (file)
@@ -1161,6 +1161,14 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo
        intel_connector->port = port;
        drm_dp_mst_get_port_malloc(port);
 
+       /*
+        * TODO: set the AUX for the actual MST port decompressing the stream.
+        * At the moment the driver only supports enabling this globally in the
+        * first downstream MST branch, via intel_dp's (root port) AUX.
+        */
+       intel_connector->dp.dsc_decompression_aux = &intel_dp->aux;
+       intel_dp_mst_read_decompression_port_dsc_caps(intel_dp, intel_connector);
+
        connector = &intel_connector->base;
        ret = drm_connector_init(dev, connector, &intel_dp_mst_connector_funcs,
                                 DRM_MODE_CONNECTOR_DisplayPort);
@@ -1172,14 +1180,6 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo
 
        drm_connector_helper_add(connector, &intel_dp_mst_connector_helper_funcs);
 
-       /*
-        * TODO: set the AUX for the actual MST port decompressing the stream.
-        * At the moment the driver only supports enabling this globally in the
-        * first downstream MST branch, via intel_dp's (root port) AUX.
-        */
-       intel_connector->dp.dsc_decompression_aux = &intel_dp->aux;
-       intel_dp_mst_read_decompression_port_dsc_caps(intel_dp, intel_connector);
-
        for_each_pipe(dev_priv, pipe) {
                struct drm_encoder *enc =
                        &intel_dp->mst_encoders[pipe]->base.base;
index ed32bf5..ba1186f 100644 (file)
@@ -982,8 +982,6 @@ int intel_gt_probe_all(struct drm_i915_private *i915)
 
 err:
        i915_probe_error(i915, "Failed to initialize %s! (%d)\n", gtdef->name, ret);
-       intel_gt_release_all(i915);
-
        return ret;
 }
 
@@ -1002,15 +1000,6 @@ int intel_gt_tiles_init(struct drm_i915_private *i915)
        return 0;
 }
 
-void intel_gt_release_all(struct drm_i915_private *i915)
-{
-       struct intel_gt *gt;
-       unsigned int id;
-
-       for_each_gt(gt, i915, id)
-               i915->gt[id] = NULL;
-}
-
 void intel_gt_info_print(const struct intel_gt_info *info,
                         struct drm_printer *p)
 {
index 8a0e2c7..802de2c 100644 (file)
@@ -782,7 +782,7 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
        ret = i915_driver_mmio_probe(i915);
        if (ret < 0)
-               goto out_tiles_cleanup;
+               goto out_runtime_pm_put;
 
        ret = i915_driver_hw_probe(i915);
        if (ret < 0)
@@ -842,8 +842,6 @@ out_cleanup_hw:
        i915_ggtt_driver_late_release(i915);
 out_cleanup_mmio:
        i915_driver_mmio_release(i915);
-out_tiles_cleanup:
-       intel_gt_release_all(i915);
 out_runtime_pm_put:
        enable_rpm_wakeref_asserts(&i915->runtime_pm);
        i915_driver_late_release(i915);
index 1ccd1ed..4c05287 100644 (file)
@@ -406,6 +406,7 @@ static const struct dpu_perf_cfg sc8280xp_perf_data = {
        .min_llcc_ib = 0,
        .min_dram_ib = 800000,
        .danger_lut_tbl = {0xf, 0xffff, 0x0},
+       .safe_lut_tbl = {0xfe00, 0xfe00, 0xffff},
        .qos_lut_tbl = {
                {.nentry = ARRAY_SIZE(sc8180x_qos_linear),
                .entries = sc8180x_qos_linear
index 11d9fc2..ec933d5 100644 (file)
@@ -844,8 +844,7 @@ static int mdp5_init(struct platform_device *pdev, struct drm_device *dev)
 
        return 0;
 fail:
-       if (mdp5_kms)
-               mdp5_destroy(mdp5_kms);
+       mdp5_destroy(mdp5_kms);
        return ret;
 }
 
index e329e03..1b88fb5 100644 (file)
@@ -365,9 +365,11 @@ static int dp_display_send_hpd_notification(struct dp_display_private *dp,
        /* reset video pattern flag on disconnect */
        if (!hpd) {
                dp->panel->video_test = false;
-               drm_dp_set_subconnector_property(dp->dp_display.connector,
-                                                connector_status_disconnected,
-                                                dp->panel->dpcd, dp->panel->downstream_ports);
+               if (!dp->dp_display.is_edp)
+                       drm_dp_set_subconnector_property(dp->dp_display.connector,
+                                                        connector_status_disconnected,
+                                                        dp->panel->dpcd,
+                                                        dp->panel->downstream_ports);
        }
 
        dp->dp_display.is_connected = hpd;
@@ -396,8 +398,11 @@ static int dp_display_process_hpd_high(struct dp_display_private *dp)
 
        dp_link_process_request(dp->link);
 
-       drm_dp_set_subconnector_property(dp->dp_display.connector, connector_status_connected,
-                                        dp->panel->dpcd, dp->panel->downstream_ports);
+       if (!dp->dp_display.is_edp)
+               drm_dp_set_subconnector_property(dp->dp_display.connector,
+                                                connector_status_connected,
+                                                dp->panel->dpcd,
+                                                dp->panel->downstream_ports);
 
        edid = dp->panel->edid;
 
index 40e7344..e3bdd7d 100644 (file)
@@ -345,6 +345,9 @@ struct drm_connector *dp_drm_connector_init(struct msm_dp *dp_display, struct dr
        if (IS_ERR(connector))
                return connector;
 
+       if (!dp_display->is_edp)
+               drm_connector_attach_dp_subconnector_property(connector);
+
        drm_connector_attach_encoder(connector, encoder);
 
        return connector;
index 3b1ed02..89a6344 100644 (file)
@@ -918,7 +918,7 @@ static int dsi_7nm_phy_enable(struct msm_dsi_phy *phy,
        if ((phy->cfg->quirks & DSI_PHY_7NM_QUIRK_V5_2)) {
                if (phy->cphy_mode) {
                        vreg_ctrl_0 = 0x45;
-                       vreg_ctrl_1 = 0x45;
+                       vreg_ctrl_1 = 0x41;
                        glbl_rescode_top_ctrl = 0x00;
                        glbl_rescode_bot_ctrl = 0x00;
                } else {
index 2aae7d1..3f217b5 100644 (file)
@@ -288,8 +288,6 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv)
        if (ret)
                goto err_msm_uninit;
 
-       drm_kms_helper_poll_init(ddev);
-
        if (priv->kms_init) {
                drm_kms_helper_poll_init(ddev);
                msm_fbdev_setup(ddev);
index 82b267c..460459a 100644 (file)
@@ -14,7 +14,7 @@ struct nvkm_event {
        int index_nr;
 
        spinlock_t refs_lock;
-       spinlock_t list_lock;
+       rwlock_t list_lock;
        int *refs;
 
        struct list_head ntfy;
@@ -38,7 +38,7 @@ nvkm_event_init(const struct nvkm_event_func *func, struct nvkm_subdev *subdev,
                int types_nr, int index_nr, struct nvkm_event *event)
 {
        spin_lock_init(&event->refs_lock);
-       spin_lock_init(&event->list_lock);
+       rwlock_init(&event->list_lock);
        return __nvkm_event_init(func, subdev, types_nr, index_nr, event);
 }
 
index d8c9252..f28f9a8 100644 (file)
@@ -726,6 +726,11 @@ nouveau_display_create(struct drm_device *dev)
 
        if (nouveau_modeset != 2) {
                ret = nvif_disp_ctor(&drm->client.device, "kmsDisp", 0, &disp->disp);
+               /* no display hw */
+               if (ret == -ENODEV) {
+                       ret = 0;
+                       goto disp_create_err;
+               }
 
                if (!ret && (disp->disp.outp_mask || drm->vbios.dcb.entries)) {
                        nouveau_display_create_properties(dev);
index a6c8771..61fed77 100644 (file)
@@ -81,17 +81,17 @@ nvkm_event_ntfy_state(struct nvkm_event_ntfy *ntfy)
 static void
 nvkm_event_ntfy_remove(struct nvkm_event_ntfy *ntfy)
 {
-       spin_lock_irq(&ntfy->event->list_lock);
+       write_lock_irq(&ntfy->event->list_lock);
        list_del_init(&ntfy->head);
-       spin_unlock_irq(&ntfy->event->list_lock);
+       write_unlock_irq(&ntfy->event->list_lock);
 }
 
 static void
 nvkm_event_ntfy_insert(struct nvkm_event_ntfy *ntfy)
 {
-       spin_lock_irq(&ntfy->event->list_lock);
+       write_lock_irq(&ntfy->event->list_lock);
        list_add_tail(&ntfy->head, &ntfy->event->ntfy);
-       spin_unlock_irq(&ntfy->event->list_lock);
+       write_unlock_irq(&ntfy->event->list_lock);
 }
 
 static void
@@ -176,7 +176,7 @@ nvkm_event_ntfy(struct nvkm_event *event, int id, u32 bits)
                return;
 
        nvkm_trace(event->subdev, "event: ntfy %08x on %d\n", bits, id);
-       spin_lock_irqsave(&event->list_lock, flags);
+       read_lock_irqsave(&event->list_lock, flags);
 
        list_for_each_entry_safe(ntfy, ntmp, &event->ntfy, head) {
                if (ntfy->id == id && ntfy->bits & bits) {
@@ -185,7 +185,7 @@ nvkm_event_ntfy(struct nvkm_event *event, int id, u32 bits)
                }
        }
 
-       spin_unlock_irqrestore(&event->list_lock, flags);
+       read_unlock_irqrestore(&event->list_lock, flags);
 }
 
 void
index 3adbb05..d088e63 100644 (file)
@@ -539,7 +539,7 @@ r535_fifo_runl_ctor(struct nvkm_fifo *fifo)
        struct nvkm_runl *runl;
        struct nvkm_engn *engn;
        u32 cgids = 2048;
-       u32 chids = 2048 / CHID_PER_USERD;
+       u32 chids = 2048;
        int ret;
        NV2080_CTRL_FIFO_GET_DEVICE_INFO_TABLE_PARAMS *ctrl;
 
index e31f964..dc44f5c 100644 (file)
@@ -689,8 +689,8 @@ r535_gsp_rpc_get(struct nvkm_gsp *gsp, u32 fn, u32 argc)
        struct nvfw_gsp_rpc *rpc;
 
        rpc = r535_gsp_cmdq_get(gsp, ALIGN(sizeof(*rpc) + argc, sizeof(u64)));
-       if (!rpc)
-               return NULL;
+       if (IS_ERR(rpc))
+               return ERR_CAST(rpc);
 
        rpc->header_version = 0x03000000;
        rpc->signature = ('C' << 24) | ('P' << 16) | ('R' << 8) | 'V';
@@ -1159,7 +1159,7 @@ static void
 r535_gsp_acpi_mux_id(acpi_handle handle, u32 id, MUX_METHOD_DATA_ELEMENT *mode,
                                                 MUX_METHOD_DATA_ELEMENT *part)
 {
-       acpi_handle iter = NULL, handle_mux;
+       acpi_handle iter = NULL, handle_mux = NULL;
        acpi_status status;
        unsigned long long value;
 
index 9323e7b..be8f48e 100644 (file)
@@ -1709,6 +1709,7 @@ static const struct panel_desc auo_b101uan08_3_desc = {
        .mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_SYNC_PULSE |
                      MIPI_DSI_MODE_LPM,
        .init_cmds = auo_b101uan08_3_init_cmd,
+       .lp11_before_reset = true,
 };
 
 static const struct drm_display_mode boe_tv105wum_nw0_default_mode = {
@@ -1766,11 +1767,11 @@ static const struct panel_desc starry_qfh032011_53g_desc = {
 };
 
 static const struct drm_display_mode starry_himax83102_j02_default_mode = {
-       .clock = 161600,
+       .clock = 162850,
        .hdisplay = 1200,
-       .hsync_start = 1200 + 40,
-       .hsync_end = 1200 + 40 + 20,
-       .htotal = 1200 + 40 + 20 + 40,
+       .hsync_start = 1200 + 50,
+       .hsync_end = 1200 + 50 + 20,
+       .htotal = 1200 + 50 + 20 + 50,
        .vdisplay = 1920,
        .vsync_start = 1920 + 116,
        .vsync_end = 1920 + 116 + 8,
index 6cd32b9..9367a45 100644 (file)
@@ -2379,13 +2379,13 @@ static const struct panel_desc innolux_g070y2_t02 = {
 static const struct display_timing innolux_g101ice_l01_timing = {
        .pixelclock = { 60400000, 71100000, 74700000 },
        .hactive = { 1280, 1280, 1280 },
-       .hfront_porch = { 41, 80, 100 },
-       .hback_porch = { 40, 79, 99 },
-       .hsync_len = { 1, 1, 1 },
+       .hfront_porch = { 30, 60, 70 },
+       .hback_porch = { 30, 60, 70 },
+       .hsync_len = { 22, 40, 60 },
        .vactive = { 800, 800, 800 },
-       .vfront_porch = { 5, 11, 14 },
-       .vback_porch = { 4, 11, 14 },
-       .vsync_len = { 1, 1, 1 },
+       .vfront_porch = { 3, 8, 14 },
+       .vback_porch = { 3, 8, 14 },
+       .vsync_len = { 4, 7, 12 },
        .flags = DISPLAY_FLAGS_DE_HIGH,
 };
 
@@ -2402,6 +2402,7 @@ static const struct panel_desc innolux_g101ice_l01 = {
                .disable = 200,
        },
        .bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG,
+       .bus_flags = DRM_BUS_FLAG_DE_HIGH,
        .connector_type = DRM_MODE_CONNECTOR_LVDS,
 };
 
index 0662998..a13473b 100644 (file)
@@ -247,14 +247,22 @@ static inline void vop_cfg_done(struct vop *vop)
        VOP_REG_SET(vop, common, cfg_done, 1);
 }
 
-static bool has_rb_swapped(uint32_t format)
+static bool has_rb_swapped(uint32_t version, uint32_t format)
 {
        switch (format) {
        case DRM_FORMAT_XBGR8888:
        case DRM_FORMAT_ABGR8888:
-       case DRM_FORMAT_BGR888:
        case DRM_FORMAT_BGR565:
                return true;
+       /*
+        * full framework (IP version 3.x) only need rb swapped for RGB888 and
+        * little framework (IP version 2.x) only need rb swapped for BGR888,
+        * check for 3.x to also only rb swap BGR888 for unknown vop version
+        */
+       case DRM_FORMAT_RGB888:
+               return VOP_MAJOR(version) == 3;
+       case DRM_FORMAT_BGR888:
+               return VOP_MAJOR(version) != 3;
        default:
                return false;
        }
@@ -1030,7 +1038,7 @@ static void vop_plane_atomic_update(struct drm_plane *plane,
        VOP_WIN_SET(vop, win, dsp_info, dsp_info);
        VOP_WIN_SET(vop, win, dsp_st, dsp_st);
 
-       rb_swap = has_rb_swapped(fb->format->format);
+       rb_swap = has_rb_swapped(vop->data->version, fb->format->format);
        VOP_WIN_SET(vop, win, rb_swap, rb_swap);
 
        /*
index 3ca4597..d9e9829 100644 (file)
@@ -345,6 +345,8 @@ static const struct apple_non_apple_keyboard non_apple_keyboards[] = {
        { "AONE" },
        { "GANSS" },
        { "Hailuck" },
+       { "Jamesdonkey" },
+       { "A3R" },
 };
 
 static bool apple_is_non_apple_keyboard(struct hid_device *hdev)
index fd61dba..78cdfb8 100644 (file)
@@ -381,7 +381,7 @@ static int asus_raw_event(struct hid_device *hdev,
        return 0;
 }
 
-static int asus_kbd_set_report(struct hid_device *hdev, u8 *buf, size_t buf_size)
+static int asus_kbd_set_report(struct hid_device *hdev, const u8 *buf, size_t buf_size)
 {
        unsigned char *dmabuf;
        int ret;
@@ -404,7 +404,7 @@ static int asus_kbd_set_report(struct hid_device *hdev, u8 *buf, size_t buf_size
 
 static int asus_kbd_init(struct hid_device *hdev)
 {
-       u8 buf[] = { FEATURE_KBD_REPORT_ID, 0x41, 0x53, 0x55, 0x53, 0x20, 0x54,
+       const u8 buf[] = { FEATURE_KBD_REPORT_ID, 0x41, 0x53, 0x55, 0x53, 0x20, 0x54,
                     0x65, 0x63, 0x68, 0x2e, 0x49, 0x6e, 0x63, 0x2e, 0x00 };
        int ret;
 
@@ -418,7 +418,7 @@ static int asus_kbd_init(struct hid_device *hdev)
 static int asus_kbd_get_functions(struct hid_device *hdev,
                                  unsigned char *kbd_func)
 {
-       u8 buf[] = { FEATURE_KBD_REPORT_ID, 0x05, 0x20, 0x31, 0x00, 0x08 };
+       const u8 buf[] = { FEATURE_KBD_REPORT_ID, 0x05, 0x20, 0x31, 0x00, 0x08 };
        u8 *readbuf;
        int ret;
 
@@ -449,7 +449,7 @@ static int asus_kbd_get_functions(struct hid_device *hdev,
 
 static int rog_nkey_led_init(struct hid_device *hdev)
 {
-       u8 buf_init_start[] = { FEATURE_KBD_LED_REPORT_ID1, 0xB9 };
+       const u8 buf_init_start[] = { FEATURE_KBD_LED_REPORT_ID1, 0xB9 };
        u8 buf_init2[] = { FEATURE_KBD_LED_REPORT_ID1, 0x41, 0x53, 0x55, 0x53, 0x20,
                                0x54, 0x65, 0x63, 0x68, 0x2e, 0x49, 0x6e, 0x63, 0x2e, 0x00 };
        u8 buf_init3[] = { FEATURE_KBD_LED_REPORT_ID1,
@@ -1000,6 +1000,24 @@ static int asus_start_multitouch(struct hid_device *hdev)
        return 0;
 }
 
+static int __maybe_unused asus_resume(struct hid_device *hdev) {
+       struct asus_drvdata *drvdata = hid_get_drvdata(hdev);
+       int ret = 0;
+
+       if (drvdata->kbd_backlight) {
+               const u8 buf[] = { FEATURE_KBD_REPORT_ID, 0xba, 0xc5, 0xc4,
+                               drvdata->kbd_backlight->cdev.brightness };
+               ret = asus_kbd_set_report(hdev, buf, sizeof(buf));
+               if (ret < 0) {
+                       hid_err(hdev, "Asus failed to set keyboard backlight: %d\n", ret);
+                       goto asus_resume_err;
+               }
+       }
+
+asus_resume_err:
+       return ret;
+}
+
 static int __maybe_unused asus_reset_resume(struct hid_device *hdev)
 {
        struct asus_drvdata *drvdata = hid_get_drvdata(hdev);
@@ -1294,6 +1312,7 @@ static struct hid_driver asus_driver = {
        .input_configured       = asus_input_configured,
 #ifdef CONFIG_PM
        .reset_resume           = asus_reset_resume,
+       .resume                                 = asus_resume,
 #endif
        .event                  = asus_event,
        .raw_event              = asus_raw_event
index 8992e3c..e018121 100644 (file)
@@ -702,15 +702,22 @@ static void hid_close_report(struct hid_device *device)
  * Free a device structure, all reports, and all fields.
  */
 
-static void hid_device_release(struct device *dev)
+void hiddev_free(struct kref *ref)
 {
-       struct hid_device *hid = to_hid_device(dev);
+       struct hid_device *hid = container_of(ref, struct hid_device, ref);
 
        hid_close_report(hid);
        kfree(hid->dev_rdesc);
        kfree(hid);
 }
 
+static void hid_device_release(struct device *dev)
+{
+       struct hid_device *hid = to_hid_device(dev);
+
+       kref_put(&hid->ref, hiddev_free);
+}
+
 /*
  * Fetch a report description item from the data stream. We support long
  * items, though they are not used yet.
@@ -2846,6 +2853,7 @@ struct hid_device *hid_allocate_device(void)
        spin_lock_init(&hdev->debug_list_lock);
        sema_init(&hdev->driver_input_lock, 1);
        mutex_init(&hdev->ll_open_lock);
+       kref_init(&hdev->ref);
 
        hid_bpf_device_init(hdev);
 
index e7ef1ea..7dd83ec 100644 (file)
@@ -1135,6 +1135,7 @@ static int hid_debug_events_open(struct inode *inode, struct file *file)
                goto out;
        }
        list->hdev = (struct hid_device *) inode->i_private;
+       kref_get(&list->hdev->ref);
        file->private_data = list;
        mutex_init(&list->read_mutex);
 
@@ -1227,6 +1228,8 @@ static int hid_debug_events_release(struct inode *inode, struct file *file)
        list_del(&list->node);
        spin_unlock_irqrestore(&list->hdev->debug_list_lock, flags);
        kfifo_free(&list->hid_debug_fifo);
+
+       kref_put(&list->hdev->ref, hiddev_free);
        kfree(list);
 
        return 0;
index 558eb08..281b3a7 100644 (file)
@@ -21,6 +21,10 @@ MODULE_DESCRIPTION("HID driver for Glorious PC Gaming Race mice");
  * Glorious Model O and O- specify the const flag in the consumer input
  * report descriptor, which leads to inputs being ignored. Fix this
  * by patching the descriptor.
+ *
+ * Glorious Model I incorrectly specifes the Usage Minimum for its
+ * keyboard HID report, causing keycodes to be misinterpreted.
+ * Fix this by setting Usage Minimum to 0 in that report.
  */
 static __u8 *glorious_report_fixup(struct hid_device *hdev, __u8 *rdesc,
                unsigned int *rsize)
@@ -32,6 +36,10 @@ static __u8 *glorious_report_fixup(struct hid_device *hdev, __u8 *rdesc,
                rdesc[85] = rdesc[113] = rdesc[141] = \
                        HID_MAIN_ITEM_VARIABLE | HID_MAIN_ITEM_RELATIVE;
        }
+       if (*rsize == 156 && rdesc[41] == 1) {
+               hid_info(hdev, "patching Glorious Model I keyboard report descriptor\n");
+               rdesc[41] = 0;
+       }
        return rdesc;
 }
 
@@ -44,6 +52,8 @@ static void glorious_update_name(struct hid_device *hdev)
                model = "Model O"; break;
        case USB_DEVICE_ID_GLORIOUS_MODEL_D:
                model = "Model D"; break;
+       case USB_DEVICE_ID_GLORIOUS_MODEL_I:
+               model = "Model I"; break;
        }
 
        snprintf(hdev->name, sizeof(hdev->name), "%s %s", "Glorious", model);
@@ -66,10 +76,12 @@ static int glorious_probe(struct hid_device *hdev,
 }
 
 static const struct hid_device_id glorious_devices[] = {
-       { HID_USB_DEVICE(USB_VENDOR_ID_GLORIOUS,
+       { HID_USB_DEVICE(USB_VENDOR_ID_SINOWEALTH,
                USB_DEVICE_ID_GLORIOUS_MODEL_O) },
-       { HID_USB_DEVICE(USB_VENDOR_ID_GLORIOUS,
+       { HID_USB_DEVICE(USB_VENDOR_ID_SINOWEALTH,
                USB_DEVICE_ID_GLORIOUS_MODEL_D) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_LAVIEW,
+               USB_DEVICE_ID_GLORIOUS_MODEL_I) },
        { }
 };
 MODULE_DEVICE_TABLE(hid, glorious_devices);
index f7973cc..c6e4e0d 100644 (file)
 #define USB_DEVICE_ID_GENERAL_TOUCH_WIN8_PIT_010A 0x010a
 #define USB_DEVICE_ID_GENERAL_TOUCH_WIN8_PIT_E100 0xe100
 
-#define USB_VENDOR_ID_GLORIOUS  0x258a
-#define USB_DEVICE_ID_GLORIOUS_MODEL_D 0x0033
-#define USB_DEVICE_ID_GLORIOUS_MODEL_O 0x0036
-
 #define I2C_VENDOR_ID_GOODIX           0x27c6
 #define I2C_DEVICE_ID_GOODIX_01F0      0x01f0
 
 #define USB_VENDOR_ID_LABTEC           0x1020
 #define USB_DEVICE_ID_LABTEC_WIRELESS_KEYBOARD 0x0006
 
+#define USB_VENDOR_ID_LAVIEW           0x22D4
+#define USB_DEVICE_ID_GLORIOUS_MODEL_I 0x1503
+
 #define USB_VENDOR_ID_LCPOWER          0x1241
 #define USB_DEVICE_ID_LCPOWER_LC1000   0xf767
 
 #define USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_2         0xc534
 #define USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED_1      0xc539
 #define USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED_1_1    0xc53f
-#define USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED_1_2    0xc547
 #define USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_POWERPLAY 0xc53a
 #define USB_DEVICE_ID_SPACETRAVELLER   0xc623
 #define USB_DEVICE_ID_SPACENAVIGATOR   0xc626
 #define USB_VENDOR_ID_SIGMATEL         0x066F
 #define USB_DEVICE_ID_SIGMATEL_STMP3780        0x3780
 
+#define USB_VENDOR_ID_SINOWEALTH  0x258a
+#define USB_DEVICE_ID_GLORIOUS_MODEL_D 0x0033
+#define USB_DEVICE_ID_GLORIOUS_MODEL_O 0x0036
+
 #define USB_VENDOR_ID_SIS_TOUCH                0x0457
 #define USB_DEVICE_ID_SIS9200_TOUCH    0x9200
 #define USB_DEVICE_ID_SIS817_TOUCH     0x0817
index 8afe3be..e6a8b6d 100644 (file)
@@ -1695,12 +1695,11 @@ static int logi_dj_raw_event(struct hid_device *hdev,
                }
                /*
                 * Mouse-only receivers send unnumbered mouse data. The 27 MHz
-                * receiver uses 6 byte packets, the nano receiver 8 bytes,
-                * the lightspeed receiver (Pro X Superlight) 13 bytes.
+                * receiver uses 6 byte packets, the nano receiver 8 bytes.
                 */
                if (djrcv_dev->unnumbered_application == HID_GD_MOUSE &&
-                   size <= 13){
-                       u8 mouse_report[14];
+                   size <= 8) {
+                       u8 mouse_report[9];
 
                        /* Prepend report id */
                        mouse_report[0] = REPORT_TYPE_MOUSE;
@@ -1984,10 +1983,6 @@ static const struct hid_device_id logi_dj_receivers[] = {
          HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH,
                USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED_1_1),
         .driver_data = recvr_type_gaming_hidpp},
-       { /* Logitech lightspeed receiver (0xc547) */
-         HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH,
-               USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED_1_2),
-        .driver_data = recvr_type_gaming_hidpp},
 
        { /* Logitech 27 MHz HID++ 1.0 receiver (0xc513) */
          HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_MX3000_RECEIVER),
index 72883e0..aef0785 100644 (file)
@@ -1142,6 +1142,8 @@ static int mcp2221_probe(struct hid_device *hdev,
        if (ret)
                return ret;
 
+       hid_device_io_start(hdev);
+
        /* Set I2C bus clock diviser */
        if (i2c_clk_freq > 400)
                i2c_clk_freq = 400;
@@ -1157,12 +1159,12 @@ static int mcp2221_probe(struct hid_device *hdev,
        snprintf(mcp->adapter.name, sizeof(mcp->adapter.name),
                        "MCP2221 usb-i2c bridge");
 
+       i2c_set_adapdata(&mcp->adapter, mcp);
        ret = devm_i2c_add_adapter(&hdev->dev, &mcp->adapter);
        if (ret) {
                hid_err(hdev, "can't add usb-i2c adapter: %d\n", ret);
                return ret;
        }
-       i2c_set_adapdata(&mcp->adapter, mcp);
 
 #if IS_REACHABLE(CONFIG_GPIOLIB)
        /* Setup GPIO chip */
index e098cc7..fd5b063 100644 (file)
@@ -2046,6 +2046,11 @@ static const struct hid_device_id mt_devices[] = {
                MT_USB_DEVICE(USB_VENDOR_ID_HANVON_ALT,
                        USB_DEVICE_ID_HANVON_ALT_MULTITOUCH) },
 
+       /* HONOR GLO-GXXX panel */
+       { .driver_data = MT_CLS_VTL,
+               HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8,
+                       0x347d, 0x7853) },
+
        /* Ilitek dual touch panel */
        {  .driver_data = MT_CLS_NSMU,
                MT_USB_DEVICE(USB_VENDOR_ID_ILITEK,
index 5a48fca..ea47292 100644 (file)
@@ -33,6 +33,7 @@ static const struct hid_device_id hid_quirks[] = {
        { HID_USB_DEVICE(USB_VENDOR_ID_AKAI, USB_DEVICE_ID_AKAI_MPKMINI2), HID_QUIRK_NO_INIT_REPORTS },
        { HID_USB_DEVICE(USB_VENDOR_ID_ALPS, USB_DEVICE_ID_IBM_GAMEPAD), HID_QUIRK_BADPAD },
        { HID_USB_DEVICE(USB_VENDOR_ID_AMI, USB_DEVICE_ID_AMI_VIRT_KEYBOARD_AND_MOUSE), HID_QUIRK_ALWAYS_POLL },
+       { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ANSI), HID_QUIRK_ALWAYS_POLL },
        { HID_USB_DEVICE(USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_2PORTKVM), HID_QUIRK_NOGET },
        { HID_USB_DEVICE(USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_4PORTKVMC), HID_QUIRK_NOGET },
        { HID_USB_DEVICE(USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_4PORTKVM), HID_QUIRK_NOGET },
index affcfb2..35f7628 100644 (file)
@@ -63,7 +63,7 @@ static int dw_reg_read(void *context, unsigned int reg, unsigned int *val)
 {
        struct dw_i2c_dev *dev = context;
 
-       *val = readl_relaxed(dev->base + reg);
+       *val = readl(dev->base + reg);
 
        return 0;
 }
@@ -72,7 +72,7 @@ static int dw_reg_write(void *context, unsigned int reg, unsigned int val)
 {
        struct dw_i2c_dev *dev = context;
 
-       writel_relaxed(val, dev->base + reg);
+       writel(val, dev->base + reg);
 
        return 0;
 }
@@ -81,7 +81,7 @@ static int dw_reg_read_swab(void *context, unsigned int reg, unsigned int *val)
 {
        struct dw_i2c_dev *dev = context;
 
-       *val = swab32(readl_relaxed(dev->base + reg));
+       *val = swab32(readl(dev->base + reg));
 
        return 0;
 }
@@ -90,7 +90,7 @@ static int dw_reg_write_swab(void *context, unsigned int reg, unsigned int val)
 {
        struct dw_i2c_dev *dev = context;
 
-       writel_relaxed(swab32(val), dev->base + reg);
+       writel(swab32(val), dev->base + reg);
 
        return 0;
 }
@@ -99,8 +99,8 @@ static int dw_reg_read_word(void *context, unsigned int reg, unsigned int *val)
 {
        struct dw_i2c_dev *dev = context;
 
-       *val = readw_relaxed(dev->base + reg) |
-               (readw_relaxed(dev->base + reg + 2) << 16);
+       *val = readw(dev->base + reg) |
+               (readw(dev->base + reg + 2) << 16);
 
        return 0;
 }
@@ -109,8 +109,8 @@ static int dw_reg_write_word(void *context, unsigned int reg, unsigned int val)
 {
        struct dw_i2c_dev *dev = context;
 
-       writew_relaxed(val, dev->base + reg);
-       writew_relaxed(val >> 16, dev->base + reg + 2);
+       writew(val, dev->base + reg);
+       writew(val >> 16, dev->base + reg + 2);
 
        return 0;
 }
index 041a76f..e106af8 100644 (file)
@@ -771,8 +771,8 @@ static int ocores_i2c_resume(struct device *dev)
        return ocores_init(dev, i2c);
 }
 
-static DEFINE_SIMPLE_DEV_PM_OPS(ocores_i2c_pm,
-                               ocores_i2c_suspend, ocores_i2c_resume);
+static DEFINE_NOIRQ_DEV_PM_OPS(ocores_i2c_pm,
+                              ocores_i2c_suspend, ocores_i2c_resume);
 
 static struct platform_driver ocores_i2c_driver = {
        .probe   = ocores_i2c_probe,
index 1d76482..76f79b6 100644 (file)
@@ -265,6 +265,9 @@ struct pxa_i2c {
        u32                     hs_mask;
 
        struct i2c_bus_recovery_info recovery;
+       struct pinctrl          *pinctrl;
+       struct pinctrl_state    *pinctrl_default;
+       struct pinctrl_state    *pinctrl_recovery;
 };
 
 #define _IBMR(i2c)     ((i2c)->reg_ibmr)
@@ -1299,12 +1302,13 @@ static void i2c_pxa_prepare_recovery(struct i2c_adapter *adap)
         */
        gpiod_set_value(i2c->recovery.scl_gpiod, ibmr & IBMR_SCLS);
        gpiod_set_value(i2c->recovery.sda_gpiod, ibmr & IBMR_SDAS);
+
+       WARN_ON(pinctrl_select_state(i2c->pinctrl, i2c->pinctrl_recovery));
 }
 
 static void i2c_pxa_unprepare_recovery(struct i2c_adapter *adap)
 {
        struct pxa_i2c *i2c = adap->algo_data;
-       struct i2c_bus_recovery_info *bri = adap->bus_recovery_info;
        u32 isr;
 
        /*
@@ -1318,7 +1322,7 @@ static void i2c_pxa_unprepare_recovery(struct i2c_adapter *adap)
                i2c_pxa_do_reset(i2c);
        }
 
-       WARN_ON(pinctrl_select_state(bri->pinctrl, bri->pins_default));
+       WARN_ON(pinctrl_select_state(i2c->pinctrl, i2c->pinctrl_default));
 
        dev_dbg(&i2c->adap.dev, "recovery: IBMR 0x%08x ISR 0x%08x\n",
                readl(_IBMR(i2c)), readl(_ISR(i2c)));
@@ -1340,20 +1344,76 @@ static int i2c_pxa_init_recovery(struct pxa_i2c *i2c)
        if (IS_ENABLED(CONFIG_I2C_PXA_SLAVE))
                return 0;
 
-       bri->pinctrl = devm_pinctrl_get(dev);
-       if (PTR_ERR(bri->pinctrl) == -ENODEV) {
-               bri->pinctrl = NULL;
+       i2c->pinctrl = devm_pinctrl_get(dev);
+       if (PTR_ERR(i2c->pinctrl) == -ENODEV)
+               i2c->pinctrl = NULL;
+       if (IS_ERR(i2c->pinctrl))
+               return PTR_ERR(i2c->pinctrl);
+
+       if (!i2c->pinctrl)
+               return 0;
+
+       i2c->pinctrl_default = pinctrl_lookup_state(i2c->pinctrl,
+                                                   PINCTRL_STATE_DEFAULT);
+       i2c->pinctrl_recovery = pinctrl_lookup_state(i2c->pinctrl, "recovery");
+
+       if (IS_ERR(i2c->pinctrl_default) || IS_ERR(i2c->pinctrl_recovery)) {
+               dev_info(dev, "missing pinmux recovery information: %ld %ld\n",
+                        PTR_ERR(i2c->pinctrl_default),
+                        PTR_ERR(i2c->pinctrl_recovery));
+               return 0;
+       }
+
+       /*
+        * Claiming GPIOs can influence the pinmux state, and may glitch the
+        * I2C bus. Do this carefully.
+        */
+       bri->scl_gpiod = devm_gpiod_get(dev, "scl", GPIOD_OUT_HIGH_OPEN_DRAIN);
+       if (bri->scl_gpiod == ERR_PTR(-EPROBE_DEFER))
+               return -EPROBE_DEFER;
+       if (IS_ERR(bri->scl_gpiod)) {
+               dev_info(dev, "missing scl gpio recovery information: %pe\n",
+                        bri->scl_gpiod);
+               return 0;
+       }
+
+       /*
+        * We have SCL. Pull SCL low and wait a bit so that SDA glitches
+        * have no effect.
+        */
+       gpiod_direction_output(bri->scl_gpiod, 0);
+       udelay(10);
+       bri->sda_gpiod = devm_gpiod_get(dev, "sda", GPIOD_OUT_HIGH_OPEN_DRAIN);
+
+       /* Wait a bit in case of a SDA glitch, and then release SCL. */
+       udelay(10);
+       gpiod_direction_output(bri->scl_gpiod, 1);
+
+       if (bri->sda_gpiod == ERR_PTR(-EPROBE_DEFER))
+               return -EPROBE_DEFER;
+
+       if (IS_ERR(bri->sda_gpiod)) {
+               dev_info(dev, "missing sda gpio recovery information: %pe\n",
+                        bri->sda_gpiod);
                return 0;
        }
-       if (IS_ERR(bri->pinctrl))
-               return PTR_ERR(bri->pinctrl);
 
        bri->prepare_recovery = i2c_pxa_prepare_recovery;
        bri->unprepare_recovery = i2c_pxa_unprepare_recovery;
+       bri->recover_bus = i2c_generic_scl_recovery;
 
        i2c->adap.bus_recovery_info = bri;
 
-       return 0;
+       /*
+        * Claiming GPIOs can change the pinmux state, which confuses the
+        * pinctrl since pinctrl's idea of the current setting is unaffected
+        * by the pinmux change caused by claiming the GPIO. Work around that
+        * by switching pinctrl to the GPIO state here. We do it this way to
+        * avoid glitching the I2C bus.
+        */
+       pinctrl_select_state(i2c->pinctrl, i2c->pinctrl_recovery);
+
+       return pinctrl_select_state(i2c->pinctrl, i2c->pinctrl_default);
 }
 
 static int i2c_pxa_probe(struct platform_device *dev)
index a8c89df..9a7a742 100644 (file)
@@ -2379,12 +2379,12 @@ retry_baser:
                break;
        }
 
+       if (!shr)
+               gic_flush_dcache_to_poc(base, PAGE_ORDER_TO_SIZE(order));
+
        its_write_baser(its, baser, val);
        tmp = baser->val;
 
-       if (its->flags & ITS_FLAGS_FORCE_NON_SHAREABLE)
-               tmp &= ~GITS_BASER_SHAREABILITY_MASK;
-
        if ((val ^ tmp) & GITS_BASER_SHAREABILITY_MASK) {
                /*
                 * Shareability didn't stick. Just use
@@ -2394,10 +2394,9 @@ retry_baser:
                 * non-cacheable as well.
                 */
                shr = tmp & GITS_BASER_SHAREABILITY_MASK;
-               if (!shr) {
+               if (!shr)
                        cache = GITS_BASER_nC;
-                       gic_flush_dcache_to_poc(base, PAGE_ORDER_TO_SIZE(order));
-               }
+
                goto retry_baser;
        }
 
@@ -2609,6 +2608,11 @@ static int its_alloc_tables(struct its_node *its)
                /* erratum 24313: ignore memory access type */
                cache = GITS_BASER_nCnB;
 
+       if (its->flags & ITS_FLAGS_FORCE_NON_SHAREABLE) {
+               cache = GITS_BASER_nC;
+               shr = 0;
+       }
+
        for (i = 0; i < GITS_BASER_NR_REGS; i++) {
                struct its_baser *baser = its->tables + i;
                u64 val = its_read_baser(its, baser);
index 05be59a..6ae2329 100644 (file)
@@ -265,6 +265,7 @@ struct bcache_device {
 #define BCACHE_DEV_WB_RUNNING          3
 #define BCACHE_DEV_RATE_DW_RUNNING     4
        int                     nr_stripes;
+#define BCH_MIN_STRIPE_SZ              ((4 << 20) >> SECTOR_SHIFT)
        unsigned int            stripe_size;
        atomic_t                *stripe_sectors_dirty;
        unsigned long           *full_dirty_stripes;
index ae5cbb5..de30199 100644 (file)
@@ -1000,6 +1000,9 @@ err:
  *
  * The btree node will have either a read or a write lock held, depending on
  * level and op->lock.
+ *
+ * Note: Only error code or btree pointer will be returned, it is unnecessary
+ *       for callers to check NULL pointer.
+ */
 struct btree *bch_btree_node_get(struct cache_set *c, struct btree_op *op,
                                 struct bkey *k, int level, bool write,
@@ -1111,6 +1114,10 @@ retry:
        mutex_unlock(&b->c->bucket_lock);
 }
 
+/*
+ * Only error code or btree pointer will be returned, it is unnecessary for
+ * callers to check NULL pointer.
+ */
 struct btree *__bch_btree_node_alloc(struct cache_set *c, struct btree_op *op,
                                     int level, bool wait,
                                     struct btree *parent)
@@ -1368,7 +1375,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
        memset(new_nodes, 0, sizeof(new_nodes));
        closure_init_stack(&cl);
 
-       while (nodes < GC_MERGE_NODES && !IS_ERR(r[nodes].b))
+       while (nodes < GC_MERGE_NODES && !IS_ERR_OR_NULL(r[nodes].b))
                keys += r[nodes++].keys;
 
        blocks = btree_default_blocks(b->c) * 2 / 3;
@@ -1532,6 +1539,8 @@ static int btree_gc_rewrite_node(struct btree *b, struct btree_op *op,
                return 0;
 
        n = btree_node_alloc_replacement(replace, NULL);
+       if (IS_ERR(n))
+               return 0;
 
        /* recheck reserve after allocating replacement node */
        if (btree_check_reserve(b, NULL)) {
index 8bd8997..bfe1685 100644 (file)
@@ -905,6 +905,8 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
 
        if (!d->stripe_size)
                d->stripe_size = 1 << 31;
+       else if (d->stripe_size < BCH_MIN_STRIPE_SZ)
+               d->stripe_size = roundup(BCH_MIN_STRIPE_SZ, d->stripe_size);
 
        n = DIV_ROUND_UP_ULL(sectors, d->stripe_size);
        if (!n || n > max_stripes) {
@@ -2016,7 +2018,7 @@ static int run_cache_set(struct cache_set *c)
                c->root = bch_btree_node_get(c, NULL, k,
                                             j->btree_level,
                                             true, NULL);
-               if (IS_ERR_OR_NULL(c->root))
+               if (IS_ERR(c->root))
                        goto err;
 
                list_del_init(&c->root->list);
index 45d8af7..a438efb 100644 (file)
@@ -1104,7 +1104,7 @@ SHOW(__bch_cache)
                        sum += INITIAL_PRIO - cached[i];
 
                if (n)
-                       do_div(sum, n);
+                       sum = div64_u64(sum, n);
 
                for (i = 0; i < ARRAY_SIZE(q); i++)
                        q[i] = INITIAL_PRIO - cached[n * (i + 1) /
index 24c0490..3accfda 100644 (file)
@@ -913,7 +913,7 @@ static int bch_dirty_init_thread(void *arg)
        int cur_idx, prev_idx, skip_nr;
 
        k = p = NULL;
-       cur_idx = prev_idx = 0;
+       prev_idx = 0;
 
        bch_btree_iter_init(&c->root->keys, &iter, NULL);
        k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad);
@@ -977,24 +977,35 @@ static int bch_btre_dirty_init_thread_nr(void)
 void bch_sectors_dirty_init(struct bcache_device *d)
 {
        int i;
+       struct btree *b = NULL;
        struct bkey *k = NULL;
        struct btree_iter iter;
        struct sectors_dirty_init op;
        struct cache_set *c = d->c;
        struct bch_dirty_init_state state;
 
+retry_lock:
+       b = c->root;
+       rw_lock(0, b, b->level);
+       if (b != c->root) {
+               rw_unlock(0, b);
+               goto retry_lock;
+       }
+
        /* Just count root keys if no leaf node */
-       rw_lock(0, c->root, c->root->level);
        if (c->root->level == 0) {
                bch_btree_op_init(&op.op, -1);
                op.inode = d->id;
                op.count = 0;
 
                for_each_key_filter(&c->root->keys,
-                                   k, &iter, bch_ptr_invalid)
+                                   k, &iter, bch_ptr_invalid) {
+                       if (KEY_INODE(k) != op.inode)
+                               continue;
                        sectors_dirty_init_fn(&op.op, c->root, k);
+               }
 
-               rw_unlock(0, c->root);
+               rw_unlock(0, b);
                return;
        }
 
@@ -1014,23 +1025,24 @@ void bch_sectors_dirty_init(struct bcache_device *d)
                if (atomic_read(&state.enough))
                        break;
 
+               atomic_inc(&state.started);
                state.infos[i].state = &state;
                state.infos[i].thread =
                        kthread_run(bch_dirty_init_thread, &state.infos[i],
                                    "bch_dirtcnt[%d]", i);
                if (IS_ERR(state.infos[i].thread)) {
                        pr_err("fails to run thread bch_dirty_init[%d]\n", i);
+                       atomic_dec(&state.started);
                        for (--i; i >= 0; i--)
                                kthread_stop(state.infos[i].thread);
                        goto out;
                }
-               atomic_inc(&state.started);
        }
 
 out:
        /* Must wait for all threads to stop. */
        wait_event(state.wait, atomic_read(&state.started) == 0);
-       rw_unlock(0, c->root);
+       rw_unlock(0, b);
 }
 
 void bch_cached_dev_writeback_init(struct cached_dev *dc)
index 62eb276..f03d7db 100644 (file)
@@ -254,7 +254,7 @@ enum evict_result {
 
 typedef enum evict_result (*le_predicate)(struct lru_entry *le, void *context);
 
-static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *context)
+static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *context, bool no_sleep)
 {
        unsigned long tested = 0;
        struct list_head *h = lru->cursor;
@@ -295,7 +295,8 @@ static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *con
 
                h = h->next;
 
-               cond_resched();
+               if (!no_sleep)
+                       cond_resched();
        }
 
        return NULL;
@@ -382,7 +383,10 @@ struct dm_buffer {
  */
 
 struct buffer_tree {
-       struct rw_semaphore lock;
+       union {
+               struct rw_semaphore lock;
+               rwlock_t spinlock;
+       } u;
        struct rb_root root;
 } ____cacheline_aligned_in_smp;
 
@@ -393,9 +397,12 @@ struct dm_buffer_cache {
         * on the locks.
         */
        unsigned int num_locks;
+       bool no_sleep;
        struct buffer_tree trees[];
 };
 
+static DEFINE_STATIC_KEY_FALSE(no_sleep_enabled);
+
 static inline unsigned int cache_index(sector_t block, unsigned int num_locks)
 {
        return dm_hash_locks_index(block, num_locks);
@@ -403,22 +410,34 @@ static inline unsigned int cache_index(sector_t block, unsigned int num_locks)
 
 static inline void cache_read_lock(struct dm_buffer_cache *bc, sector_t block)
 {
-       down_read(&bc->trees[cache_index(block, bc->num_locks)].lock);
+       if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
+               read_lock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
+       else
+               down_read(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
 }
 
 static inline void cache_read_unlock(struct dm_buffer_cache *bc, sector_t block)
 {
-       up_read(&bc->trees[cache_index(block, bc->num_locks)].lock);
+       if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
+               read_unlock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
+       else
+               up_read(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
 }
 
 static inline void cache_write_lock(struct dm_buffer_cache *bc, sector_t block)
 {
-       down_write(&bc->trees[cache_index(block, bc->num_locks)].lock);
+       if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
+               write_lock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
+       else
+               down_write(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
 }
 
 static inline void cache_write_unlock(struct dm_buffer_cache *bc, sector_t block)
 {
-       up_write(&bc->trees[cache_index(block, bc->num_locks)].lock);
+       if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
+               write_unlock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
+       else
+               up_write(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
 }
 
 /*
@@ -442,18 +461,32 @@ static void lh_init(struct lock_history *lh, struct dm_buffer_cache *cache, bool
 
 static void __lh_lock(struct lock_history *lh, unsigned int index)
 {
-       if (lh->write)
-               down_write(&lh->cache->trees[index].lock);
-       else
-               down_read(&lh->cache->trees[index].lock);
+       if (lh->write) {
+               if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
+                       write_lock_bh(&lh->cache->trees[index].u.spinlock);
+               else
+                       down_write(&lh->cache->trees[index].u.lock);
+       } else {
+               if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
+                       read_lock_bh(&lh->cache->trees[index].u.spinlock);
+               else
+                       down_read(&lh->cache->trees[index].u.lock);
+       }
 }
 
 static void __lh_unlock(struct lock_history *lh, unsigned int index)
 {
-       if (lh->write)
-               up_write(&lh->cache->trees[index].lock);
-       else
-               up_read(&lh->cache->trees[index].lock);
+       if (lh->write) {
+               if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
+                       write_unlock_bh(&lh->cache->trees[index].u.spinlock);
+               else
+                       up_write(&lh->cache->trees[index].u.lock);
+       } else {
+               if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
+                       read_unlock_bh(&lh->cache->trees[index].u.spinlock);
+               else
+                       up_read(&lh->cache->trees[index].u.lock);
+       }
 }
 
 /*
@@ -502,14 +535,18 @@ static struct dm_buffer *list_to_buffer(struct list_head *l)
        return le_to_buffer(le);
 }
 
-static void cache_init(struct dm_buffer_cache *bc, unsigned int num_locks)
+static void cache_init(struct dm_buffer_cache *bc, unsigned int num_locks, bool no_sleep)
 {
        unsigned int i;
 
        bc->num_locks = num_locks;
+       bc->no_sleep = no_sleep;
 
        for (i = 0; i < bc->num_locks; i++) {
-               init_rwsem(&bc->trees[i].lock);
+               if (no_sleep)
+                       rwlock_init(&bc->trees[i].u.spinlock);
+               else
+                       init_rwsem(&bc->trees[i].u.lock);
                bc->trees[i].root = RB_ROOT;
        }
 
@@ -648,7 +685,7 @@ static struct dm_buffer *__cache_evict(struct dm_buffer_cache *bc, int list_mode
        struct lru_entry *le;
        struct dm_buffer *b;
 
-       le = lru_evict(&bc->lru[list_mode], __evict_pred, &w);
+       le = lru_evict(&bc->lru[list_mode], __evict_pred, &w, bc->no_sleep);
        if (!le)
                return NULL;
 
@@ -702,7 +739,7 @@ static void __cache_mark_many(struct dm_buffer_cache *bc, int old_mode, int new_
        struct evict_wrapper w = {.lh = lh, .pred = pred, .context = context};
 
        while (true) {
-               le = lru_evict(&bc->lru[old_mode], __evict_pred, &w);
+               le = lru_evict(&bc->lru[old_mode], __evict_pred, &w, bc->no_sleep);
                if (!le)
                        break;
 
@@ -915,10 +952,11 @@ static void cache_remove_range(struct dm_buffer_cache *bc,
 {
        unsigned int i;
 
+       BUG_ON(bc->no_sleep);
        for (i = 0; i < bc->num_locks; i++) {
-               down_write(&bc->trees[i].lock);
+               down_write(&bc->trees[i].u.lock);
                __remove_range(bc, &bc->trees[i].root, begin, end, pred, release);
-               up_write(&bc->trees[i].lock);
+               up_write(&bc->trees[i].u.lock);
        }
 }
 
@@ -979,8 +1017,6 @@ struct dm_bufio_client {
        struct dm_buffer_cache cache; /* must be last member */
 };
 
-static DEFINE_STATIC_KEY_FALSE(no_sleep_enabled);
-
 /*----------------------------------------------------------------*/
 
 #define dm_bufio_in_request()  (!!current->bio_list)
@@ -1871,7 +1907,8 @@ static void *new_read(struct dm_bufio_client *c, sector_t block,
        if (need_submit)
                submit_io(b, REQ_OP_READ, read_endio);
 
-       wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
+       if (nf != NF_GET)       /* we already tested this condition above */
+               wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
 
        if (b->read_error) {
                int error = blk_status_to_errno(b->read_error);
@@ -2421,7 +2458,7 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
                r = -ENOMEM;
                goto bad_client;
        }
-       cache_init(&c->cache, num_locks);
+       cache_init(&c->cache, num_locks, (flags & DM_BUFIO_CLIENT_NO_SLEEP) != 0);
 
        c->bdev = bdev;
        c->block_size = block_size;
index 6de107a..2ae8560 100644 (file)
@@ -1673,7 +1673,7 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned int size)
        unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
        gfp_t gfp_mask = GFP_NOWAIT | __GFP_HIGHMEM;
        unsigned int remaining_size;
-       unsigned int order = MAX_ORDER - 1;
+       unsigned int order = MAX_ORDER;
 
 retry:
        if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM))
index efd5109..5eabdb0 100644 (file)
@@ -33,7 +33,7 @@ struct delay_c {
        struct work_struct flush_expired_bios;
        struct list_head delayed_bios;
        struct task_struct *worker;
-       atomic_t may_delay;
+       bool may_delay;
 
        struct delay_class read;
        struct delay_class write;
@@ -73,39 +73,6 @@ static inline bool delay_is_fast(struct delay_c *dc)
        return !!dc->worker;
 }
 
-static void flush_delayed_bios_fast(struct delay_c *dc, bool flush_all)
-{
-       struct dm_delay_info *delayed, *next;
-
-       mutex_lock(&delayed_bios_lock);
-       list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
-               if (flush_all || time_after_eq(jiffies, delayed->expires)) {
-                       struct bio *bio = dm_bio_from_per_bio_data(delayed,
-                                               sizeof(struct dm_delay_info));
-                       list_del(&delayed->list);
-                       dm_submit_bio_remap(bio, NULL);
-                       delayed->class->ops--;
-               }
-       }
-       mutex_unlock(&delayed_bios_lock);
-}
-
-static int flush_worker_fn(void *data)
-{
-       struct delay_c *dc = data;
-
-       while (1) {
-               flush_delayed_bios_fast(dc, false);
-               if (unlikely(list_empty(&dc->delayed_bios))) {
-                       set_current_state(TASK_INTERRUPTIBLE);
-                       schedule();
-               } else
-                       cond_resched();
-       }
-
-       return 0;
-}
-
 static void flush_bios(struct bio *bio)
 {
        struct bio *n;
@@ -118,36 +85,61 @@ static void flush_bios(struct bio *bio)
        }
 }
 
-static struct bio *flush_delayed_bios(struct delay_c *dc, bool flush_all)
+static void flush_delayed_bios(struct delay_c *dc, bool flush_all)
 {
        struct dm_delay_info *delayed, *next;
+       struct bio_list flush_bio_list;
        unsigned long next_expires = 0;
-       unsigned long start_timer = 0;
-       struct bio_list flush_bios = { };
+       bool start_timer = false;
+       bio_list_init(&flush_bio_list);
 
        mutex_lock(&delayed_bios_lock);
        list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
+               cond_resched();
                if (flush_all || time_after_eq(jiffies, delayed->expires)) {
                        struct bio *bio = dm_bio_from_per_bio_data(delayed,
                                                sizeof(struct dm_delay_info));
                        list_del(&delayed->list);
-                       bio_list_add(&flush_bios, bio);
+                       bio_list_add(&flush_bio_list, bio);
                        delayed->class->ops--;
                        continue;
                }
 
-               if (!start_timer) {
-                       start_timer = 1;
-                       next_expires = delayed->expires;
-               } else
-                       next_expires = min(next_expires, delayed->expires);
+               if (!delay_is_fast(dc)) {
+                       if (!start_timer) {
+                               start_timer = true;
+                               next_expires = delayed->expires;
+                       } else {
+                               next_expires = min(next_expires, delayed->expires);
+                       }
+               }
        }
        mutex_unlock(&delayed_bios_lock);
 
        if (start_timer)
                queue_timeout(dc, next_expires);
 
-       return bio_list_get(&flush_bios);
+       flush_bios(bio_list_get(&flush_bio_list));
+}
+
+static int flush_worker_fn(void *data)
+{
+       struct delay_c *dc = data;
+
+       while (!kthread_should_stop()) {
+               flush_delayed_bios(dc, false);
+               mutex_lock(&delayed_bios_lock);
+               if (unlikely(list_empty(&dc->delayed_bios))) {
+                       set_current_state(TASK_INTERRUPTIBLE);
+                       mutex_unlock(&delayed_bios_lock);
+                       schedule();
+               } else {
+                       mutex_unlock(&delayed_bios_lock);
+                       cond_resched();
+               }
+       }
+
+       return 0;
 }
 
 static void flush_expired_bios(struct work_struct *work)
@@ -155,10 +147,7 @@ static void flush_expired_bios(struct work_struct *work)
        struct delay_c *dc;
 
        dc = container_of(work, struct delay_c, flush_expired_bios);
-       if (delay_is_fast(dc))
-               flush_delayed_bios_fast(dc, false);
-       else
-               flush_bios(flush_delayed_bios(dc, false));
+       flush_delayed_bios(dc, false);
 }
 
 static void delay_dtr(struct dm_target *ti)
@@ -177,8 +166,7 @@ static void delay_dtr(struct dm_target *ti)
        if (dc->worker)
                kthread_stop(dc->worker);
 
-       if (!delay_is_fast(dc))
-               mutex_destroy(&dc->timer_lock);
+       mutex_destroy(&dc->timer_lock);
 
        kfree(dc);
 }
@@ -236,7 +224,8 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 
        ti->private = dc;
        INIT_LIST_HEAD(&dc->delayed_bios);
-       atomic_set(&dc->may_delay, 1);
+       mutex_init(&dc->timer_lock);
+       dc->may_delay = true;
        dc->argc = argc;
 
        ret = delay_class_ctr(ti, &dc->read, argv);
@@ -282,12 +271,12 @@ out:
                                            "dm-delay-flush-worker");
                if (IS_ERR(dc->worker)) {
                        ret = PTR_ERR(dc->worker);
+                       dc->worker = NULL;
                        goto bad;
                }
        } else {
                timer_setup(&dc->delay_timer, handle_delayed_timer, 0);
                INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
-               mutex_init(&dc->timer_lock);
                dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0);
                if (!dc->kdelayd_wq) {
                        ret = -EINVAL;
@@ -312,7 +301,7 @@ static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio)
        struct dm_delay_info *delayed;
        unsigned long expires = 0;
 
-       if (!c->delay || !atomic_read(&dc->may_delay))
+       if (!c->delay)
                return DM_MAPIO_REMAPPED;
 
        delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info));
@@ -321,6 +310,10 @@ static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio)
        delayed->expires = expires = jiffies + msecs_to_jiffies(c->delay);
 
        mutex_lock(&delayed_bios_lock);
+       if (unlikely(!dc->may_delay)) {
+               mutex_unlock(&delayed_bios_lock);
+               return DM_MAPIO_REMAPPED;
+       }
        c->ops++;
        list_add_tail(&delayed->list, &dc->delayed_bios);
        mutex_unlock(&delayed_bios_lock);
@@ -337,21 +330,20 @@ static void delay_presuspend(struct dm_target *ti)
 {
        struct delay_c *dc = ti->private;
 
-       atomic_set(&dc->may_delay, 0);
+       mutex_lock(&delayed_bios_lock);
+       dc->may_delay = false;
+       mutex_unlock(&delayed_bios_lock);
 
-       if (delay_is_fast(dc))
-               flush_delayed_bios_fast(dc, true);
-       else {
+       if (!delay_is_fast(dc))
                del_timer_sync(&dc->delay_timer);
-               flush_bios(flush_delayed_bios(dc, true));
-       }
+       flush_delayed_bios(dc, true);
 }
 
 static void delay_resume(struct dm_target *ti)
 {
        struct delay_c *dc = ti->private;
 
-       atomic_set(&dc->may_delay, 1);
+       dc->may_delay = true;
 }
 
 static int delay_map(struct dm_target *ti, struct bio *bio)
index 3ef9f01..2099c75 100644 (file)
@@ -185,7 +185,7 @@ static int fec_is_erasure(struct dm_verity *v, struct dm_verity_io *io,
 {
        if (unlikely(verity_hash(v, verity_io_hash_req(v, io),
                                 data, 1 << v->data_dev_block_bits,
-                                verity_io_real_digest(v, io))))
+                                verity_io_real_digest(v, io), true)))
                return 0;
 
        return memcmp(verity_io_real_digest(v, io), want_digest,
@@ -386,7 +386,7 @@ static int fec_decode_rsb(struct dm_verity *v, struct dm_verity_io *io,
        /* Always re-validate the corrected block against the expected hash */
        r = verity_hash(v, verity_io_hash_req(v, io), fio->output,
                        1 << v->data_dev_block_bits,
-                       verity_io_real_digest(v, io));
+                       verity_io_real_digest(v, io), true);
        if (unlikely(r < 0))
                return r;
 
index 26adcfe..e115fcf 100644 (file)
@@ -135,20 +135,21 @@ static int verity_hash_update(struct dm_verity *v, struct ahash_request *req,
  * Wrapper for crypto_ahash_init, which handles verity salting.
  */
 static int verity_hash_init(struct dm_verity *v, struct ahash_request *req,
-                               struct crypto_wait *wait)
+                               struct crypto_wait *wait, bool may_sleep)
 {
        int r;
 
        ahash_request_set_tfm(req, v->tfm);
-       ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP |
-                                       CRYPTO_TFM_REQ_MAY_BACKLOG,
-                                       crypto_req_done, (void *)wait);
+       ahash_request_set_callback(req,
+               may_sleep ? CRYPTO_TFM_REQ_MAY_SLEEP | CRYPTO_TFM_REQ_MAY_BACKLOG : 0,
+               crypto_req_done, (void *)wait);
        crypto_init_wait(wait);
 
        r = crypto_wait_req(crypto_ahash_init(req), wait);
 
        if (unlikely(r < 0)) {
-               DMERR("crypto_ahash_init failed: %d", r);
+               if (r != -ENOMEM)
+                       DMERR("crypto_ahash_init failed: %d", r);
                return r;
        }
 
@@ -179,12 +180,12 @@ out:
 }
 
 int verity_hash(struct dm_verity *v, struct ahash_request *req,
-               const u8 *data, size_t len, u8 *digest)
+               const u8 *data, size_t len, u8 *digest, bool may_sleep)
 {
        int r;
        struct crypto_wait wait;
 
-       r = verity_hash_init(v, req, &wait);
+       r = verity_hash_init(v, req, &wait, may_sleep);
        if (unlikely(r < 0))
                goto out;
 
@@ -322,7 +323,7 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io,
 
                r = verity_hash(v, verity_io_hash_req(v, io),
                                data, 1 << v->hash_dev_block_bits,
-                               verity_io_real_digest(v, io));
+                               verity_io_real_digest(v, io), !io->in_tasklet);
                if (unlikely(r < 0))
                        goto release_ret_r;
 
@@ -556,7 +557,7 @@ static int verity_verify_io(struct dm_verity_io *io)
                        continue;
                }
 
-               r = verity_hash_init(v, req, &wait);
+               r = verity_hash_init(v, req, &wait, !io->in_tasklet);
                if (unlikely(r < 0))
                        return r;
 
@@ -652,7 +653,7 @@ static void verity_tasklet(unsigned long data)
 
        io->in_tasklet = true;
        err = verity_verify_io(io);
-       if (err == -EAGAIN) {
+       if (err == -EAGAIN || err == -ENOMEM) {
                /* fallback to retrying with work-queue */
                INIT_WORK(&io->work, verity_work);
                queue_work(io->v->verify_wq, &io->work);
@@ -1033,7 +1034,7 @@ static int verity_alloc_zero_digest(struct dm_verity *v)
                goto out;
 
        r = verity_hash(v, req, zero_data, 1 << v->data_dev_block_bits,
-                       v->zero_digest);
+                       v->zero_digest, true);
 
 out:
        kfree(req);
index 2f555b4..f96f4e2 100644 (file)
@@ -128,7 +128,7 @@ extern int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io,
                                              u8 *data, size_t len));
 
 extern int verity_hash(struct dm_verity *v, struct ahash_request *req,
-                      const u8 *data, size_t len, u8 *digest);
+                      const u8 *data, size_t len, u8 *digest, bool may_sleep);
 
 extern int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
                                 sector_t block, u8 *digest, bool *is_zero);
index 4ee4593..c94373d 100644 (file)
@@ -8666,7 +8666,8 @@ static void md_end_clone_io(struct bio *bio)
        struct bio *orig_bio = md_io_clone->orig_bio;
        struct mddev *mddev = md_io_clone->mddev;
 
-       orig_bio->bi_status = bio->bi_status;
+       if (bio->bi_status && !orig_bio->bi_status)
+               orig_bio->bi_status = bio->bi_status;
 
        if (md_io_clone->start_time)
                bio_end_io_acct(orig_bio, md_io_clone->start_time);
index 51d47ed..8e6cc0e 100644 (file)
@@ -1500,6 +1500,10 @@ done:
 static void bond_setup_by_slave(struct net_device *bond_dev,
                                struct net_device *slave_dev)
 {
+       bool was_up = !!(bond_dev->flags & IFF_UP);
+
+       dev_close(bond_dev);
+
        bond_dev->header_ops        = slave_dev->header_ops;
 
        bond_dev->type              = slave_dev->type;
@@ -1514,6 +1518,8 @@ static void bond_setup_by_slave(struct net_device *bond_dev,
                bond_dev->flags &= ~(IFF_BROADCAST | IFF_MULTICAST);
                bond_dev->flags |= (IFF_POINTOPOINT | IFF_NOARP);
        }
+       if (was_up)
+               dev_open(bond_dev, NULL);
 }
 
 /* On bonding slaves other than the currently active slave, suppress
index 045fe13..5beadab 100644 (file)
@@ -146,7 +146,7 @@ irqreturn_t pdsc_adminq_isr(int irq, void *data)
        }
 
        queue_work(pdsc->wq, &qcq->work);
-       pds_core_intr_mask(&pdsc->intr_ctrl[irq], PDS_CORE_INTR_MASK_CLEAR);
+       pds_core_intr_mask(&pdsc->intr_ctrl[qcq->intx], PDS_CORE_INTR_MASK_CLEAR);
 
        return IRQ_HANDLED;
 }
index f3a7ded..e35d3e7 100644 (file)
@@ -15,7 +15,7 @@
 #define PDSC_DRV_DESCRIPTION   "AMD/Pensando Core Driver"
 
 #define PDSC_WATCHDOG_SECS     5
-#define PDSC_QUEUE_NAME_MAX_SZ  32
+#define PDSC_QUEUE_NAME_MAX_SZ  16
 #define PDSC_ADMINQ_MIN_LENGTH 16      /* must be a power of two */
 #define PDSC_NOTIFYQ_LENGTH    64      /* must be a power of two */
 #define PDSC_TEARDOWN_RECOVERY false
index 7c1b965..31940b8 100644 (file)
@@ -261,10 +261,14 @@ static int pdsc_identify(struct pdsc *pdsc)
        struct pds_core_drv_identity drv = {};
        size_t sz;
        int err;
+       int n;
 
        drv.drv_type = cpu_to_le32(PDS_DRIVER_LINUX);
-       snprintf(drv.driver_ver_str, sizeof(drv.driver_ver_str),
-                "%s %s", PDS_CORE_DRV_NAME, utsname()->release);
+       /* Catching the return quiets a Wformat-truncation complaint */
+       n = snprintf(drv.driver_ver_str, sizeof(drv.driver_ver_str),
+                    "%s %s", PDS_CORE_DRV_NAME, utsname()->release);
+       if (n > sizeof(drv.driver_ver_str))
+               dev_dbg(pdsc->dev, "release name truncated, don't care\n");
 
        /* Next let's get some info about the device
         * We use the devcmd_lock at this level in order to
index 57f88c8..e9948ea 100644 (file)
@@ -104,7 +104,7 @@ int pdsc_dl_info_get(struct devlink *dl, struct devlink_info_req *req,
        struct pds_core_fw_list_info fw_list;
        struct pdsc *pdsc = devlink_priv(dl);
        union pds_core_dev_comp comp;
-       char buf[16];
+       char buf[32];
        int listlen;
        int err;
        int i;
index 614c027..6b73648 100644 (file)
@@ -682,10 +682,24 @@ static void xgbe_service(struct work_struct *work)
 static void xgbe_service_timer(struct timer_list *t)
 {
        struct xgbe_prv_data *pdata = from_timer(pdata, t, service_timer);
+       struct xgbe_channel *channel;
+       unsigned int i;
 
        queue_work(pdata->dev_workqueue, &pdata->service_work);
 
        mod_timer(&pdata->service_timer, jiffies + HZ);
+
+       if (!pdata->tx_usecs)
+               return;
+
+       for (i = 0; i < pdata->channel_count; i++) {
+               channel = pdata->channel[i];
+               if (!channel->tx_ring || channel->tx_timer_active)
+                       break;
+               channel->tx_timer_active = 1;
+               mod_timer(&channel->tx_timer,
+                         jiffies + usecs_to_jiffies(pdata->tx_usecs));
+       }
 }
 
 static void xgbe_init_timers(struct xgbe_prv_data *pdata)
index 6e83ff5..32fab5e 100644 (file)
@@ -314,10 +314,15 @@ static int xgbe_get_link_ksettings(struct net_device *netdev,
 
        cmd->base.phy_address = pdata->phy.address;
 
-       cmd->base.autoneg = pdata->phy.autoneg;
-       cmd->base.speed = pdata->phy.speed;
-       cmd->base.duplex = pdata->phy.duplex;
+       if (netif_carrier_ok(netdev)) {
+               cmd->base.speed = pdata->phy.speed;
+               cmd->base.duplex = pdata->phy.duplex;
+       } else {
+               cmd->base.speed = SPEED_UNKNOWN;
+               cmd->base.duplex = DUPLEX_UNKNOWN;
+       }
 
+       cmd->base.autoneg = pdata->phy.autoneg;
        cmd->base.port = PORT_NONE;
 
        XGBE_LM_COPY(cmd, supported, lks, supported);
index 32d2c6f..4a2dc70 100644 (file)
@@ -1193,7 +1193,19 @@ static int xgbe_phy_config_fixed(struct xgbe_prv_data *pdata)
        if (pdata->phy.duplex != DUPLEX_FULL)
                return -EINVAL;
 
-       xgbe_set_mode(pdata, mode);
+       /* Force the mode change for SFI in Fixed PHY config.
+        * Fixed PHY configs needs PLL to be enabled while doing mode set.
+        * When the SFP module isn't connected during boot, driver assumes
+        * AN is ON and attempts autonegotiation. However, if the connected
+        * SFP comes up in Fixed PHY config, the link will not come up as
+        * PLL isn't enabled while the initial mode set command is issued.
+        * So, force the mode change for SFI in Fixed PHY configuration to
+        * fix link issues.
+        */
+       if (mode == XGBE_MODE_SFI)
+               xgbe_change_mode(pdata, mode);
+       else
+               xgbe_set_mode(pdata, mode);
 
        return 0;
 }
index 1dee273..48b6191 100644 (file)
@@ -6889,7 +6889,7 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget)
                                       desc_idx, *post_ptr);
                drop_it_no_recycle:
                        /* Other statistics kept track of by card. */
-                       tp->rx_dropped++;
+                       tnapi->rx_dropped++;
                        goto next_pkt;
                }
 
@@ -7918,8 +7918,10 @@ static int tg3_tso_bug(struct tg3 *tp, struct tg3_napi *tnapi,
 
        segs = skb_gso_segment(skb, tp->dev->features &
                                    ~(NETIF_F_TSO | NETIF_F_TSO6));
-       if (IS_ERR(segs) || !segs)
+       if (IS_ERR(segs) || !segs) {
+               tnapi->tx_dropped++;
                goto tg3_tso_bug_end;
+       }
 
        skb_list_walk_safe(segs, seg, next) {
                skb_mark_not_on_list(seg);
@@ -8190,7 +8192,7 @@ dma_error:
 drop:
        dev_kfree_skb_any(skb);
 drop_nofree:
-       tp->tx_dropped++;
+       tnapi->tx_dropped++;
        return NETDEV_TX_OK;
 }
 
@@ -9405,7 +9407,7 @@ static void __tg3_set_rx_mode(struct net_device *);
 /* tp->lock is held. */
 static int tg3_halt(struct tg3 *tp, int kind, bool silent)
 {
-       int err;
+       int err, i;
 
        tg3_stop_fw(tp);
 
@@ -9426,6 +9428,13 @@ static int tg3_halt(struct tg3 *tp, int kind, bool silent)
 
                /* And make sure the next sample is new data */
                memset(tp->hw_stats, 0, sizeof(struct tg3_hw_stats));
+
+               for (i = 0; i < TG3_IRQ_MAX_VECS; ++i) {
+                       struct tg3_napi *tnapi = &tp->napi[i];
+
+                       tnapi->rx_dropped = 0;
+                       tnapi->tx_dropped = 0;
+               }
        }
 
        return err;
@@ -11975,6 +11984,9 @@ static void tg3_get_nstats(struct tg3 *tp, struct rtnl_link_stats64 *stats)
 {
        struct rtnl_link_stats64 *old_stats = &tp->net_stats_prev;
        struct tg3_hw_stats *hw_stats = tp->hw_stats;
+       unsigned long rx_dropped;
+       unsigned long tx_dropped;
+       int i;
 
        stats->rx_packets = old_stats->rx_packets +
                get_stat64(&hw_stats->rx_ucast_packets) +
@@ -12021,8 +12033,26 @@ static void tg3_get_nstats(struct tg3 *tp, struct rtnl_link_stats64 *stats)
        stats->rx_missed_errors = old_stats->rx_missed_errors +
                get_stat64(&hw_stats->rx_discards);
 
-       stats->rx_dropped = tp->rx_dropped;
-       stats->tx_dropped = tp->tx_dropped;
+       /* Aggregate per-queue counters. The per-queue counters are updated
+        * by a single writer, race-free. The result computed by this loop
+        * might not be 100% accurate (counters can be updated in the middle of
+        * the loop) but the next tg3_get_nstats() will recompute the current
+        * value so it is acceptable.
+        *
+        * Note that these counters wrap around at 4G on 32bit machines.
+        */
+       rx_dropped = (unsigned long)(old_stats->rx_dropped);
+       tx_dropped = (unsigned long)(old_stats->tx_dropped);
+
+       for (i = 0; i < tp->irq_cnt; i++) {
+               struct tg3_napi *tnapi = &tp->napi[i];
+
+               rx_dropped += tnapi->rx_dropped;
+               tx_dropped += tnapi->tx_dropped;
+       }
+
+       stats->rx_dropped = rx_dropped;
+       stats->tx_dropped = tx_dropped;
 }
 
 static int tg3_get_regs_len(struct net_device *dev)
index ae5c01b..5016475 100644 (file)
@@ -3018,6 +3018,7 @@ struct tg3_napi {
        u16                             *rx_rcb_prod_idx;
        struct tg3_rx_prodring_set      prodring;
        struct tg3_rx_buffer_desc       *rx_rcb;
+       unsigned long                   rx_dropped;
 
        u32                             tx_prod ____cacheline_aligned;
        u32                             tx_cons;
@@ -3026,6 +3027,7 @@ struct tg3_napi {
        u32                             prodmbox;
        struct tg3_tx_buffer_desc       *tx_ring;
        struct tg3_tx_ring_info         *tx_buffers;
+       unsigned long                   tx_dropped;
 
        dma_addr_t                      status_mapping;
        dma_addr_t                      rx_rcb_mapping;
@@ -3220,8 +3222,6 @@ struct tg3 {
 
 
        /* begin "everything else" cacheline(s) section */
-       unsigned long                   rx_dropped;
-       unsigned long                   tx_dropped;
        struct rtnl_link_stats64        net_stats_prev;
        struct tg3_ethtool_stats        estats_prev;
 
index 5423fe2..78287cf 100644 (file)
@@ -432,8 +432,8 @@ static const struct gmac_max_framelen gmac_maxlens[] = {
                .val = CONFIG0_MAXLEN_1536,
        },
        {
-               .max_l3_len = 1542,
-               .val = CONFIG0_MAXLEN_1542,
+               .max_l3_len = 1548,
+               .val = CONFIG0_MAXLEN_1548,
        },
        {
                .max_l3_len = 9212,
@@ -1145,6 +1145,7 @@ static int gmac_map_tx_bufs(struct net_device *netdev, struct sk_buff *skb,
        dma_addr_t mapping;
        unsigned short mtu;
        void *buffer;
+       int ret;
 
        mtu  = ETH_HLEN;
        mtu += netdev->mtu;
@@ -1159,9 +1160,30 @@ static int gmac_map_tx_bufs(struct net_device *netdev, struct sk_buff *skb,
                word3 |= mtu;
        }
 
-       if (skb->ip_summed != CHECKSUM_NONE) {
+       if (skb->len >= ETH_FRAME_LEN) {
+               /* Hardware offloaded checksumming isn't working on frames
+                * bigger than 1514 bytes. A hypothesis about this is that the
+                * checksum buffer is only 1518 bytes, so when the frames get
+                * bigger they get truncated, or the last few bytes get
+                * overwritten by the FCS.
+                *
+                * Just use software checksumming and bypass on bigger frames.
+                */
+               if (skb->ip_summed == CHECKSUM_PARTIAL) {
+                       ret = skb_checksum_help(skb);
+                       if (ret)
+                               return ret;
+               }
+               word1 |= TSS_BYPASS_BIT;
+       } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
                int tcp = 0;
 
+               /* We do not switch off the checksumming on non TCP/UDP
+                * frames: as is shown from tests, the checksumming engine
+                * is smart enough to see that a frame is not actually TCP
+                * or UDP and then just pass it through without any changes
+                * to the frame.
+                */
                if (skb->protocol == htons(ETH_P_IP)) {
                        word1 |= TSS_IP_CHKSUM_BIT;
                        tcp = ip_hdr(skb)->protocol == IPPROTO_TCP;
@@ -1978,15 +2000,6 @@ static int gmac_change_mtu(struct net_device *netdev, int new_mtu)
        return 0;
 }
 
-static netdev_features_t gmac_fix_features(struct net_device *netdev,
-                                          netdev_features_t features)
-{
-       if (netdev->mtu + ETH_HLEN + VLAN_HLEN > MTU_SIZE_BIT_MASK)
-               features &= ~GMAC_OFFLOAD_FEATURES;
-
-       return features;
-}
-
 static int gmac_set_features(struct net_device *netdev,
                             netdev_features_t features)
 {
@@ -2212,7 +2225,6 @@ static const struct net_device_ops gmac_351x_ops = {
        .ndo_set_mac_address    = gmac_set_mac_address,
        .ndo_get_stats64        = gmac_get_stats64,
        .ndo_change_mtu         = gmac_change_mtu,
-       .ndo_fix_features       = gmac_fix_features,
        .ndo_set_features       = gmac_set_features,
 };
 
@@ -2464,11 +2476,12 @@ static int gemini_ethernet_port_probe(struct platform_device *pdev)
 
        netdev->hw_features = GMAC_OFFLOAD_FEATURES;
        netdev->features |= GMAC_OFFLOAD_FEATURES | NETIF_F_GRO;
-       /* We can handle jumbo frames up to 10236 bytes so, let's accept
-        * payloads of 10236 bytes minus VLAN and ethernet header
+       /* We can receive jumbo frames up to 10236 bytes but only
+        * transmit 2047 bytes so, let's accept payloads of 2047
+        * bytes minus VLAN and ethernet header
         */
        netdev->min_mtu = ETH_MIN_MTU;
-       netdev->max_mtu = 10236 - VLAN_ETH_HLEN;
+       netdev->max_mtu = MTU_SIZE_BIT_MASK - VLAN_ETH_HLEN;
 
        port->freeq_refill = 0;
        netif_napi_add(netdev, &port->napi, gmac_napi_poll);
index 9fdf77d..24bb989 100644 (file)
@@ -502,7 +502,7 @@ union gmac_txdesc_3 {
 #define SOF_BIT                        0x80000000
 #define EOF_BIT                        0x40000000
 #define EOFIE_BIT              BIT(29)
-#define MTU_SIZE_BIT_MASK      0x1fff
+#define MTU_SIZE_BIT_MASK      0x7ff /* Max MTU 2047 bytes */
 
 /* GMAC Tx Descriptor */
 struct gmac_txdesc {
@@ -787,7 +787,7 @@ union gmac_config0 {
 #define  CONFIG0_MAXLEN_1536   0
 #define  CONFIG0_MAXLEN_1518   1
 #define  CONFIG0_MAXLEN_1522   2
-#define  CONFIG0_MAXLEN_1542   3
+#define  CONFIG0_MAXLEN_1548   3
 #define  CONFIG0_MAXLEN_9k     4       /* 9212 */
 #define  CONFIG0_MAXLEN_10k    5       /* 10236 */
 #define  CONFIG0_MAXLEN_1518__6        6
index 276f996..2d42e73 100644 (file)
@@ -254,10 +254,13 @@ static int gve_napi_poll(struct napi_struct *napi, int budget)
        if (block->tx) {
                if (block->tx->q_num < priv->tx_cfg.num_queues)
                        reschedule |= gve_tx_poll(block, budget);
-               else
+               else if (budget)
                        reschedule |= gve_xdp_poll(block, budget);
        }
 
+       if (!budget)
+               return 0;
+
        if (block->rx) {
                work_done = gve_rx_poll(block, budget);
                reschedule |= work_done == budget;
@@ -298,6 +301,9 @@ static int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
        if (block->tx)
                reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);
 
+       if (!budget)
+               return 0;
+
        if (block->rx) {
                work_done = gve_rx_poll_dqo(block, budget);
                reschedule |= work_done == budget;
index e84a066..7365534 100644 (file)
@@ -1007,10 +1007,6 @@ int gve_rx_poll(struct gve_notify_block *block, int budget)
 
        feat = block->napi.dev->features;
 
-       /* If budget is 0, do all the work */
-       if (budget == 0)
-               budget = INT_MAX;
-
        if (budget > 0)
                work_done = gve_clean_rx_done(rx, budget, feat);
 
index 6957a86..9f6ffc4 100644 (file)
@@ -925,10 +925,6 @@ bool gve_xdp_poll(struct gve_notify_block *block, int budget)
        bool repoll;
        u32 to_do;
 
-       /* If budget is 0, do all the work */
-       if (budget == 0)
-               budget = INT_MAX;
-
        /* Find out how much work there is to be done */
        nic_done = gve_tx_load_event_counter(priv, tx);
        to_do = min_t(u32, (nic_done - tx->done), budget);
index 0b13863..c083d1d 100644 (file)
@@ -503,11 +503,14 @@ static void hns3_get_coal_info(struct hns3_enet_tqp_vector *tqp_vector,
        }
 
        sprintf(result[j++], "%d", i);
-       sprintf(result[j++], "%s", dim_state_str[dim->state]);
+       sprintf(result[j++], "%s", dim->state < ARRAY_SIZE(dim_state_str) ?
+               dim_state_str[dim->state] : "unknown");
        sprintf(result[j++], "%u", dim->profile_ix);
-       sprintf(result[j++], "%s", dim_cqe_mode_str[dim->mode]);
+       sprintf(result[j++], "%s", dim->mode < ARRAY_SIZE(dim_cqe_mode_str) ?
+               dim_cqe_mode_str[dim->mode] : "unknown");
        sprintf(result[j++], "%s",
-               dim_tune_stat_str[dim->tune_state]);
+               dim->tune_state < ARRAY_SIZE(dim_tune_stat_str) ?
+               dim_tune_stat_str[dim->tune_state] : "unknown");
        sprintf(result[j++], "%u", dim->steps_left);
        sprintf(result[j++], "%u", dim->steps_right);
        sprintf(result[j++], "%u", dim->tired);
index 0611750..b618797 100644 (file)
@@ -5139,7 +5139,7 @@ static int hns3_init_mac_addr(struct net_device *netdev)
        struct hns3_nic_priv *priv = netdev_priv(netdev);
        char format_mac_addr[HNAE3_FORMAT_MAC_ADDR_LEN];
        struct hnae3_handle *h = priv->ae_handle;
-       u8 mac_addr_temp[ETH_ALEN];
+       u8 mac_addr_temp[ETH_ALEN] = {0};
        int ret = 0;
 
        if (h->ae_algo->ops->get_mac_addr)
index 66e5807..5ea9e59 100644 (file)
@@ -61,6 +61,7 @@ static void hclge_sync_fd_table(struct hclge_dev *hdev);
 static void hclge_update_fec_stats(struct hclge_dev *hdev);
 static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret,
                                      int wait_cnt);
+static int hclge_update_port_info(struct hclge_dev *hdev);
 
 static struct hnae3_ae_algo ae_algo;
 
@@ -3041,6 +3042,9 @@ static void hclge_update_link_status(struct hclge_dev *hdev)
 
        if (state != hdev->hw.mac.link) {
                hdev->hw.mac.link = state;
+               if (state == HCLGE_LINK_STATUS_UP)
+                       hclge_update_port_info(hdev);
+
                client->ops->link_status_change(handle, state);
                hclge_config_mac_tnl_int(hdev, state);
                if (rclient && rclient->ops->link_status_change)
@@ -10025,8 +10029,6 @@ static void hclge_rm_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id,
        struct hclge_vport_vlan_cfg *vlan, *tmp;
        struct hclge_dev *hdev = vport->back;
 
-       mutex_lock(&hdev->vport_lock);
-
        list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) {
                if (vlan->vlan_id == vlan_id) {
                        if (is_write_tbl && vlan->hd_tbl_status)
@@ -10041,8 +10043,6 @@ static void hclge_rm_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id,
                        break;
                }
        }
-
-       mutex_unlock(&hdev->vport_lock);
 }
 
 void hclge_rm_vport_all_vlan_table(struct hclge_vport *vport, bool is_del_list)
@@ -10451,11 +10451,16 @@ int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto,
         * handle mailbox. Just record the vlan id, and remove it after
         * reset finished.
         */
+       mutex_lock(&hdev->vport_lock);
        if ((test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) ||
             test_bit(HCLGE_STATE_RST_FAIL, &hdev->state)) && is_kill) {
                set_bit(vlan_id, vport->vlan_del_fail_bmap);
+               mutex_unlock(&hdev->vport_lock);
                return -EBUSY;
+       } else if (!is_kill && test_bit(vlan_id, vport->vlan_del_fail_bmap)) {
+               clear_bit(vlan_id, vport->vlan_del_fail_bmap);
        }
+       mutex_unlock(&hdev->vport_lock);
 
        /* when port base vlan enabled, we use port base vlan as the vlan
         * filter entry. In this case, we don't update vlan filter table
@@ -10470,17 +10475,22 @@ int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto,
        }
 
        if (!ret) {
-               if (!is_kill)
+               if (!is_kill) {
                        hclge_add_vport_vlan_table(vport, vlan_id,
                                                   writen_to_tbl);
-               else if (is_kill && vlan_id != 0)
+               } else if (is_kill && vlan_id != 0) {
+                       mutex_lock(&hdev->vport_lock);
                        hclge_rm_vport_vlan_table(vport, vlan_id, false);
+                       mutex_unlock(&hdev->vport_lock);
+               }
        } else if (is_kill) {
                /* when remove hw vlan filter failed, record the vlan id,
                 * and try to remove it from hw later, to be consistence
                 * with stack
                 */
+               mutex_lock(&hdev->vport_lock);
                set_bit(vlan_id, vport->vlan_del_fail_bmap);
+               mutex_unlock(&hdev->vport_lock);
        }
 
        hclge_set_vport_vlan_fltr_change(vport);
@@ -10520,6 +10530,7 @@ static void hclge_sync_vlan_filter(struct hclge_dev *hdev)
        int i, ret, sync_cnt = 0;
        u16 vlan_id;
 
+       mutex_lock(&hdev->vport_lock);
        /* start from vport 1 for PF is always alive */
        for (i = 0; i < hdev->num_alloc_vport; i++) {
                struct hclge_vport *vport = &hdev->vport[i];
@@ -10530,21 +10541,26 @@ static void hclge_sync_vlan_filter(struct hclge_dev *hdev)
                        ret = hclge_set_vlan_filter_hw(hdev, htons(ETH_P_8021Q),
                                                       vport->vport_id, vlan_id,
                                                       true);
-                       if (ret && ret != -EINVAL)
+                       if (ret && ret != -EINVAL) {
+                               mutex_unlock(&hdev->vport_lock);
                                return;
+                       }
 
                        clear_bit(vlan_id, vport->vlan_del_fail_bmap);
                        hclge_rm_vport_vlan_table(vport, vlan_id, false);
                        hclge_set_vport_vlan_fltr_change(vport);
 
                        sync_cnt++;
-                       if (sync_cnt >= HCLGE_MAX_SYNC_COUNT)
+                       if (sync_cnt >= HCLGE_MAX_SYNC_COUNT) {
+                               mutex_unlock(&hdev->vport_lock);
                                return;
+                       }
 
                        vlan_id = find_first_bit(vport->vlan_del_fail_bmap,
                                                 VLAN_N_VID);
                }
        }
+       mutex_unlock(&hdev->vport_lock);
 
        hclge_sync_vlan_fltr_state(hdev);
 }
@@ -11651,6 +11667,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
                goto err_msi_irq_uninit;
 
        if (hdev->hw.mac.media_type == HNAE3_MEDIA_TYPE_COPPER) {
+               clear_bit(HNAE3_DEV_SUPPORT_FEC_B, ae_dev->caps);
                if (hnae3_dev_phy_imp_supported(hdev))
                        ret = hclge_update_tp_port_info(hdev);
                else
index a4d68fb..0aa9bee 100644 (file)
@@ -1206,6 +1206,8 @@ static int hclgevf_set_vlan_filter(struct hnae3_handle *handle,
             test_bit(HCLGEVF_STATE_RST_FAIL, &hdev->state)) && is_kill) {
                set_bit(vlan_id, hdev->vlan_del_fail_bmap);
                return -EBUSY;
+       } else if (!is_kill && test_bit(vlan_id, hdev->vlan_del_fail_bmap)) {
+               clear_bit(vlan_id, hdev->vlan_del_fail_bmap);
        }
 
        hclgevf_build_send_msg(&send_msg, HCLGE_MBX_SET_VLAN,
@@ -1233,20 +1235,25 @@ static void hclgevf_sync_vlan_filter(struct hclgevf_dev *hdev)
        int ret, sync_cnt = 0;
        u16 vlan_id;
 
+       if (bitmap_empty(hdev->vlan_del_fail_bmap, VLAN_N_VID))
+               return;
+
+       rtnl_lock();
        vlan_id = find_first_bit(hdev->vlan_del_fail_bmap, VLAN_N_VID);
        while (vlan_id != VLAN_N_VID) {
                ret = hclgevf_set_vlan_filter(handle, htons(ETH_P_8021Q),
                                              vlan_id, true);
                if (ret)
-                       return;
+                       break;
 
                clear_bit(vlan_id, hdev->vlan_del_fail_bmap);
                sync_cnt++;
                if (sync_cnt >= HCLGEVF_MAX_SYNC_COUNT)
-                       return;
+                       break;
 
                vlan_id = find_first_bit(hdev->vlan_del_fail_bmap, VLAN_N_VID);
        }
+       rtnl_unlock();
 }
 
 static int hclgevf_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable)
@@ -1974,8 +1981,18 @@ static enum hclgevf_evt_cause hclgevf_check_evt_cause(struct hclgevf_dev *hdev,
        return HCLGEVF_VECTOR0_EVENT_OTHER;
 }
 
+static void hclgevf_reset_timer(struct timer_list *t)
+{
+       struct hclgevf_dev *hdev = from_timer(hdev, t, reset_timer);
+
+       hclgevf_clear_event_cause(hdev, HCLGEVF_VECTOR0_EVENT_RST);
+       hclgevf_reset_task_schedule(hdev);
+}
+
 static irqreturn_t hclgevf_misc_irq_handle(int irq, void *data)
 {
+#define HCLGEVF_RESET_DELAY    5
+
        enum hclgevf_evt_cause event_cause;
        struct hclgevf_dev *hdev = data;
        u32 clearval;
@@ -1987,7 +2004,8 @@ static irqreturn_t hclgevf_misc_irq_handle(int irq, void *data)
 
        switch (event_cause) {
        case HCLGEVF_VECTOR0_EVENT_RST:
-               hclgevf_reset_task_schedule(hdev);
+               mod_timer(&hdev->reset_timer,
+                         jiffies + msecs_to_jiffies(HCLGEVF_RESET_DELAY));
                break;
        case HCLGEVF_VECTOR0_EVENT_MBX:
                hclgevf_mbx_handler(hdev);
@@ -2930,6 +2948,7 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev)
                 HCLGEVF_DRIVER_NAME);
 
        hclgevf_task_schedule(hdev, round_jiffies_relative(HZ));
+       timer_setup(&hdev->reset_timer, hclgevf_reset_timer, 0);
 
        return 0;
 
index 81c16b8..a73f2bf 100644 (file)
@@ -219,6 +219,7 @@ struct hclgevf_dev {
        enum hnae3_reset_type reset_level;
        unsigned long reset_pending;
        enum hnae3_reset_type reset_type;
+       struct timer_list reset_timer;
 
 #define HCLGEVF_RESET_REQUESTED                0
 #define HCLGEVF_RESET_PENDING          1
index bbf7b14..85c2a63 100644 (file)
@@ -63,6 +63,9 @@ static int hclgevf_get_mbx_resp(struct hclgevf_dev *hdev, u16 code0, u16 code1,
                i++;
        }
 
+       /* ensure additional_info will be seen after received_resp */
+       smp_rmb();
+
        if (i >= HCLGEVF_MAX_TRY_TIMES) {
                dev_err(&hdev->pdev->dev,
                        "VF could not get mbx(%u,%u) resp(=%d) from PF in %d tries\n",
@@ -178,6 +181,10 @@ static void hclgevf_handle_mbx_response(struct hclgevf_dev *hdev,
        resp->resp_status = hclgevf_resp_to_errno(resp_status);
        memcpy(resp->additional_info, req->msg.resp_data,
               HCLGE_MBX_MAX_RESP_DATA_SIZE * sizeof(u8));
+
+       /* ensure additional_info will be seen before setting received_resp */
+       smp_wmb();
+
        if (match_id) {
                /* If match_id is not zero, it means PF support match_id.
                 * if the match_id is right, VF get the right response, or
index 08d7edc..3f99eb1 100644 (file)
@@ -3844,7 +3844,7 @@ static int i40e_vc_add_cloud_filter(struct i40e_vf *vf, u8 *msg)
        struct i40e_pf *pf = vf->pf;
        struct i40e_vsi *vsi = NULL;
        int aq_ret = 0;
-       int i, ret;
+       int i;
 
        if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) {
                aq_ret = -EINVAL;
@@ -3868,8 +3868,10 @@ static int i40e_vc_add_cloud_filter(struct i40e_vf *vf, u8 *msg)
        }
 
        cfilter = kzalloc(sizeof(*cfilter), GFP_KERNEL);
-       if (!cfilter)
-               return -ENOMEM;
+       if (!cfilter) {
+               aq_ret = -ENOMEM;
+               goto err_out;
+       }
 
        /* parse destination mac address */
        for (i = 0; i < ETH_ALEN; i++)
@@ -3917,13 +3919,13 @@ static int i40e_vc_add_cloud_filter(struct i40e_vf *vf, u8 *msg)
 
        /* Adding cloud filter programmed as TC filter */
        if (tcf.dst_port)
-               ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter, true);
+               aq_ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter, true);
        else
-               ret = i40e_add_del_cloud_filter(vsi, cfilter, true);
-       if (ret) {
+               aq_ret = i40e_add_del_cloud_filter(vsi, cfilter, true);
+       if (aq_ret) {
                dev_err(&pf->pdev->dev,
                        "VF %d: Failed to add cloud filter, err %pe aq_err %s\n",
-                       vf->vf_id, ERR_PTR(ret),
+                       vf->vf_id, ERR_PTR(aq_ret),
                        i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
                goto err_free;
        }
index cfb1580..8b7504a 100644 (file)
@@ -1479,14 +1479,14 @@ ice_post_dwnld_pkg_actions(struct ice_hw *hw)
 }
 
 /**
- * ice_download_pkg
+ * ice_download_pkg_with_sig_seg
  * @hw: pointer to the hardware structure
  * @pkg_hdr: pointer to package header
  *
  * Handles the download of a complete package.
  */
 static enum ice_ddp_state
-ice_download_pkg(struct ice_hw *hw, struct ice_pkg_hdr *pkg_hdr)
+ice_download_pkg_with_sig_seg(struct ice_hw *hw, struct ice_pkg_hdr *pkg_hdr)
 {
        enum ice_aq_err aq_err = hw->adminq.sq_last_status;
        enum ice_ddp_state state = ICE_DDP_PKG_ERR;
@@ -1519,6 +1519,103 @@ ice_download_pkg(struct ice_hw *hw, struct ice_pkg_hdr *pkg_hdr)
                state = ice_post_dwnld_pkg_actions(hw);
 
        ice_release_global_cfg_lock(hw);
+
+       return state;
+}
+
+/**
+ * ice_dwnld_cfg_bufs
+ * @hw: pointer to the hardware structure
+ * @bufs: pointer to an array of buffers
+ * @count: the number of buffers in the array
+ *
+ * Obtains global config lock and downloads the package configuration buffers
+ * to the firmware.
+ */
+static enum ice_ddp_state
+ice_dwnld_cfg_bufs(struct ice_hw *hw, struct ice_buf *bufs, u32 count)
+{
+       enum ice_ddp_state state;
+       struct ice_buf_hdr *bh;
+       int status;
+
+       if (!bufs || !count)
+               return ICE_DDP_PKG_ERR;
+
+       /* If the first buffer's first section has its metadata bit set
+        * then there are no buffers to be downloaded, and the operation is
+        * considered a success.
+        */
+       bh = (struct ice_buf_hdr *)bufs;
+       if (le32_to_cpu(bh->section_entry[0].type) & ICE_METADATA_BUF)
+               return ICE_DDP_PKG_SUCCESS;
+
+       status = ice_acquire_global_cfg_lock(hw, ICE_RES_WRITE);
+       if (status) {
+               if (status == -EALREADY)
+                       return ICE_DDP_PKG_ALREADY_LOADED;
+               return ice_map_aq_err_to_ddp_state(hw->adminq.sq_last_status);
+       }
+
+       state = ice_dwnld_cfg_bufs_no_lock(hw, bufs, 0, count, true);
+       if (!state)
+               state = ice_post_dwnld_pkg_actions(hw);
+
+       ice_release_global_cfg_lock(hw);
+
+       return state;
+}
+
+/**
+ * ice_download_pkg_without_sig_seg
+ * @hw: pointer to the hardware structure
+ * @ice_seg: pointer to the segment of the package to be downloaded
+ *
+ * Handles the download of a complete package without signature segment.
+ */
+static enum ice_ddp_state
+ice_download_pkg_without_sig_seg(struct ice_hw *hw, struct ice_seg *ice_seg)
+{
+       struct ice_buf_table *ice_buf_tbl;
+
+       ice_debug(hw, ICE_DBG_PKG, "Segment format version: %d.%d.%d.%d\n",
+                 ice_seg->hdr.seg_format_ver.major,
+                 ice_seg->hdr.seg_format_ver.minor,
+                 ice_seg->hdr.seg_format_ver.update,
+                 ice_seg->hdr.seg_format_ver.draft);
+
+       ice_debug(hw, ICE_DBG_PKG, "Seg: type 0x%X, size %d, name %s\n",
+                 le32_to_cpu(ice_seg->hdr.seg_type),
+                 le32_to_cpu(ice_seg->hdr.seg_size), ice_seg->hdr.seg_id);
+
+       ice_buf_tbl = ice_find_buf_table(ice_seg);
+
+       ice_debug(hw, ICE_DBG_PKG, "Seg buf count: %d\n",
+                 le32_to_cpu(ice_buf_tbl->buf_count));
+
+       return ice_dwnld_cfg_bufs(hw, ice_buf_tbl->buf_array,
+                                 le32_to_cpu(ice_buf_tbl->buf_count));
+}
+
+/**
+ * ice_download_pkg
+ * @hw: pointer to the hardware structure
+ * @pkg_hdr: pointer to package header
+ * @ice_seg: pointer to the segment of the package to be downloaded
+ *
+ * Handles the download of a complete package.
+ */
+static enum ice_ddp_state
+ice_download_pkg(struct ice_hw *hw, struct ice_pkg_hdr *pkg_hdr,
+                struct ice_seg *ice_seg)
+{
+       enum ice_ddp_state state;
+
+       if (hw->pkg_has_signing_seg)
+               state = ice_download_pkg_with_sig_seg(hw, pkg_hdr);
+       else
+               state = ice_download_pkg_without_sig_seg(hw, ice_seg);
+
        ice_post_pkg_dwnld_vlan_mode_cfg(hw);
 
        return state;
@@ -2083,7 +2180,7 @@ enum ice_ddp_state ice_init_pkg(struct ice_hw *hw, u8 *buf, u32 len)
 
        /* initialize package hints and then download package */
        ice_init_pkg_hints(hw, seg);
-       state = ice_download_pkg(hw, pkg);
+       state = ice_download_pkg(hw, pkg, seg);
        if (state == ICE_DDP_PKG_ALREADY_LOADED) {
                ice_debug(hw, ICE_DBG_INIT,
                          "package previously loaded - no work.\n");
index 835c419..86b180c 100644 (file)
@@ -815,12 +815,6 @@ ice_dpll_input_prio_set(const struct dpll_pin *pin, void *pin_priv,
        struct ice_pf *pf = d->pf;
        int ret;
 
-       if (prio > ICE_DPLL_PRIO_MAX) {
-               NL_SET_ERR_MSG_FMT(extack, "prio out of supported range 0-%d",
-                                  ICE_DPLL_PRIO_MAX);
-               return -EINVAL;
-       }
-
        mutex_lock(&pf->dplls.lock);
        ret = ice_dpll_hw_input_prio_set(pf, d, p, prio, extack);
        mutex_unlock(&pf->dplls.lock);
@@ -1756,6 +1750,7 @@ ice_dpll_init_dpll(struct ice_pf *pf, struct ice_dpll *d, bool cgu,
        }
        d->pf = pf;
        if (cgu) {
+               ice_dpll_update_state(pf, d, true);
                ret = dpll_device_register(d->dpll, type, &ice_dpll_ops, d);
                if (ret) {
                        dpll_device_put(d->dpll);
@@ -1796,8 +1791,6 @@ static int ice_dpll_init_worker(struct ice_pf *pf)
        struct ice_dplls *d = &pf->dplls;
        struct kthread_worker *kworker;
 
-       ice_dpll_update_state(pf, &d->eec, true);
-       ice_dpll_update_state(pf, &d->pps, true);
        kthread_init_delayed_work(&d->work, ice_dpll_periodic_work);
        kworker = kthread_create_worker(0, "ice-dplls-%s",
                                        dev_name(ice_pf_to_dev(pf)));
@@ -1830,6 +1823,7 @@ ice_dpll_init_info_direct_pins(struct ice_pf *pf,
        int num_pins, i, ret = -EINVAL;
        struct ice_hw *hw = &pf->hw;
        struct ice_dpll_pin *pins;
+       unsigned long caps;
        u8 freq_supp_num;
        bool input;
 
@@ -1849,6 +1843,7 @@ ice_dpll_init_info_direct_pins(struct ice_pf *pf,
        }
 
        for (i = 0; i < num_pins; i++) {
+               caps = 0;
                pins[i].idx = i;
                pins[i].prop.board_label = ice_cgu_get_pin_name(hw, i, input);
                pins[i].prop.type = ice_cgu_get_pin_type(hw, i, input);
@@ -1861,8 +1856,8 @@ ice_dpll_init_info_direct_pins(struct ice_pf *pf,
                                                      &dp->input_prio[i]);
                        if (ret)
                                return ret;
-                       pins[i].prop.capabilities |=
-                               DPLL_PIN_CAPABILITIES_PRIORITY_CAN_CHANGE;
+                       caps |= (DPLL_PIN_CAPABILITIES_PRIORITY_CAN_CHANGE |
+                                DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE);
                        pins[i].prop.phase_range.min =
                                pf->dplls.input_phase_adj_max;
                        pins[i].prop.phase_range.max =
@@ -1872,9 +1867,11 @@ ice_dpll_init_info_direct_pins(struct ice_pf *pf,
                                pf->dplls.output_phase_adj_max;
                        pins[i].prop.phase_range.max =
                                -pf->dplls.output_phase_adj_max;
+                       ret = ice_cgu_get_output_pin_state_caps(hw, i, &caps);
+                       if (ret)
+                               return ret;
                }
-               pins[i].prop.capabilities |=
-                       DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE;
+               pins[i].prop.capabilities = caps;
                ret = ice_dpll_pin_state_update(pf, &pins[i], pin_type, NULL);
                if (ret)
                        return ret;
index bb32b6d..93172e9 100644 (file)
@@ -6,7 +6,6 @@
 
 #include "ice.h"
 
-#define ICE_DPLL_PRIO_MAX      0xF
 #define ICE_DPLL_RCLK_NUM_MAX  4
 
 /** ice_dpll_pin - store info about pins
index 6607fa6..fb9c93f 100644 (file)
@@ -7401,15 +7401,6 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
                goto err_vsi_rebuild;
        }
 
-       /* configure PTP timestamping after VSI rebuild */
-       if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) {
-               if (pf->ptp.tx_interrupt_mode == ICE_PTP_TX_INTERRUPT_SELF)
-                       ice_ptp_cfg_timestamp(pf, false);
-               else if (pf->ptp.tx_interrupt_mode == ICE_PTP_TX_INTERRUPT_ALL)
-                       /* for E82x PHC owner always need to have interrupts */
-                       ice_ptp_cfg_timestamp(pf, true);
-       }
-
        err = ice_vsi_rebuild_by_type(pf, ICE_VSI_SWITCHDEV_CTRL);
        if (err) {
                dev_err(dev, "Switchdev CTRL VSI rebuild failed: %d\n", err);
@@ -7461,6 +7452,9 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
        ice_plug_aux_dev(pf);
        if (ice_is_feature_supported(pf, ICE_F_SRIOV_LAG))
                ice_lag_rebuild(pf);
+
+       /* Restore timestamp mode settings after VSI rebuild */
+       ice_ptp_restore_timestamp_mode(pf);
        return;
 
 err_vsi_rebuild:
index 1eddcbe..71f405f 100644 (file)
@@ -256,48 +256,42 @@ ice_verify_pin_e810t(struct ptp_clock_info *info, unsigned int pin,
 }
 
 /**
- * ice_ptp_configure_tx_tstamp - Enable or disable Tx timestamp interrupt
- * @pf: The PF pointer to search in
- * @on: bool value for whether timestamp interrupt is enabled or disabled
+ * ice_ptp_cfg_tx_interrupt - Configure Tx timestamp interrupt for the device
+ * @pf: Board private structure
+ *
+ * Program the device to respond appropriately to the Tx timestamp interrupt
+ * cause.
  */
-static void ice_ptp_configure_tx_tstamp(struct ice_pf *pf, bool on)
+static void ice_ptp_cfg_tx_interrupt(struct ice_pf *pf)
 {
+       struct ice_hw *hw = &pf->hw;
+       bool enable;
        u32 val;
 
+       switch (pf->ptp.tx_interrupt_mode) {
+       case ICE_PTP_TX_INTERRUPT_ALL:
+               /* React to interrupts across all quads. */
+               wr32(hw, PFINT_TSYN_MSK + (0x4 * hw->pf_id), (u32)0x1f);
+               enable = true;
+               break;
+       case ICE_PTP_TX_INTERRUPT_NONE:
+               /* Do not react to interrupts on any quad. */
+               wr32(hw, PFINT_TSYN_MSK + (0x4 * hw->pf_id), (u32)0x0);
+               enable = false;
+               break;
+       case ICE_PTP_TX_INTERRUPT_SELF:
+       default:
+               enable = pf->ptp.tstamp_config.tx_type == HWTSTAMP_TX_ON;
+               break;
+       }
+
        /* Configure the Tx timestamp interrupt */
-       val = rd32(&pf->hw, PFINT_OICR_ENA);
-       if (on)
+       val = rd32(hw, PFINT_OICR_ENA);
+       if (enable)
                val |= PFINT_OICR_TSYN_TX_M;
        else
                val &= ~PFINT_OICR_TSYN_TX_M;
-       wr32(&pf->hw, PFINT_OICR_ENA, val);
-}
-
-/**
- * ice_set_tx_tstamp - Enable or disable Tx timestamping
- * @pf: The PF pointer to search in
- * @on: bool value for whether timestamps are enabled or disabled
- */
-static void ice_set_tx_tstamp(struct ice_pf *pf, bool on)
-{
-       struct ice_vsi *vsi;
-       u16 i;
-
-       vsi = ice_get_main_vsi(pf);
-       if (!vsi)
-               return;
-
-       /* Set the timestamp enable flag for all the Tx rings */
-       ice_for_each_txq(vsi, i) {
-               if (!vsi->tx_rings[i])
-                       continue;
-               vsi->tx_rings[i]->ptp_tx = on;
-       }
-
-       if (pf->ptp.tx_interrupt_mode == ICE_PTP_TX_INTERRUPT_SELF)
-               ice_ptp_configure_tx_tstamp(pf, on);
-
-       pf->ptp.tstamp_config.tx_type = on ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
+       wr32(hw, PFINT_OICR_ENA, val);
 }
 
 /**
@@ -311,7 +305,7 @@ static void ice_set_rx_tstamp(struct ice_pf *pf, bool on)
        u16 i;
 
        vsi = ice_get_main_vsi(pf);
-       if (!vsi)
+       if (!vsi || !vsi->rx_rings)
                return;
 
        /* Set the timestamp flag for all the Rx rings */
@@ -320,23 +314,50 @@ static void ice_set_rx_tstamp(struct ice_pf *pf, bool on)
                        continue;
                vsi->rx_rings[i]->ptp_rx = on;
        }
+}
+
+/**
+ * ice_ptp_disable_timestamp_mode - Disable current timestamp mode
+ * @pf: Board private structure
+ *
+ * Called during preparation for reset to temporarily disable timestamping on
+ * the device. Called during remove to disable timestamping while cleaning up
+ * driver resources.
+ */
+static void ice_ptp_disable_timestamp_mode(struct ice_pf *pf)
+{
+       struct ice_hw *hw = &pf->hw;
+       u32 val;
+
+       val = rd32(hw, PFINT_OICR_ENA);
+       val &= ~PFINT_OICR_TSYN_TX_M;
+       wr32(hw, PFINT_OICR_ENA, val);
 
-       pf->ptp.tstamp_config.rx_filter = on ? HWTSTAMP_FILTER_ALL :
-                                              HWTSTAMP_FILTER_NONE;
+       ice_set_rx_tstamp(pf, false);
 }
 
 /**
- * ice_ptp_cfg_timestamp - Configure timestamp for init/deinit
+ * ice_ptp_restore_timestamp_mode - Restore timestamp configuration
  * @pf: Board private structure
- * @ena: bool value to enable or disable time stamp
  *
- * This function will configure timestamping during PTP initialization
- * and deinitialization
+ * Called at the end of rebuild to restore timestamp configuration after
+ * a device reset.
  */
-void ice_ptp_cfg_timestamp(struct ice_pf *pf, bool ena)
+void ice_ptp_restore_timestamp_mode(struct ice_pf *pf)
 {
-       ice_set_tx_tstamp(pf, ena);
-       ice_set_rx_tstamp(pf, ena);
+       struct ice_hw *hw = &pf->hw;
+       bool enable_rx;
+
+       ice_ptp_cfg_tx_interrupt(pf);
+
+       enable_rx = pf->ptp.tstamp_config.rx_filter == HWTSTAMP_FILTER_ALL;
+       ice_set_rx_tstamp(pf, enable_rx);
+
+       /* Trigger an immediate software interrupt to ensure that timestamps
+        * which occurred during reset are handled now.
+        */
+       wr32(hw, PFINT_OICR, PFINT_OICR_TSYN_TX_M);
+       ice_flush(hw);
 }
 
 /**
@@ -2037,10 +2058,10 @@ ice_ptp_set_timestamp_mode(struct ice_pf *pf, struct hwtstamp_config *config)
 {
        switch (config->tx_type) {
        case HWTSTAMP_TX_OFF:
-               ice_set_tx_tstamp(pf, false);
+               pf->ptp.tstamp_config.tx_type = HWTSTAMP_TX_OFF;
                break;
        case HWTSTAMP_TX_ON:
-               ice_set_tx_tstamp(pf, true);
+               pf->ptp.tstamp_config.tx_type = HWTSTAMP_TX_ON;
                break;
        default:
                return -ERANGE;
@@ -2048,7 +2069,7 @@ ice_ptp_set_timestamp_mode(struct ice_pf *pf, struct hwtstamp_config *config)
 
        switch (config->rx_filter) {
        case HWTSTAMP_FILTER_NONE:
-               ice_set_rx_tstamp(pf, false);
+               pf->ptp.tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE;
                break;
        case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
        case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
@@ -2064,12 +2085,15 @@ ice_ptp_set_timestamp_mode(struct ice_pf *pf, struct hwtstamp_config *config)
        case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
        case HWTSTAMP_FILTER_NTP_ALL:
        case HWTSTAMP_FILTER_ALL:
-               ice_set_rx_tstamp(pf, true);
+               pf->ptp.tstamp_config.rx_filter = HWTSTAMP_FILTER_ALL;
                break;
        default:
                return -ERANGE;
        }
 
+       /* Immediately update the device timestamping mode */
+       ice_ptp_restore_timestamp_mode(pf);
+
        return 0;
 }
 
@@ -2737,7 +2761,7 @@ void ice_ptp_prepare_for_reset(struct ice_pf *pf)
        clear_bit(ICE_FLAG_PTP, pf->flags);
 
        /* Disable timestamping for both Tx and Rx */
-       ice_ptp_cfg_timestamp(pf, false);
+       ice_ptp_disable_timestamp_mode(pf);
 
        kthread_cancel_delayed_work_sync(&ptp->work);
 
@@ -2803,15 +2827,7 @@ static int ice_ptp_init_owner(struct ice_pf *pf)
        /* Release the global hardware lock */
        ice_ptp_unlock(hw);
 
-       if (pf->ptp.tx_interrupt_mode == ICE_PTP_TX_INTERRUPT_ALL) {
-               /* The clock owner for this device type handles the timestamp
-                * interrupt for all ports.
-                */
-               ice_ptp_configure_tx_tstamp(pf, true);
-
-               /* React on all quads interrupts for E82x */
-               wr32(hw, PFINT_TSYN_MSK + (0x4 * hw->pf_id), (u32)0x1f);
-
+       if (!ice_is_e810(hw)) {
                /* Enable quad interrupts */
                err = ice_ptp_tx_ena_intr(pf, true, itr);
                if (err)
@@ -2881,13 +2897,6 @@ static int ice_ptp_init_port(struct ice_pf *pf, struct ice_ptp_port *ptp_port)
        case ICE_PHY_E810:
                return ice_ptp_init_tx_e810(pf, &ptp_port->tx);
        case ICE_PHY_E822:
-               /* Non-owner PFs don't react to any interrupts on E82x,
-                * neither on own quad nor on others
-                */
-               if (!ice_ptp_pf_handles_tx_interrupt(pf)) {
-                       ice_ptp_configure_tx_tstamp(pf, false);
-                       wr32(hw, PFINT_TSYN_MSK + (0x4 * hw->pf_id), (u32)0x0);
-               }
                kthread_init_delayed_work(&ptp_port->ov_work,
                                          ice_ptp_wait_for_offsets);
 
@@ -3032,6 +3041,9 @@ void ice_ptp_init(struct ice_pf *pf)
        /* Start the PHY timestamping block */
        ice_ptp_reset_phy_timestamping(pf);
 
+       /* Configure initial Tx interrupt settings */
+       ice_ptp_cfg_tx_interrupt(pf);
+
        set_bit(ICE_FLAG_PTP, pf->flags);
        err = ice_ptp_init_work(pf, ptp);
        if (err)
@@ -3067,7 +3079,7 @@ void ice_ptp_release(struct ice_pf *pf)
                return;
 
        /* Disable timestamping for both Tx and Rx */
-       ice_ptp_cfg_timestamp(pf, false);
+       ice_ptp_disable_timestamp_mode(pf);
 
        ice_ptp_remove_auxbus_device(pf);
 
index 8f6f943..06a3308 100644 (file)
@@ -292,7 +292,7 @@ int ice_ptp_clock_index(struct ice_pf *pf);
 struct ice_pf;
 int ice_ptp_set_ts_config(struct ice_pf *pf, struct ifreq *ifr);
 int ice_ptp_get_ts_config(struct ice_pf *pf, struct ifreq *ifr);
-void ice_ptp_cfg_timestamp(struct ice_pf *pf, bool ena);
+void ice_ptp_restore_timestamp_mode(struct ice_pf *pf);
 
 void ice_ptp_extts_event(struct ice_pf *pf);
 s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb);
@@ -317,8 +317,7 @@ static inline int ice_ptp_get_ts_config(struct ice_pf *pf, struct ifreq *ifr)
        return -EOPNOTSUPP;
 }
 
-static inline void ice_ptp_cfg_timestamp(struct ice_pf *pf, bool ena) { }
-
+static inline void ice_ptp_restore_timestamp_mode(struct ice_pf *pf) { }
 static inline void ice_ptp_extts_event(struct ice_pf *pf) { }
 static inline s8
 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb)
index 6d57390..a00b55e 100644 (file)
@@ -3961,3 +3961,57 @@ int ice_get_cgu_rclk_pin_info(struct ice_hw *hw, u8 *base_idx, u8 *pin_num)
 
        return ret;
 }
+
+/**
+ * ice_cgu_get_output_pin_state_caps - get output pin state capabilities
+ * @hw: pointer to the hw struct
+ * @pin_id: id of a pin
+ * @caps: capabilities to modify
+ *
+ * Return:
+ * * 0 - success, state capabilities were modified
+ * * negative - failure, capabilities were not modified
+ */
+int ice_cgu_get_output_pin_state_caps(struct ice_hw *hw, u8 pin_id,
+                                     unsigned long *caps)
+{
+       bool can_change = true;
+
+       switch (hw->device_id) {
+       case ICE_DEV_ID_E810C_SFP:
+               if (pin_id == ZL_OUT2 || pin_id == ZL_OUT3)
+                       can_change = false;
+               break;
+       case ICE_DEV_ID_E810C_QSFP:
+               if (pin_id == ZL_OUT2 || pin_id == ZL_OUT3 || pin_id == ZL_OUT4)
+                       can_change = false;
+               break;
+       case ICE_DEV_ID_E823L_10G_BASE_T:
+       case ICE_DEV_ID_E823L_1GBE:
+       case ICE_DEV_ID_E823L_BACKPLANE:
+       case ICE_DEV_ID_E823L_QSFP:
+       case ICE_DEV_ID_E823L_SFP:
+       case ICE_DEV_ID_E823C_10G_BASE_T:
+       case ICE_DEV_ID_E823C_BACKPLANE:
+       case ICE_DEV_ID_E823C_QSFP:
+       case ICE_DEV_ID_E823C_SFP:
+       case ICE_DEV_ID_E823C_SGMII:
+               if (hw->cgu_part_number ==
+                   ICE_AQC_GET_LINK_TOPO_NODE_NR_ZL30632_80032 &&
+                   pin_id == ZL_OUT2)
+                       can_change = false;
+               else if (hw->cgu_part_number ==
+                        ICE_AQC_GET_LINK_TOPO_NODE_NR_SI5383_5384 &&
+                        pin_id == SI_OUT1)
+                       can_change = false;
+               break;
+       default:
+               return -EINVAL;
+       }
+       if (can_change)
+               *caps |= DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE;
+       else
+               *caps &= ~DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE;
+
+       return 0;
+}
index 36aeeef..cf76701 100644 (file)
@@ -282,6 +282,8 @@ int ice_get_cgu_state(struct ice_hw *hw, u8 dpll_idx,
 int ice_get_cgu_rclk_pin_info(struct ice_hw *hw, u8 *base_idx, u8 *pin_num);
 
 void ice_ptp_init_phy_model(struct ice_hw *hw);
+int ice_cgu_get_output_pin_state_caps(struct ice_hw *hw, u8 pin_id,
+                                     unsigned long *caps);
 
 #define PFTSYN_SEM_BYTES       4
 
index 52d0a12..9e97ea8 100644 (file)
@@ -2306,9 +2306,6 @@ ice_tstamp(struct ice_tx_ring *tx_ring, struct sk_buff *skb,
        if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
                return;
 
-       if (!tx_ring->ptp_tx)
-               return;
-
        /* Tx timestamps cannot be sampled when doing TSO */
        if (first->tx_flags & ICE_TX_FLAGS_TSO)
                return;
index 166413f..daf7b9d 100644 (file)
@@ -380,7 +380,6 @@ struct ice_tx_ring {
 #define ICE_TX_FLAGS_RING_VLAN_L2TAG2  BIT(2)
        u8 flags;
        u8 dcb_tc;                      /* Traffic class of ring */
-       u8 ptp_tx;
 } ____cacheline_internodealigned_in_smp;
 
 static inline bool ice_ring_uses_build_skb(struct ice_rx_ring *ring)
index 9081713..29aac32 100644 (file)
@@ -4790,14 +4790,17 @@ static void mvneta_ethtool_get_strings(struct net_device *netdev, u32 sset,
                                       u8 *data)
 {
        if (sset == ETH_SS_STATS) {
+               struct mvneta_port *pp = netdev_priv(netdev);
                int i;
 
                for (i = 0; i < ARRAY_SIZE(mvneta_statistics); i++)
                        memcpy(data + i * ETH_GSTRING_LEN,
                               mvneta_statistics[i].name, ETH_GSTRING_LEN);
 
-               data += ETH_GSTRING_LEN * ARRAY_SIZE(mvneta_statistics);
-               page_pool_ethtool_stats_get_strings(data);
+               if (!pp->bm_priv) {
+                       data += ETH_GSTRING_LEN * ARRAY_SIZE(mvneta_statistics);
+                       page_pool_ethtool_stats_get_strings(data);
+               }
        }
 }
 
@@ -4915,8 +4918,10 @@ static void mvneta_ethtool_pp_stats(struct mvneta_port *pp, u64 *data)
        struct page_pool_stats stats = {};
        int i;
 
-       for (i = 0; i < rxq_number; i++)
-               page_pool_get_stats(pp->rxqs[i].page_pool, &stats);
+       for (i = 0; i < rxq_number; i++) {
+               if (pp->rxqs[i].page_pool)
+                       page_pool_get_stats(pp->rxqs[i].page_pool, &stats);
+       }
 
        page_pool_ethtool_stats_get(data, &stats);
 }
@@ -4932,14 +4937,21 @@ static void mvneta_ethtool_get_stats(struct net_device *dev,
        for (i = 0; i < ARRAY_SIZE(mvneta_statistics); i++)
                *data++ = pp->ethtool_stats[i];
 
-       mvneta_ethtool_pp_stats(pp, data);
+       if (!pp->bm_priv)
+               mvneta_ethtool_pp_stats(pp, data);
 }
 
 static int mvneta_ethtool_get_sset_count(struct net_device *dev, int sset)
 {
-       if (sset == ETH_SS_STATS)
-               return ARRAY_SIZE(mvneta_statistics) +
-                      page_pool_ethtool_stats_get_count();
+       if (sset == ETH_SS_STATS) {
+               int count = ARRAY_SIZE(mvneta_statistics);
+               struct mvneta_port *pp = netdev_priv(dev);
+
+               if (!pp->bm_priv)
+                       count += page_pool_ethtool_stats_get_count();
+
+               return count;
+       }
 
        return -EOPNOTSUPP;
 }
index 4762dbe..97a71e9 100644 (file)
@@ -1088,6 +1088,7 @@ int otx2_add_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc)
        struct ethhdr *eth_hdr;
        bool new = false;
        int err = 0;
+       u64 vf_num;
        u32 ring;
 
        if (!flow_cfg->max_flows) {
@@ -1100,7 +1101,21 @@ int otx2_add_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc)
        if (!(pfvf->flags & OTX2_FLAG_NTUPLE_SUPPORT))
                return -ENOMEM;
 
-       if (ring >= pfvf->hw.rx_queues && fsp->ring_cookie != RX_CLS_FLOW_DISC)
+       /* Number of queues on a VF can be greater or less than
+        * the PF's queue count, so there is no need to check the
+        * queue count when the PF is installing a filter on behalf
+        * of one of its VFs. Below is the expected vf_num value
+        * based on the ethtool commands.
+        *
+        * e.g.
+        * 1. ethtool -U <netdev> ... action -1  ==> vf_num:255
+        * 2. ethtool -U <netdev> ... action <queue_num>  ==> vf_num:0
+        * 3. ethtool -U <netdev> ... vf <vf_idx> queue <queue_num>  ==>
+        *    vf_num:vf_idx+1
+        */
+       vf_num = ethtool_get_flow_spec_ring_vf(fsp->ring_cookie);
+       if (!is_otx2_vf(pfvf->pcifunc) && !vf_num &&
+           ring >= pfvf->hw.rx_queues && fsp->ring_cookie != RX_CLS_FLOW_DISC)
                return -EINVAL;
 
        if (fsp->location >= otx2_get_maxflows(flow_cfg))
@@ -1182,6 +1197,9 @@ int otx2_add_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc)
                flow_cfg->nr_flows++;
        }
 
+       if (flow->is_vf)
+               netdev_info(pfvf->netdev,
+                           "Make sure that VF's queue number is within its queue limit\n");
        return 0;
 }
 
index 91b99fd..ba95ac9 100644 (file)
@@ -1934,6 +1934,8 @@ int otx2_stop(struct net_device *netdev)
        /* Clear RSS enable flag */
        rss = &pf->hw.rss_info;
        rss->enable = false;
+       if (!netif_is_rxfh_configured(netdev))
+               kfree(rss->rss_ctx[DEFAULT_RSS_CONTEXT_GROUP]);
 
        /* Cleanup Queue IRQ */
        vec = pci_irq_vector(pf->pdev,
index bb11e64..af3928e 100644 (file)
@@ -177,6 +177,8 @@ static void mlx5e_ptpsq_mark_ts_cqes_undelivered(struct mlx5e_ptpsq *ptpsq,
 
 static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
                                    struct mlx5_cqe64 *cqe,
+                                   u8 *md_buff,
+                                   u8 *md_buff_sz,
                                    int budget)
 {
        struct mlx5e_ptp_port_ts_cqe_list *pending_cqe_list = ptpsq->ts_cqe_pending_list;
@@ -211,19 +213,24 @@ static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
        mlx5e_ptpsq_mark_ts_cqes_undelivered(ptpsq, hwtstamp);
 out:
        napi_consume_skb(skb, budget);
-       mlx5e_ptp_metadata_fifo_push(&ptpsq->metadata_freelist, metadata_id);
+       md_buff[*md_buff_sz++] = metadata_id;
        if (unlikely(mlx5e_ptp_metadata_map_unhealthy(&ptpsq->metadata_map)) &&
            !test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
                queue_work(ptpsq->txqsq.priv->wq, &ptpsq->report_unhealthy_work);
 }
 
-static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget)
+static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int napi_budget)
 {
        struct mlx5e_ptpsq *ptpsq = container_of(cq, struct mlx5e_ptpsq, ts_cq);
-       struct mlx5_cqwq *cqwq = &cq->wq;
+       int budget = min(napi_budget, MLX5E_TX_CQ_POLL_BUDGET);
+       u8 metadata_buff[MLX5E_TX_CQ_POLL_BUDGET];
+       u8 metadata_buff_sz = 0;
+       struct mlx5_cqwq *cqwq;
        struct mlx5_cqe64 *cqe;
        int work_done = 0;
 
+       cqwq = &cq->wq;
+
        if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &ptpsq->txqsq.state)))
                return false;
 
@@ -234,7 +241,8 @@ static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget)
        do {
                mlx5_cqwq_pop(cqwq);
 
-               mlx5e_ptp_handle_ts_cqe(ptpsq, cqe, budget);
+               mlx5e_ptp_handle_ts_cqe(ptpsq, cqe,
+                                       metadata_buff, &metadata_buff_sz, napi_budget);
        } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(cqwq)));
 
        mlx5_cqwq_update_db_record(cqwq);
@@ -242,6 +250,10 @@ static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget)
        /* ensure cq space is freed before enabling more cqes */
        wmb();
 
+       while (metadata_buff_sz > 0)
+               mlx5e_ptp_metadata_fifo_push(&ptpsq->metadata_freelist,
+                                            metadata_buff[--metadata_buff_sz]);
+
        mlx5e_txqsq_wake(&ptpsq->txqsq);
 
        return work_done == budget;
index fea8c0a..4358798 100644 (file)
@@ -492,11 +492,11 @@ static int mlx5e_rx_reporter_dump(struct devlink_health_reporter *reporter,
 
 void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq)
 {
-       char icosq_str[MLX5E_REPORTER_PER_Q_MAX_LEN] = {};
        char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
        struct mlx5e_icosq *icosq = rq->icosq;
        struct mlx5e_priv *priv = rq->priv;
        struct mlx5e_err_ctx err_ctx = {};
+       char icosq_str[32] = {};
 
        err_ctx.ctx = rq;
        err_ctx.recover = mlx5e_rx_reporter_timeout_recover;
@@ -505,7 +505,7 @@ void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq)
        if (icosq)
                snprintf(icosq_str, sizeof(icosq_str), "ICOSQ: 0x%x, ", icosq->sqn);
        snprintf(err_str, sizeof(err_str),
-                "RX timeout on channel: %d, %sRQ: 0x%x, CQ: 0x%x",
+                "RX timeout on channel: %d, %s RQ: 0x%x, CQ: 0x%x",
                 rq->ix, icosq_str, rq->rqn, rq->cq.mcq.cqn);
 
        mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
index 00a04fd..668da5c 100644 (file)
@@ -300,9 +300,6 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
        if (err)
                goto destroy_neigh_entry;
 
-       e->encap_size = ipv4_encap_size;
-       e->encap_header = encap_header;
-
        if (!(nud_state & NUD_VALID)) {
                neigh_event_send(attr.n, NULL);
                /* the encap entry will be made valid on neigh update event
@@ -322,6 +319,8 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
                goto destroy_neigh_entry;
        }
 
+       e->encap_size = ipv4_encap_size;
+       e->encap_header = encap_header;
        e->flags |= MLX5_ENCAP_ENTRY_VALID;
        mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
        mlx5e_route_lookup_ipv4_put(&attr);
@@ -404,16 +403,12 @@ int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv,
        if (err)
                goto free_encap;
 
-       e->encap_size = ipv4_encap_size;
-       kfree(e->encap_header);
-       e->encap_header = encap_header;
-
        if (!(nud_state & NUD_VALID)) {
                neigh_event_send(attr.n, NULL);
                /* the encap entry will be made valid on neigh update event
                 * and not used before that.
                 */
-               goto release_neigh;
+               goto free_encap;
        }
 
        memset(&reformat_params, 0, sizeof(reformat_params));
@@ -427,6 +422,10 @@ int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv,
                goto free_encap;
        }
 
+       e->encap_size = ipv4_encap_size;
+       kfree(e->encap_header);
+       e->encap_header = encap_header;
+
        e->flags |= MLX5_ENCAP_ENTRY_VALID;
        mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
        mlx5e_route_lookup_ipv4_put(&attr);
@@ -568,9 +567,6 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
        if (err)
                goto destroy_neigh_entry;
 
-       e->encap_size = ipv6_encap_size;
-       e->encap_header = encap_header;
-
        if (!(nud_state & NUD_VALID)) {
                neigh_event_send(attr.n, NULL);
                /* the encap entry will be made valid on neigh update event
@@ -590,6 +586,8 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
                goto destroy_neigh_entry;
        }
 
+       e->encap_size = ipv6_encap_size;
+       e->encap_header = encap_header;
        e->flags |= MLX5_ENCAP_ENTRY_VALID;
        mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
        mlx5e_route_lookup_ipv6_put(&attr);
@@ -671,16 +669,12 @@ int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
        if (err)
                goto free_encap;
 
-       e->encap_size = ipv6_encap_size;
-       kfree(e->encap_header);
-       e->encap_header = encap_header;
-
        if (!(nud_state & NUD_VALID)) {
                neigh_event_send(attr.n, NULL);
                /* the encap entry will be made valid on neigh update event
                 * and not used before that.
                 */
-               goto release_neigh;
+               goto free_encap;
        }
 
        memset(&reformat_params, 0, sizeof(reformat_params));
@@ -694,6 +688,10 @@ int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
                goto free_encap;
        }
 
+       e->encap_size = ipv6_encap_size;
+       kfree(e->encap_header);
+       e->encap_header = encap_header;
+
        e->flags |= MLX5_ENCAP_ENTRY_VALID;
        mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
        mlx5e_route_lookup_ipv6_put(&attr);
index 215261a..792a0ea 100644 (file)
@@ -43,12 +43,17 @@ void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv,
                               struct ethtool_drvinfo *drvinfo)
 {
        struct mlx5_core_dev *mdev = priv->mdev;
+       int count;
 
        strscpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver));
-       snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
-                "%d.%d.%04d (%.16s)",
-                fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev),
-                mdev->board_id);
+       count = snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+                        "%d.%d.%04d (%.16s)", fw_rev_maj(mdev),
+                        fw_rev_min(mdev), fw_rev_sub(mdev), mdev->board_id);
+       if (count == sizeof(drvinfo->fw_version))
+               snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+                        "%d.%d.%04d", fw_rev_maj(mdev),
+                        fw_rev_min(mdev), fw_rev_sub(mdev));
+
        strscpy(drvinfo->bus_info, dev_name(mdev->device),
                sizeof(drvinfo->bus_info));
 }
index 693e55b..3ab682b 100644 (file)
@@ -71,13 +71,17 @@ static void mlx5e_rep_get_drvinfo(struct net_device *dev,
 {
        struct mlx5e_priv *priv = netdev_priv(dev);
        struct mlx5_core_dev *mdev = priv->mdev;
+       int count;
 
        strscpy(drvinfo->driver, mlx5e_rep_driver_name,
                sizeof(drvinfo->driver));
-       snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
-                "%d.%d.%04d (%.16s)",
-                fw_rev_maj(mdev), fw_rev_min(mdev),
-                fw_rev_sub(mdev), mdev->board_id);
+       count = snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+                        "%d.%d.%04d (%.16s)", fw_rev_maj(mdev),
+                        fw_rev_min(mdev), fw_rev_sub(mdev), mdev->board_id);
+       if (count == sizeof(drvinfo->fw_version))
+               snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+                        "%d.%d.%04d", fw_rev_maj(mdev),
+                        fw_rev_min(mdev), fw_rev_sub(mdev));
 }
 
 static const struct counter_desc sw_rep_stats_desc[] = {
index 9a5a5c2..7ca9e5b 100644 (file)
@@ -3147,7 +3147,7 @@ static struct mlx5_fields fields[] = {
        OFFLOAD(DIPV6_31_0,   32, U32_MAX, ip6.daddr.s6_addr32[3], 0,
                dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]),
        OFFLOAD(IPV6_HOPLIMIT, 8,  U8_MAX, ip6.hop_limit, 0, ttl_hoplimit),
-       OFFLOAD(IP_DSCP, 16,  0xc00f, ip6, 0, ip_dscp),
+       OFFLOAD(IP_DSCP, 16,  0x0fc0, ip6, 0, ip_dscp),
 
        OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source,  0, tcp_sport),
        OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest,    0, tcp_dport),
@@ -3158,21 +3158,31 @@ static struct mlx5_fields fields[] = {
        OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest,   0, udp_dport),
 };
 
-static unsigned long mask_to_le(unsigned long mask, int size)
+static u32 mask_field_get(void *mask, struct mlx5_fields *f)
 {
-       __be32 mask_be32;
-       __be16 mask_be16;
-
-       if (size == 32) {
-               mask_be32 = (__force __be32)(mask);
-               mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
-       } else if (size == 16) {
-               mask_be32 = (__force __be32)(mask);
-               mask_be16 = *(__be16 *)&mask_be32;
-               mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
+       switch (f->field_bsize) {
+       case 32:
+               return be32_to_cpu(*(__be32 *)mask) & f->field_mask;
+       case 16:
+               return be16_to_cpu(*(__be16 *)mask) & (u16)f->field_mask;
+       default:
+               return *(u8 *)mask & (u8)f->field_mask;
        }
+}
 
-       return mask;
+static void mask_field_clear(void *mask, struct mlx5_fields *f)
+{
+       switch (f->field_bsize) {
+       case 32:
+               *(__be32 *)mask &= ~cpu_to_be32(f->field_mask);
+               break;
+       case 16:
+               *(__be16 *)mask &= ~cpu_to_be16((u16)f->field_mask);
+               break;
+       default:
+               *(u8 *)mask &= ~(u8)f->field_mask;
+               break;
+       }
 }
 
 static int offload_pedit_fields(struct mlx5e_priv *priv,
@@ -3184,11 +3194,12 @@ static int offload_pedit_fields(struct mlx5e_priv *priv,
        struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
        struct pedit_headers_action *hdrs = parse_attr->hdrs;
        void *headers_c, *headers_v, *action, *vals_p;
-       u32 *s_masks_p, *a_masks_p, s_mask, a_mask;
        struct mlx5e_tc_mod_hdr_acts *mod_acts;
-       unsigned long mask, field_mask;
+       void *s_masks_p, *a_masks_p;
        int i, first, last, next_z;
        struct mlx5_fields *f;
+       unsigned long mask;
+       u32 s_mask, a_mask;
        u8 cmd;
 
        mod_acts = &parse_attr->mod_hdr_acts;
@@ -3204,15 +3215,11 @@ static int offload_pedit_fields(struct mlx5e_priv *priv,
                bool skip;
 
                f = &fields[i];
-               /* avoid seeing bits set from previous iterations */
-               s_mask = 0;
-               a_mask = 0;
-
                s_masks_p = (void *)set_masks + f->offset;
                a_masks_p = (void *)add_masks + f->offset;
 
-               s_mask = *s_masks_p & f->field_mask;
-               a_mask = *a_masks_p & f->field_mask;
+               s_mask = mask_field_get(s_masks_p, f);
+               a_mask = mask_field_get(a_masks_p, f);
 
                if (!s_mask && !a_mask) /* nothing to offload here */
                        continue;
@@ -3239,22 +3246,20 @@ static int offload_pedit_fields(struct mlx5e_priv *priv,
                                         match_mask, f->field_bsize))
                                skip = true;
                        /* clear to denote we consumed this field */
-                       *s_masks_p &= ~f->field_mask;
+                       mask_field_clear(s_masks_p, f);
                } else {
                        cmd  = MLX5_ACTION_TYPE_ADD;
                        mask = a_mask;
                        vals_p = (void *)add_vals + f->offset;
                        /* add 0 is no change */
-                       if ((*(u32 *)vals_p & f->field_mask) == 0)
+                       if (!mask_field_get(vals_p, f))
                                skip = true;
                        /* clear to denote we consumed this field */
-                       *a_masks_p &= ~f->field_mask;
+                       mask_field_clear(a_masks_p, f);
                }
                if (skip)
                        continue;
 
-               mask = mask_to_le(mask, f->field_bsize);
-
                first = find_first_bit(&mask, f->field_bsize);
                next_z = find_next_zero_bit(&mask, f->field_bsize, first);
                last  = find_last_bit(&mask, f->field_bsize);
@@ -3281,10 +3286,9 @@ static int offload_pedit_fields(struct mlx5e_priv *priv,
                MLX5_SET(set_action_in, action, field, f->field);
 
                if (cmd == MLX5_ACTION_TYPE_SET) {
+                       unsigned long field_mask = f->field_mask;
                        int start;
 
-                       field_mask = mask_to_le(f->field_mask, f->field_bsize);
-
                        /* if field is bit sized it can start not from first bit */
                        start = find_first_bit(&field_mask, f->field_bsize);
 
index d41435c..f0b506e 100644 (file)
@@ -399,9 +399,9 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
                u8 metadata_index = be32_to_cpu(eseg->flow_table_metadata);
 
                mlx5e_skb_cb_hwtstamp_init(skb);
-               mlx5e_ptpsq_track_metadata(sq->ptpsq, metadata_index);
                mlx5e_ptp_metadata_map_put(&sq->ptpsq->metadata_map, skb,
                                           metadata_index);
+               mlx5e_ptpsq_track_metadata(sq->ptpsq, metadata_index);
                if (!netif_tx_queue_stopped(sq->txq) &&
                    mlx5e_ptpsq_metadata_freelist_empty(sq->ptpsq)) {
                        netif_tx_stop_queue(sq->txq);
@@ -494,10 +494,10 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 
 err_drop:
        stats->dropped++;
-       dev_kfree_skb_any(skb);
        if (unlikely(sq->ptpsq && (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
                mlx5e_ptp_metadata_fifo_push(&sq->ptpsq->metadata_freelist,
                                             be32_to_cpu(eseg->flow_table_metadata));
+       dev_kfree_skb_any(skb);
        mlx5e_tx_flush(sq);
 }
 
index ea0405e..40a6cb0 100644 (file)
@@ -885,11 +885,14 @@ static void comp_irq_release_sf(struct mlx5_core_dev *dev, u16 vecidx)
 {
        struct mlx5_eq_table *table = dev->priv.eq_table;
        struct mlx5_irq *irq;
+       int cpu;
 
        irq = xa_load(&table->comp_irqs, vecidx);
        if (!irq)
                return;
 
+       cpu = cpumask_first(mlx5_irq_get_affinity_mask(irq));
+       cpumask_clear_cpu(cpu, &table->used_cpus);
        xa_erase(&table->comp_irqs, vecidx);
        mlx5_irq_affinity_irq_release(dev, irq);
 }
@@ -897,16 +900,26 @@ static void comp_irq_release_sf(struct mlx5_core_dev *dev, u16 vecidx)
 static int comp_irq_request_sf(struct mlx5_core_dev *dev, u16 vecidx)
 {
        struct mlx5_eq_table *table = dev->priv.eq_table;
+       struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev);
+       struct irq_affinity_desc af_desc = {};
        struct mlx5_irq *irq;
 
-       irq = mlx5_irq_affinity_irq_request_auto(dev, &table->used_cpus, vecidx);
-       if (IS_ERR(irq)) {
-               /* In case SF irq pool does not exist, fallback to the PF irqs*/
-               if (PTR_ERR(irq) == -ENOENT)
-                       return comp_irq_request_pci(dev, vecidx);
+       /* In case SF irq pool does not exist, fallback to the PF irqs*/
+       if (!mlx5_irq_pool_is_sf_pool(pool))
+               return comp_irq_request_pci(dev, vecidx);
 
+       af_desc.is_managed = 1;
+       cpumask_copy(&af_desc.mask, cpu_online_mask);
+       cpumask_andnot(&af_desc.mask, &af_desc.mask, &table->used_cpus);
+       irq = mlx5_irq_affinity_request(pool, &af_desc);
+       if (IS_ERR(irq))
                return PTR_ERR(irq);
-       }
+
+       cpumask_or(&table->used_cpus, &table->used_cpus, mlx5_irq_get_affinity_mask(irq));
+       mlx5_core_dbg(pool->dev, "IRQ %u mapped to cpu %*pbl, %u EQs on this irq\n",
+                     pci_irq_vector(dev->pdev, mlx5_irq_get_index(irq)),
+                     cpumask_pr_args(mlx5_irq_get_affinity_mask(irq)),
+                     mlx5_irq_read_locked(irq) / MLX5_EQ_REFS_PER_IRQ);
 
        return xa_err(xa_store(&table->comp_irqs, vecidx, irq, GFP_KERNEL));
 }
index b296ac5..88236e7 100644 (file)
@@ -984,7 +984,8 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
        dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
        flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 
-       if (rep->vport == MLX5_VPORT_UPLINK && on_esw->offloads.ft_ipsec_tx_pol) {
+       if (rep->vport == MLX5_VPORT_UPLINK &&
+           on_esw == from_esw && on_esw->offloads.ft_ipsec_tx_pol) {
                dest.ft = on_esw->offloads.ft_ipsec_tx_pol;
                flow_act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL;
                dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
index 047d5fe..612e666 100644 (file)
@@ -168,45 +168,3 @@ void mlx5_irq_affinity_irq_release(struct mlx5_core_dev *dev, struct mlx5_irq *i
                if (pool->irqs_per_cpu)
                        cpu_put(pool, cpu);
 }
-
-/**
- * mlx5_irq_affinity_irq_request_auto - request one IRQ for mlx5 device.
- * @dev: mlx5 device that is requesting the IRQ.
- * @used_cpus: cpumask of bounded cpus by the device
- * @vecidx: vector index to request an IRQ for.
- *
- * Each IRQ is bounded to at most 1 CPU.
- * This function is requesting an IRQ according to the default assignment.
- * The default assignment policy is:
- * - request the least loaded IRQ which is not bound to any
- *   CPU of the previous IRQs requested.
- *
- * On success, this function updates used_cpus mask and returns an irq pointer.
- * In case of an error, an appropriate error pointer is returned.
- */
-struct mlx5_irq *mlx5_irq_affinity_irq_request_auto(struct mlx5_core_dev *dev,
-                                                   struct cpumask *used_cpus, u16 vecidx)
-{
-       struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev);
-       struct irq_affinity_desc af_desc = {};
-       struct mlx5_irq *irq;
-
-       if (!mlx5_irq_pool_is_sf_pool(pool))
-               return ERR_PTR(-ENOENT);
-
-       af_desc.is_managed = 1;
-       cpumask_copy(&af_desc.mask, cpu_online_mask);
-       cpumask_andnot(&af_desc.mask, &af_desc.mask, used_cpus);
-       irq = mlx5_irq_affinity_request(pool, &af_desc);
-
-       if (IS_ERR(irq))
-               return irq;
-
-       cpumask_or(used_cpus, used_cpus, mlx5_irq_get_affinity_mask(irq));
-       mlx5_core_dbg(pool->dev, "IRQ %u mapped to cpu %*pbl, %u EQs on this irq\n",
-                     pci_irq_vector(dev->pdev, mlx5_irq_get_index(irq)),
-                     cpumask_pr_args(mlx5_irq_get_affinity_mask(irq)),
-                     mlx5_irq_read_locked(irq) / MLX5_EQ_REFS_PER_IRQ);
-
-       return irq;
-}
index aa29f09..0c83ef1 100644 (file)
@@ -384,7 +384,12 @@ static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
 
 static int mlx5_ptp_adjphase(struct ptp_clock_info *ptp, s32 delta)
 {
-       return mlx5_ptp_adjtime(ptp, delta);
+       struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info);
+       struct mlx5_core_dev *mdev;
+
+       mdev = container_of(clock, struct mlx5_core_dev, clock);
+
+       return mlx5_ptp_adjtime_real_time(mdev, delta);
 }
 
 static int mlx5_ptp_freq_adj_real_time(struct mlx5_core_dev *mdev, long scaled_ppm)
index 6536482..4dcf995 100644 (file)
@@ -28,7 +28,7 @@
 struct mlx5_irq {
        struct atomic_notifier_head nh;
        cpumask_var_t mask;
-       char name[MLX5_MAX_IRQ_NAME];
+       char name[MLX5_MAX_IRQ_FORMATTED_NAME];
        struct mlx5_irq_pool *pool;
        int refcount;
        struct msi_map map;
@@ -292,8 +292,8 @@ struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
        else
                irq_sf_set_name(pool, name, i);
        ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
-       snprintf(irq->name, MLX5_MAX_IRQ_NAME,
-                "%s@pci:%s", name, pci_name(dev->pdev));
+       snprintf(irq->name, MLX5_MAX_IRQ_FORMATTED_NAME,
+                MLX5_IRQ_NAME_FORMAT_STR, name, pci_name(dev->pdev));
        err = request_irq(irq->map.virq, irq_int_handler, 0, irq->name,
                          &irq->nh);
        if (err) {
index d3a77a0..c4d377f 100644 (file)
@@ -7,6 +7,9 @@
 #include <linux/mlx5/driver.h>
 
 #define MLX5_MAX_IRQ_NAME (32)
+#define MLX5_IRQ_NAME_FORMAT_STR ("%s@pci:%s")
+#define MLX5_MAX_IRQ_FORMATTED_NAME \
+       (MLX5_MAX_IRQ_NAME + sizeof(MLX5_IRQ_NAME_FORMAT_STR))
 /* max irq_index is 2047, so four chars */
 #define MLX5_MAX_IRQ_IDX_CHARS (4)
 #define MLX5_EQ_REFS_PER_IRQ (2)
index 6ea88a5..e3ec559 100644 (file)
@@ -57,7 +57,8 @@ static const char *dr_action_id_to_str(enum mlx5dr_action_type action_id)
 
 static bool mlx5dr_action_supp_fwd_fdb_multi_ft(struct mlx5_core_dev *dev)
 {
-       return (MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_any_table_limit_regc) ||
+       return (MLX5_CAP_GEN(dev, steering_format_version) < MLX5_STEERING_FORMAT_CONNECTX_6DX ||
+               MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_any_table_limit_regc) ||
                MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_any_table));
 }
 
index 4e8527a..6fa06ba 100644 (file)
@@ -52,7 +52,6 @@ struct dr_qp_init_attr {
        u32 cqn;
        u32 pdn;
        u32 max_send_wr;
-       u32 max_send_sge;
        struct mlx5_uars_page *uar;
        u8 isolate_vl_tc:1;
 };
@@ -247,37 +246,6 @@ static int dr_poll_cq(struct mlx5dr_cq *dr_cq, int ne)
        return err == CQ_POLL_ERR ? err : npolled;
 }
 
-static int dr_qp_get_args_update_send_wqe_size(struct dr_qp_init_attr *attr)
-{
-       return roundup_pow_of_two(sizeof(struct mlx5_wqe_ctrl_seg) +
-                                 sizeof(struct mlx5_wqe_flow_update_ctrl_seg) +
-                                 sizeof(struct mlx5_wqe_header_modify_argument_update_seg));
-}
-
-/* We calculate for specific RC QP with the required functionality */
-static int dr_qp_calc_rc_send_wqe(struct dr_qp_init_attr *attr)
-{
-       int update_arg_size;
-       int inl_size = 0;
-       int tot_size;
-       int size;
-
-       update_arg_size = dr_qp_get_args_update_send_wqe_size(attr);
-
-       size = sizeof(struct mlx5_wqe_ctrl_seg) +
-              sizeof(struct mlx5_wqe_raddr_seg);
-       inl_size = size + ALIGN(sizeof(struct mlx5_wqe_inline_seg) +
-                               DR_STE_SIZE, 16);
-
-       size += attr->max_send_sge * sizeof(struct mlx5_wqe_data_seg);
-
-       size = max(size, update_arg_size);
-
-       tot_size = max(size, inl_size);
-
-       return ALIGN(tot_size, MLX5_SEND_WQE_BB);
-}
-
 static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
                                         struct dr_qp_init_attr *attr)
 {
@@ -285,7 +253,6 @@ static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
        u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {};
        struct mlx5_wq_param wqp;
        struct mlx5dr_qp *dr_qp;
-       int wqe_size;
        int inlen;
        void *qpc;
        void *in;
@@ -365,15 +332,6 @@ static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
        if (err)
                goto err_in;
        dr_qp->uar = attr->uar;
-       wqe_size = dr_qp_calc_rc_send_wqe(attr);
-       dr_qp->max_inline_data = min(wqe_size -
-                                    (sizeof(struct mlx5_wqe_ctrl_seg) +
-                                     sizeof(struct mlx5_wqe_raddr_seg) +
-                                     sizeof(struct mlx5_wqe_inline_seg)),
-                                    (2 * MLX5_SEND_WQE_BB -
-                                     (sizeof(struct mlx5_wqe_ctrl_seg) +
-                                      sizeof(struct mlx5_wqe_raddr_seg) +
-                                      sizeof(struct mlx5_wqe_inline_seg))));
 
        return dr_qp;
 
@@ -437,48 +395,8 @@ dr_rdma_handle_flow_access_arg_segments(struct mlx5_wqe_ctrl_seg *wq_ctrl,
                MLX5_SEND_WQE_DS;
 }
 
-static int dr_set_data_inl_seg(struct mlx5dr_qp *dr_qp,
-                              struct dr_data_seg *data_seg, void *wqe)
-{
-       int inline_header_size = sizeof(struct mlx5_wqe_ctrl_seg) +
-                               sizeof(struct mlx5_wqe_raddr_seg) +
-                               sizeof(struct mlx5_wqe_inline_seg);
-       struct mlx5_wqe_inline_seg *seg;
-       int left_space;
-       int inl = 0;
-       void *addr;
-       int len;
-       int idx;
-
-       seg = wqe;
-       wqe += sizeof(*seg);
-       addr = (void *)(unsigned long)(data_seg->addr);
-       len  = data_seg->length;
-       inl += len;
-       left_space = MLX5_SEND_WQE_BB - inline_header_size;
-
-       if (likely(len > left_space)) {
-               memcpy(wqe, addr, left_space);
-               len -= left_space;
-               addr += left_space;
-               idx = (dr_qp->sq.pc + 1) & (dr_qp->sq.wqe_cnt - 1);
-               wqe = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx);
-       }
-
-       memcpy(wqe, addr, len);
-
-       if (likely(inl)) {
-               seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG);
-               return DIV_ROUND_UP(inl + sizeof(seg->byte_count),
-                                   MLX5_SEND_WQE_DS);
-       } else {
-               return 0;
-       }
-}
-
 static void
-dr_rdma_handle_icm_write_segments(struct mlx5dr_qp *dr_qp,
-                                 struct mlx5_wqe_ctrl_seg *wq_ctrl,
+dr_rdma_handle_icm_write_segments(struct mlx5_wqe_ctrl_seg *wq_ctrl,
                                  u64 remote_addr,
                                  u32 rkey,
                                  struct dr_data_seg *data_seg,
@@ -494,17 +412,15 @@ dr_rdma_handle_icm_write_segments(struct mlx5dr_qp *dr_qp,
        wq_raddr->reserved = 0;
 
        wq_dseg = (void *)(wq_raddr + 1);
-       /* WQE ctrl segment + WQE remote addr segment */
-       *size = (sizeof(*wq_ctrl) + sizeof(*wq_raddr)) / MLX5_SEND_WQE_DS;
 
-       if (data_seg->send_flags & IB_SEND_INLINE) {
-               *size += dr_set_data_inl_seg(dr_qp, data_seg, wq_dseg);
-       } else {
-               wq_dseg->byte_count = cpu_to_be32(data_seg->length);
-               wq_dseg->lkey = cpu_to_be32(data_seg->lkey);
-               wq_dseg->addr = cpu_to_be64(data_seg->addr);
-               *size += sizeof(*wq_dseg) / MLX5_SEND_WQE_DS;  /* WQE data segment */
-       }
+       wq_dseg->byte_count = cpu_to_be32(data_seg->length);
+       wq_dseg->lkey = cpu_to_be32(data_seg->lkey);
+       wq_dseg->addr = cpu_to_be64(data_seg->addr);
+
+       *size = (sizeof(*wq_ctrl) +    /* WQE ctrl segment */
+                sizeof(*wq_dseg) +    /* WQE data segment */
+                sizeof(*wq_raddr)) /  /* WQE remote addr segment */
+               MLX5_SEND_WQE_DS;
 }
 
 static void dr_set_ctrl_seg(struct mlx5_wqe_ctrl_seg *wq_ctrl,
@@ -535,7 +451,7 @@ static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr,
        switch (opcode) {
        case MLX5_OPCODE_RDMA_READ:
        case MLX5_OPCODE_RDMA_WRITE:
-               dr_rdma_handle_icm_write_segments(dr_qp, wq_ctrl, remote_addr,
+               dr_rdma_handle_icm_write_segments(wq_ctrl, remote_addr,
                                                  rkey, data_seg, &size);
                break;
        case MLX5_OPCODE_FLOW_TBL_ACCESS:
@@ -656,7 +572,7 @@ static void dr_fill_write_args_segs(struct mlx5dr_send_ring *send_ring,
        if (send_ring->pending_wqe % send_ring->signal_th == 0)
                send_info->write.send_flags |= IB_SEND_SIGNALED;
        else
-               send_info->write.send_flags &= ~IB_SEND_SIGNALED;
+               send_info->write.send_flags = 0;
 }
 
 static void dr_fill_write_icm_segs(struct mlx5dr_domain *dmn,
@@ -680,13 +596,9 @@ static void dr_fill_write_icm_segs(struct mlx5dr_domain *dmn,
        }
 
        send_ring->pending_wqe++;
-       if (!send_info->write.lkey)
-               send_info->write.send_flags |= IB_SEND_INLINE;
 
        if (send_ring->pending_wqe % send_ring->signal_th == 0)
                send_info->write.send_flags |= IB_SEND_SIGNALED;
-       else
-               send_info->write.send_flags &= ~IB_SEND_SIGNALED;
 
        send_ring->pending_wqe++;
        send_info->read.length = send_info->write.length;
@@ -696,9 +608,9 @@ static void dr_fill_write_icm_segs(struct mlx5dr_domain *dmn,
        send_info->read.lkey = send_ring->sync_mr->mkey;
 
        if (send_ring->pending_wqe % send_ring->signal_th == 0)
-               send_info->read.send_flags |= IB_SEND_SIGNALED;
+               send_info->read.send_flags = IB_SEND_SIGNALED;
        else
-               send_info->read.send_flags &= ~IB_SEND_SIGNALED;
+               send_info->read.send_flags = 0;
 }
 
 static void dr_fill_data_segs(struct mlx5dr_domain *dmn,
@@ -1345,7 +1257,6 @@ int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn)
        dmn->send_ring->cq->qp = dmn->send_ring->qp;
 
        dmn->info.max_send_wr = QUEUE_SIZE;
-       init_attr.max_send_sge = 1;
        dmn->info.max_inline_size = min(dmn->send_ring->qp->max_inline_data,
                                        DR_STE_SIZE);
 
index 0c76c16..295366a 100644 (file)
@@ -624,6 +624,7 @@ struct rtl8169_private {
 
        unsigned supports_gmii:1;
        unsigned aspm_manageable:1;
+       unsigned dash_enabled:1;
        dma_addr_t counters_phys_addr;
        struct rtl8169_counters *counters;
        struct rtl8169_tc_offsets tc_offset;
@@ -1253,14 +1254,26 @@ static bool r8168ep_check_dash(struct rtl8169_private *tp)
        return r8168ep_ocp_read(tp, 0x128) & BIT(0);
 }
 
-static enum rtl_dash_type rtl_check_dash(struct rtl8169_private *tp)
+static bool rtl_dash_is_enabled(struct rtl8169_private *tp)
+{
+       switch (tp->dash_type) {
+       case RTL_DASH_DP:
+               return r8168dp_check_dash(tp);
+       case RTL_DASH_EP:
+               return r8168ep_check_dash(tp);
+       default:
+               return false;
+       }
+}
+
+static enum rtl_dash_type rtl_get_dash_type(struct rtl8169_private *tp)
 {
        switch (tp->mac_version) {
        case RTL_GIGA_MAC_VER_28:
        case RTL_GIGA_MAC_VER_31:
-               return r8168dp_check_dash(tp) ? RTL_DASH_DP : RTL_DASH_NONE;
+               return RTL_DASH_DP;
        case RTL_GIGA_MAC_VER_51 ... RTL_GIGA_MAC_VER_53:
-               return r8168ep_check_dash(tp) ? RTL_DASH_EP : RTL_DASH_NONE;
+               return RTL_DASH_EP;
        default:
                return RTL_DASH_NONE;
        }
@@ -1453,7 +1466,7 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
 
        device_set_wakeup_enable(tp_to_dev(tp), wolopts);
 
-       if (tp->dash_type == RTL_DASH_NONE) {
+       if (!tp->dash_enabled) {
                rtl_set_d3_pll_down(tp, !wolopts);
                tp->dev->wol_enabled = wolopts ? 1 : 0;
        }
@@ -2512,7 +2525,7 @@ static void rtl_wol_enable_rx(struct rtl8169_private *tp)
 
 static void rtl_prepare_power_down(struct rtl8169_private *tp)
 {
-       if (tp->dash_type != RTL_DASH_NONE)
+       if (tp->dash_enabled)
                return;
 
        if (tp->mac_version == RTL_GIGA_MAC_VER_32 ||
@@ -2586,9 +2599,7 @@ static void rtl_set_rx_mode(struct net_device *dev)
                rx_mode &= ~AcceptMulticast;
        } else if (netdev_mc_count(dev) > MC_FILTER_LIMIT ||
                   dev->flags & IFF_ALLMULTI ||
-                  tp->mac_version == RTL_GIGA_MAC_VER_35 ||
-                  tp->mac_version == RTL_GIGA_MAC_VER_46 ||
-                  tp->mac_version == RTL_GIGA_MAC_VER_48) {
+                  tp->mac_version == RTL_GIGA_MAC_VER_35) {
                /* accept all multicasts */
        } else if (netdev_mc_empty(dev)) {
                rx_mode &= ~AcceptMulticast;
@@ -4648,10 +4659,16 @@ static void rtl8169_down(struct rtl8169_private *tp)
        rtl8169_cleanup(tp);
        rtl_disable_exit_l1(tp);
        rtl_prepare_power_down(tp);
+
+       if (tp->dash_type != RTL_DASH_NONE)
+               rtl8168_driver_stop(tp);
 }
 
 static void rtl8169_up(struct rtl8169_private *tp)
 {
+       if (tp->dash_type != RTL_DASH_NONE)
+               rtl8168_driver_start(tp);
+
        pci_set_master(tp->pci_dev);
        phy_init_hw(tp->phydev);
        phy_resume(tp->phydev);
@@ -4869,7 +4886,7 @@ static int rtl8169_runtime_idle(struct device *device)
 {
        struct rtl8169_private *tp = dev_get_drvdata(device);
 
-       if (tp->dash_type != RTL_DASH_NONE)
+       if (tp->dash_enabled)
                return -EBUSY;
 
        if (!netif_running(tp->dev) || !netif_carrier_ok(tp->dev))
@@ -4895,8 +4912,7 @@ static void rtl_shutdown(struct pci_dev *pdev)
        /* Restore original MAC address */
        rtl_rar_set(tp, tp->dev->perm_addr);
 
-       if (system_state == SYSTEM_POWER_OFF &&
-           tp->dash_type == RTL_DASH_NONE) {
+       if (system_state == SYSTEM_POWER_OFF && !tp->dash_enabled) {
                pci_wake_from_d3(pdev, tp->saved_wolopts);
                pci_set_power_state(pdev, PCI_D3hot);
        }
@@ -5254,7 +5270,8 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
                rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1);
        tp->aspm_manageable = !rc;
 
-       tp->dash_type = rtl_check_dash(tp);
+       tp->dash_type = rtl_get_dash_type(tp);
+       tp->dash_enabled = rtl_dash_is_enabled(tp);
 
        tp->cp_cmd = RTL_R16(tp, CPlusCmd) & CPCMD_MASK;
 
@@ -5325,7 +5342,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        /* configure chip for default features */
        rtl8169_set_features(dev, dev->features);
 
-       if (tp->dash_type == RTL_DASH_NONE) {
+       if (!tp->dash_enabled) {
                rtl_set_d3_pll_down(tp, true);
        } else {
                rtl_set_d3_pll_down(tp, false);
@@ -5365,7 +5382,8 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
                            "ok" : "ko");
 
        if (tp->dash_type != RTL_DASH_NONE) {
-               netdev_info(dev, "DASH enabled\n");
+               netdev_info(dev, "DASH %s\n",
+                           tp->dash_enabled ? "enabled" : "disabled");
                rtl8168_driver_start(tp);
        }
 
index a2b9e28..85dcda5 100644 (file)
@@ -280,7 +280,7 @@ config DWMAC_INTEL
 config DWMAC_LOONGSON
        tristate "Loongson PCI DWMAC support"
        default MACH_LOONGSON64
-       depends on STMMAC_ETH && PCI
+       depends on (MACH_LOONGSON64 || COMPILE_TEST) && STMMAC_ETH && PCI
        depends on COMMON_CLK
        help
          This selects the LOONGSON PCI bus support for the stmmac driver,
index 3e50fd5..2afb2bd 100644 (file)
@@ -5293,6 +5293,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
 
        dma_dir = page_pool_get_dma_dir(rx_q->page_pool);
        buf_sz = DIV_ROUND_UP(priv->dma_conf.dma_buf_sz, PAGE_SIZE) * PAGE_SIZE;
+       limit = min(priv->dma_conf.dma_rx_size - 1, (unsigned int)limit);
 
        if (netif_msg_rx_status(priv)) {
                void *rx_head;
@@ -5328,10 +5329,10 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
                        len = 0;
                }
 
+read_again:
                if (count >= limit)
                        break;
 
-read_again:
                buf1_len = 0;
                buf2_len = 0;
                entry = next_entry;
index 6c4b642..411898a 100644 (file)
@@ -2063,7 +2063,7 @@ static int prueth_probe(struct platform_device *pdev)
                                       &prueth->shram);
        if (ret) {
                dev_err(dev, "unable to get PRUSS SHRD RAM2: %d\n", ret);
-               pruss_put(prueth->pruss);
+               goto put_pruss;
        }
 
        prueth->sram_pool = of_gen_pool_get(np, "sram", 0);
@@ -2105,10 +2105,7 @@ static int prueth_probe(struct platform_device *pdev)
        prueth->iep1 = icss_iep_get_idx(np, 1);
        if (IS_ERR(prueth->iep1)) {
                ret = dev_err_probe(dev, PTR_ERR(prueth->iep1), "iep1 get failed\n");
-               icss_iep_put(prueth->iep0);
-               prueth->iep0 = NULL;
-               prueth->iep1 = NULL;
-               goto free_pool;
+               goto put_iep0;
        }
 
        if (prueth->pdata.quirk_10m_link_issue) {
@@ -2205,6 +2202,12 @@ netdev_exit:
 exit_iep:
        if (prueth->pdata.quirk_10m_link_issue)
                icss_iep_exit_fw(prueth->iep1);
+       icss_iep_put(prueth->iep1);
+
+put_iep0:
+       icss_iep_put(prueth->iep0);
+       prueth->iep0 = NULL;
+       prueth->iep1 = NULL;
 
 free_pool:
        gen_pool_free(prueth->sram_pool,
@@ -2212,6 +2215,8 @@ free_pool:
 
 put_mem:
        pruss_release_mem_region(prueth->pruss, &prueth->shram);
+
+put_pruss:
        pruss_put(prueth->pruss);
 
 put_cores:
index a3c5de9..533e912 100644 (file)
@@ -1769,10 +1769,12 @@ int wx_sw_init(struct wx *wx)
                wx->subsystem_device_id = pdev->subsystem_device;
        } else {
                err = wx_flash_read_dword(wx, 0xfffdc, &ssid);
-               if (!err)
-                       wx->subsystem_device_id = swab16((u16)ssid);
+               if (err < 0) {
+                       wx_err(wx, "read of internal subsystem device id failed\n");
+                       return err;
+               }
 
-               return err;
+               wx->subsystem_device_id = swab16((u16)ssid);
        }
 
        wx->mac_table = kcalloc(wx->mac.num_rar_entries,
index 3d43f80..8db8045 100644 (file)
@@ -121,10 +121,8 @@ static int ngbe_sw_init(struct wx *wx)
 
        /* PCI config space info */
        err = wx_sw_init(wx);
-       if (err < 0) {
-               wx_err(wx, "read of internal subsystem device id failed\n");
+       if (err < 0)
                return err;
-       }
 
        /* mac type, phy type , oem type */
        ngbe_init_type_code(wx);
index 70f0b5c..5262501 100644 (file)
@@ -364,10 +364,8 @@ static int txgbe_sw_init(struct wx *wx)
 
        /* PCI config space info */
        err = wx_sw_init(wx);
-       if (err < 0) {
-               wx_err(wx, "read of internal subsystem device id failed\n");
+       if (err < 0)
                return err;
-       }
 
        txgbe_init_type_code(wx);
 
index 82d0d44..bf6e339 100644 (file)
@@ -822,7 +822,7 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
                if (lp->features & XAE_FEATURE_FULL_TX_CSUM) {
                        /* Tx Full Checksum Offload Enabled */
                        cur_p->app0 |= 2;
-               } else if (lp->features & XAE_FEATURE_PARTIAL_RX_CSUM) {
+               } else if (lp->features & XAE_FEATURE_PARTIAL_TX_CSUM) {
                        csum_start_off = skb_transport_offset(skb);
                        csum_index_off = csum_start_off + skb->csum_offset;
                        /* Tx Partial Checksum Offload Enabled */
index 3ba3c8f..706ea52 100644 (file)
@@ -2206,9 +2206,6 @@ static int netvsc_vf_join(struct net_device *vf_netdev,
                goto upper_link_failed;
        }
 
-       /* set slave flag before open to prevent IPv6 addrconf */
-       vf_netdev->flags |= IFF_SLAVE;
-
        schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT);
 
        call_netdevice_notifiers(NETDEV_JOIN, vf_netdev);
@@ -2315,16 +2312,18 @@ static struct net_device *get_netvsc_byslot(const struct net_device *vf_netdev)
 
        }
 
-       /* Fallback path to check synthetic vf with
-        * help of mac addr
+       /* Fallback path to check synthetic vf with help of mac addr.
+        * Because this function can be called before vf_netdev is
+        * initialized (NETDEV_POST_INIT) when its perm_addr has not been copied
+        * from dev_addr, also try to match to its dev_addr.
+        * Note: On Hyper-V and Azure, it's not possible to set a MAC address
+        * on a VF that matches to the MAC of a unrelated NETVSC device.
         */
        list_for_each_entry(ndev_ctx, &netvsc_dev_list, list) {
                ndev = hv_get_drvdata(ndev_ctx->device_ctx);
-               if (ether_addr_equal(vf_netdev->perm_addr, ndev->perm_addr)) {
-                       netdev_notice(vf_netdev,
-                                     "falling back to mac addr based matching\n");
+               if (ether_addr_equal(vf_netdev->perm_addr, ndev->perm_addr) ||
+                   ether_addr_equal(vf_netdev->dev_addr, ndev->perm_addr))
                        return ndev;
-               }
        }
 
        netdev_notice(vf_netdev,
@@ -2332,6 +2331,19 @@ static struct net_device *get_netvsc_byslot(const struct net_device *vf_netdev)
        return NULL;
 }
 
+static int netvsc_prepare_bonding(struct net_device *vf_netdev)
+{
+       struct net_device *ndev;
+
+       ndev = get_netvsc_byslot(vf_netdev);
+       if (!ndev)
+               return NOTIFY_DONE;
+
+       /* set slave flag before open to prevent IPv6 addrconf */
+       vf_netdev->flags |= IFF_SLAVE;
+       return NOTIFY_DONE;
+}
+
 static int netvsc_register_vf(struct net_device *vf_netdev)
 {
        struct net_device_context *net_device_ctx;
@@ -2531,15 +2543,6 @@ static int netvsc_probe(struct hv_device *dev,
                goto devinfo_failed;
        }
 
-       nvdev = rndis_filter_device_add(dev, device_info);
-       if (IS_ERR(nvdev)) {
-               ret = PTR_ERR(nvdev);
-               netdev_err(net, "unable to add netvsc device (ret %d)\n", ret);
-               goto rndis_failed;
-       }
-
-       eth_hw_addr_set(net, device_info->mac_adr);
-
        /* We must get rtnl lock before scheduling nvdev->subchan_work,
         * otherwise netvsc_subchan_work() can get rtnl lock first and wait
         * all subchannels to show up, but that may not happen because
@@ -2547,9 +2550,23 @@ static int netvsc_probe(struct hv_device *dev,
         * -> ... -> device_add() -> ... -> __device_attach() can't get
         * the device lock, so all the subchannels can't be processed --
         * finally netvsc_subchan_work() hangs forever.
+        *
+        * The rtnl lock also needs to be held before rndis_filter_device_add()
+        * which advertises nvsp_2_vsc_capability / sriov bit, and triggers
+        * VF NIC offering and registering. If VF NIC finished register_netdev()
+        * earlier it may cause name based config failure.
         */
        rtnl_lock();
 
+       nvdev = rndis_filter_device_add(dev, device_info);
+       if (IS_ERR(nvdev)) {
+               ret = PTR_ERR(nvdev);
+               netdev_err(net, "unable to add netvsc device (ret %d)\n", ret);
+               goto rndis_failed;
+       }
+
+       eth_hw_addr_set(net, device_info->mac_adr);
+
        if (nvdev->num_chn > 1)
                schedule_work(&nvdev->subchan_work);
 
@@ -2586,9 +2603,9 @@ static int netvsc_probe(struct hv_device *dev,
        return 0;
 
 register_failed:
-       rtnl_unlock();
        rndis_filter_device_remove(dev, nvdev);
 rndis_failed:
+       rtnl_unlock();
        netvsc_devinfo_put(device_info);
 devinfo_failed:
        free_percpu(net_device_ctx->vf_stats);
@@ -2753,6 +2770,8 @@ static int netvsc_netdev_event(struct notifier_block *this,
                return NOTIFY_DONE;
 
        switch (event) {
+       case NETDEV_POST_INIT:
+               return netvsc_prepare_bonding(event_dev);
        case NETDEV_REGISTER:
                return netvsc_register_vf(event_dev);
        case NETDEV_UNREGISTER:
@@ -2788,12 +2807,17 @@ static int __init netvsc_drv_init(void)
        }
        netvsc_ring_bytes = ring_size * PAGE_SIZE;
 
+       register_netdevice_notifier(&netvsc_netdev_notifier);
+
        ret = vmbus_driver_register(&netvsc_drv);
        if (ret)
-               return ret;
+               goto err_vmbus_reg;
 
-       register_netdevice_notifier(&netvsc_netdev_notifier);
        return 0;
+
+err_vmbus_reg:
+       unregister_netdevice_notifier(&netvsc_netdev_notifier);
+       return ret;
 }
 
 MODULE_LICENSE("GPL");
index d7b81a3..145eb0b 100644 (file)
@@ -78,7 +78,7 @@ REG_STRIDE_FIELDS(EV_CH_E_CNTXT_0, ev_ch_e_cntxt_0,
                  0x0001c000 + 0x12000 * GSI_EE_AP, 0x80);
 
 static const u32 reg_ev_ch_e_cntxt_1_fmask[] = {
-       [R_LENGTH]                                      = GENMASK(19, 0),
+       [R_LENGTH]                                      = GENMASK(23, 0),
 };
 
 REG_STRIDE_FIELDS(EV_CH_E_CNTXT_1, ev_ch_e_cntxt_1,
index 21e9cac..2d5b021 100644 (file)
@@ -411,7 +411,7 @@ struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port, void *lyr3h,
        return addr;
 }
 
-static int ipvlan_process_v4_outbound(struct sk_buff *skb)
+static noinline_for_stack int ipvlan_process_v4_outbound(struct sk_buff *skb)
 {
        const struct iphdr *ip4h = ip_hdr(skb);
        struct net_device *dev = skb->dev;
@@ -453,13 +453,11 @@ out:
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
-static int ipvlan_process_v6_outbound(struct sk_buff *skb)
+
+static noinline_for_stack int
+ipvlan_route_v6_outbound(struct net_device *dev, struct sk_buff *skb)
 {
        const struct ipv6hdr *ip6h = ipv6_hdr(skb);
-       struct net_device *dev = skb->dev;
-       struct net *net = dev_net(dev);
-       struct dst_entry *dst;
-       int err, ret = NET_XMIT_DROP;
        struct flowi6 fl6 = {
                .flowi6_oif = dev->ifindex,
                .daddr = ip6h->daddr,
@@ -469,27 +467,38 @@ static int ipvlan_process_v6_outbound(struct sk_buff *skb)
                .flowi6_mark = skb->mark,
                .flowi6_proto = ip6h->nexthdr,
        };
+       struct dst_entry *dst;
+       int err;
 
-       dst = ip6_route_output(net, NULL, &fl6);
-       if (dst->error) {
-               ret = dst->error;
+       dst = ip6_route_output(dev_net(dev), NULL, &fl6);
+       err = dst->error;
+       if (err) {
                dst_release(dst);
-               goto err;
+               return err;
        }
        skb_dst_set(skb, dst);
+       return 0;
+}
+
+static int ipvlan_process_v6_outbound(struct sk_buff *skb)
+{
+       struct net_device *dev = skb->dev;
+       int err, ret = NET_XMIT_DROP;
+
+       err = ipvlan_route_v6_outbound(dev, skb);
+       if (unlikely(err)) {
+               DEV_STATS_INC(dev, tx_errors);
+               kfree_skb(skb);
+               return err;
+       }
 
        memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
 
-       err = ip6_local_out(net, skb->sk, skb);
+       err = ip6_local_out(dev_net(dev), skb->sk, skb);
        if (unlikely(net_xmit_eval(err)))
                DEV_STATS_INC(dev, tx_errors);
        else
                ret = NET_XMIT_SUCCESS;
-       goto out;
-err:
-       DEV_STATS_INC(dev, tx_errors);
-       kfree_skb(skb);
-out:
        return ret;
 }
 #else
index 02bd201..c8da94a 100644 (file)
@@ -780,7 +780,7 @@ static void macvlan_change_rx_flags(struct net_device *dev, int change)
        if (dev->flags & IFF_UP) {
                if (change & IFF_ALLMULTI)
                        dev_set_allmulti(lowerdev, dev->flags & IFF_ALLMULTI ? 1 : -1);
-               if (change & IFF_PROMISC)
+               if (!macvlan_passthru(vlan->port) && change & IFF_PROMISC)
                        dev_set_promiscuity(lowerdev,
                                            dev->flags & IFF_PROMISC ? 1 : -1);
 
index 5a0f86f..97bd670 100644 (file)
@@ -7,6 +7,7 @@
 #include <linux/filter.h>
 #include <linux/netfilter_netdev.h>
 #include <linux/bpf_mprog.h>
+#include <linux/indirect_call_wrapper.h>
 
 #include <net/netkit.h>
 #include <net/dst.h>
@@ -68,6 +69,7 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev)
        netdev_tx_t ret_dev = NET_XMIT_SUCCESS;
        const struct bpf_mprog_entry *entry;
        struct net_device *peer;
+       int len = skb->len;
 
        rcu_read_lock();
        peer = rcu_dereference(nk->peer);
@@ -85,15 +87,22 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev)
        case NETKIT_PASS:
                skb->protocol = eth_type_trans(skb, skb->dev);
                skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
-               __netif_rx(skb);
+               if (likely(__netif_rx(skb) == NET_RX_SUCCESS)) {
+                       dev_sw_netstats_tx_add(dev, 1, len);
+                       dev_sw_netstats_rx_add(peer, len);
+               } else {
+                       goto drop_stats;
+               }
                break;
        case NETKIT_REDIRECT:
+               dev_sw_netstats_tx_add(dev, 1, len);
                skb_do_redirect(skb);
                break;
        case NETKIT_DROP:
        default:
 drop:
                kfree_skb(skb);
+drop_stats:
                dev_core_stats_tx_dropped_inc(dev);
                ret_dev = NET_XMIT_DROP;
                break;
@@ -169,11 +178,18 @@ out:
        rcu_read_unlock();
 }
 
-static struct net_device *netkit_peer_dev(struct net_device *dev)
+INDIRECT_CALLABLE_SCOPE struct net_device *netkit_peer_dev(struct net_device *dev)
 {
        return rcu_dereference(netkit_priv(dev)->peer);
 }
 
+static void netkit_get_stats(struct net_device *dev,
+                            struct rtnl_link_stats64 *stats)
+{
+       dev_fetch_sw_netstats(stats, dev->tstats);
+       stats->tx_dropped = DEV_STATS_READ(dev, tx_dropped);
+}
+
 static void netkit_uninit(struct net_device *dev);
 
 static const struct net_device_ops netkit_netdev_ops = {
@@ -184,6 +200,7 @@ static const struct net_device_ops netkit_netdev_ops = {
        .ndo_set_rx_headroom    = netkit_set_headroom,
        .ndo_get_iflink         = netkit_get_iflink,
        .ndo_get_peer_dev       = netkit_peer_dev,
+       .ndo_get_stats64        = netkit_get_stats,
        .ndo_uninit             = netkit_uninit,
        .ndo_features_check     = passthru_features_check,
 };
@@ -218,6 +235,7 @@ static void netkit_setup(struct net_device *dev)
 
        ether_setup(dev);
        dev->max_mtu = ETH_MAX_MTU;
+       dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
 
        dev->flags |= IFF_NOARP;
        dev->priv_flags &= ~IFF_TX_SKB_SHARING;
index ebcdffd..52d05ce 100644 (file)
@@ -453,6 +453,10 @@ ppp_sync_ioctl(struct ppp_channel *chan, unsigned int cmd, unsigned long arg)
        case PPPIOCSMRU:
                if (get_user(val, (int __user *) argp))
                        break;
+               if (val > U16_MAX) {
+                       err = -EINVAL;
+                       break;
+               }
                if (val < PPP_MRU)
                        val = PPP_MRU;
                ap->mru = val;
@@ -687,7 +691,7 @@ ppp_sync_input(struct syncppp *ap, const u8 *buf, const u8 *flags, int count)
 
        /* strip address/control field if present */
        p = skb->data;
-       if (p[0] == PPP_ALLSTATIONS && p[1] == PPP_UI) {
+       if (skb->len >= 2 && p[0] == PPP_ALLSTATIONS && p[1] == PPP_UI) {
                /* chop off address/control */
                if (skb->len < 3)
                        goto err;
index a017e9d..7b8afa5 100644 (file)
@@ -1079,17 +1079,17 @@ static int aqc111_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
        u16 pkt_count = 0;
        u64 desc_hdr = 0;
        u16 vlan_tag = 0;
-       u32 skb_len = 0;
+       u32 skb_len;
 
        if (!skb)
                goto err;
 
-       if (skb->len == 0)
+       skb_len = skb->len;
+       if (skb_len < sizeof(desc_hdr))
                goto err;
 
-       skb_len = skb->len;
        /* RX Descriptor Header */
-       skb_trim(skb, skb->len - sizeof(desc_hdr));
+       skb_trim(skb, skb_len - sizeof(desc_hdr));
        desc_hdr = le64_to_cpup((u64 *)skb_tail_pointer(skb));
 
        /* Check these packets */
index aff39bf..4ea0e15 100644 (file)
@@ -1583,11 +1583,11 @@ static int ax88179_reset(struct usbnet *dev)
 
        *tmp16 = AX_PHYPWR_RSTCTL_IPRL;
        ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_PHYPWR_RSTCTL, 2, 2, tmp16);
-       msleep(200);
+       msleep(500);
 
        *tmp = AX_CLK_SELECT_ACS | AX_CLK_SELECT_BCS;
        ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_CLK_SELECT, 1, 1, tmp);
-       msleep(100);
+       msleep(200);
 
        /* Ethernet PHY Auto Detach*/
        ax88179_auto_detach(dev);
index 344af3c..e2e1813 100644 (file)
@@ -1289,6 +1289,7 @@ static const struct usb_device_id products[] = {
        {QMI_FIXED_INTF(0x19d2, 0x0168, 4)},
        {QMI_FIXED_INTF(0x19d2, 0x0176, 3)},
        {QMI_FIXED_INTF(0x19d2, 0x0178, 3)},
+       {QMI_FIXED_INTF(0x19d2, 0x0189, 4)},    /* ZTE MF290 */
        {QMI_FIXED_INTF(0x19d2, 0x0191, 4)},    /* ZTE EuFi890 */
        {QMI_FIXED_INTF(0x19d2, 0x0199, 1)},    /* ZTE MF820S */
        {QMI_FIXED_INTF(0x19d2, 0x0200, 1)},
index 9980517..57efb34 100644 (file)
@@ -236,8 +236,8 @@ static void veth_get_ethtool_stats(struct net_device *dev,
                                data[tx_idx + j] += *(u64 *)(base + offset);
                        }
                } while (u64_stats_fetch_retry(&rq_stats->syncp, start));
-               pp_idx = tx_idx + VETH_TQ_STATS_LEN;
        }
+       pp_idx = idx + dev->real_num_tx_queues * VETH_TQ_STATS_LEN;
 
 page_pool_stats:
        veth_get_page_pool_stats(dev, &data[pp_idx]);
@@ -373,7 +373,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
        skb_tx_timestamp(skb);
        if (likely(veth_forward_skb(rcv, skb, rq, use_napi) == NET_RX_SUCCESS)) {
                if (!use_napi)
-                       dev_lstats_add(dev, length);
+                       dev_sw_netstats_tx_add(dev, 1, length);
                else
                        __veth_xdp_flush(rq);
        } else {
@@ -387,14 +387,6 @@ drop:
        return ret;
 }
 
-static u64 veth_stats_tx(struct net_device *dev, u64 *packets, u64 *bytes)
-{
-       struct veth_priv *priv = netdev_priv(dev);
-
-       dev_lstats_read(dev, packets, bytes);
-       return atomic64_read(&priv->dropped);
-}
-
 static void veth_stats_rx(struct veth_stats *result, struct net_device *dev)
 {
        struct veth_priv *priv = netdev_priv(dev);
@@ -432,24 +424,24 @@ static void veth_get_stats64(struct net_device *dev,
        struct veth_priv *priv = netdev_priv(dev);
        struct net_device *peer;
        struct veth_stats rx;
-       u64 packets, bytes;
 
-       tot->tx_dropped = veth_stats_tx(dev, &packets, &bytes);
-       tot->tx_bytes = bytes;
-       tot->tx_packets = packets;
+       tot->tx_dropped = atomic64_read(&priv->dropped);
+       dev_fetch_sw_netstats(tot, dev->tstats);
 
        veth_stats_rx(&rx, dev);
        tot->tx_dropped += rx.xdp_tx_err;
        tot->rx_dropped = rx.rx_drops + rx.peer_tq_xdp_xmit_err;
-       tot->rx_bytes = rx.xdp_bytes;
-       tot->rx_packets = rx.xdp_packets;
+       tot->rx_bytes += rx.xdp_bytes;
+       tot->rx_packets += rx.xdp_packets;
 
        rcu_read_lock();
        peer = rcu_dereference(priv->peer);
        if (peer) {
-               veth_stats_tx(peer, &packets, &bytes);
-               tot->rx_bytes += bytes;
-               tot->rx_packets += packets;
+               struct rtnl_link_stats64 tot_peer = {};
+
+               dev_fetch_sw_netstats(&tot_peer, peer->tstats);
+               tot->rx_bytes += tot_peer.tx_bytes;
+               tot->rx_packets += tot_peer.tx_packets;
 
                veth_stats_rx(&rx, peer);
                tot->tx_dropped += rx.peer_tq_xdp_xmit_err;
@@ -1506,25 +1498,12 @@ static void veth_free_queues(struct net_device *dev)
 
 static int veth_dev_init(struct net_device *dev)
 {
-       int err;
-
-       dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats);
-       if (!dev->lstats)
-               return -ENOMEM;
-
-       err = veth_alloc_queues(dev);
-       if (err) {
-               free_percpu(dev->lstats);
-               return err;
-       }
-
-       return 0;
+       return veth_alloc_queues(dev);
 }
 
 static void veth_dev_free(struct net_device *dev)
 {
        veth_free_queues(dev);
-       free_percpu(dev->lstats);
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -1796,6 +1775,7 @@ static void veth_setup(struct net_device *dev)
                               NETIF_F_HW_VLAN_STAG_RX);
        dev->needs_free_netdev = true;
        dev->priv_destructor = veth_dev_free;
+       dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
        dev->max_mtu = ETH_MAX_MTU;
 
        dev->hw_features = VETH_FEATURES;
index db76694..bb95ce4 100644 (file)
@@ -121,22 +121,12 @@ struct net_vrf {
        int                     ifindex;
 };
 
-struct pcpu_dstats {
-       u64                     tx_pkts;
-       u64                     tx_bytes;
-       u64                     tx_drps;
-       u64                     rx_pkts;
-       u64                     rx_bytes;
-       u64                     rx_drps;
-       struct u64_stats_sync   syncp;
-};
-
 static void vrf_rx_stats(struct net_device *dev, int len)
 {
        struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats);
 
        u64_stats_update_begin(&dstats->syncp);
-       dstats->rx_pkts++;
+       dstats->rx_packets++;
        dstats->rx_bytes += len;
        u64_stats_update_end(&dstats->syncp);
 }
@@ -161,10 +151,10 @@ static void vrf_get_stats64(struct net_device *dev,
                do {
                        start = u64_stats_fetch_begin(&dstats->syncp);
                        tbytes = dstats->tx_bytes;
-                       tpkts = dstats->tx_pkts;
-                       tdrops = dstats->tx_drps;
+                       tpkts = dstats->tx_packets;
+                       tdrops = dstats->tx_drops;
                        rbytes = dstats->rx_bytes;
-                       rpkts = dstats->rx_pkts;
+                       rpkts = dstats->rx_packets;
                } while (u64_stats_fetch_retry(&dstats->syncp, start));
                stats->tx_bytes += tbytes;
                stats->tx_packets += tpkts;
@@ -421,7 +411,7 @@ static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev,
        if (likely(__netif_rx(skb) == NET_RX_SUCCESS))
                vrf_rx_stats(dev, len);
        else
-               this_cpu_inc(dev->dstats->rx_drps);
+               this_cpu_inc(dev->dstats->rx_drops);
 
        return NETDEV_TX_OK;
 }
@@ -616,11 +606,11 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev)
                struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats);
 
                u64_stats_update_begin(&dstats->syncp);
-               dstats->tx_pkts++;
+               dstats->tx_packets++;
                dstats->tx_bytes += len;
                u64_stats_update_end(&dstats->syncp);
        } else {
-               this_cpu_inc(dev->dstats->tx_drps);
+               this_cpu_inc(dev->dstats->tx_drops);
        }
 
        return ret;
@@ -1174,22 +1164,15 @@ static void vrf_dev_uninit(struct net_device *dev)
 
        vrf_rtable_release(dev, vrf);
        vrf_rt6_release(dev, vrf);
-
-       free_percpu(dev->dstats);
-       dev->dstats = NULL;
 }
 
 static int vrf_dev_init(struct net_device *dev)
 {
        struct net_vrf *vrf = netdev_priv(dev);
 
-       dev->dstats = netdev_alloc_pcpu_stats(struct pcpu_dstats);
-       if (!dev->dstats)
-               goto out_nomem;
-
        /* create the default dst which points back to us */
        if (vrf_rtable_create(dev) != 0)
-               goto out_stats;
+               goto out_nomem;
 
        if (vrf_rt6_create(dev) != 0)
                goto out_rth;
@@ -1203,9 +1186,6 @@ static int vrf_dev_init(struct net_device *dev)
 
 out_rth:
        vrf_rtable_release(dev, vrf);
-out_stats:
-       free_percpu(dev->dstats);
-       dev->dstats = NULL;
 out_nomem:
        return -ENOMEM;
 }
@@ -1704,6 +1684,8 @@ static void vrf_setup(struct net_device *dev)
        dev->min_mtu = IPV6_MIN_MTU;
        dev->max_mtu = IP6_MAX_MTU;
        dev->mtu = dev->max_mtu;
+
+       dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS;
 }
 
 static int vrf_validate(struct nlattr *tb[], struct nlattr *data[],
index 258dcc1..deb9636 100644 (file)
@@ -210,7 +210,7 @@ static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev)
         */
        while (skb_queue_len(&peer->staged_packet_queue) > MAX_STAGED_PACKETS) {
                dev_kfree_skb(__skb_dequeue(&peer->staged_packet_queue));
-               ++dev->stats.tx_dropped;
+               DEV_STATS_INC(dev, tx_dropped);
        }
        skb_queue_splice_tail(&packets, &peer->staged_packet_queue);
        spin_unlock_bh(&peer->staged_packet_queue.lock);
@@ -228,7 +228,7 @@ err_icmp:
        else if (skb->protocol == htons(ETH_P_IPV6))
                icmpv6_ndo_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
 err:
-       ++dev->stats.tx_errors;
+       DEV_STATS_INC(dev, tx_errors);
        kfree_skb(skb);
        return ret;
 }
index 0b3f0c8..a176653 100644 (file)
@@ -416,20 +416,20 @@ dishonest_packet_peer:
        net_dbg_skb_ratelimited("%s: Packet has unallowed src IP (%pISc) from peer %llu (%pISpfsc)\n",
                                dev->name, skb, peer->internal_id,
                                &peer->endpoint.addr);
-       ++dev->stats.rx_errors;
-       ++dev->stats.rx_frame_errors;
+       DEV_STATS_INC(dev, rx_errors);
+       DEV_STATS_INC(dev, rx_frame_errors);
        goto packet_processed;
 dishonest_packet_type:
        net_dbg_ratelimited("%s: Packet is neither ipv4 nor ipv6 from peer %llu (%pISpfsc)\n",
                            dev->name, peer->internal_id, &peer->endpoint.addr);
-       ++dev->stats.rx_errors;
-       ++dev->stats.rx_frame_errors;
+       DEV_STATS_INC(dev, rx_errors);
+       DEV_STATS_INC(dev, rx_frame_errors);
        goto packet_processed;
 dishonest_packet_size:
        net_dbg_ratelimited("%s: Packet has incorrect size from peer %llu (%pISpfsc)\n",
                            dev->name, peer->internal_id, &peer->endpoint.addr);
-       ++dev->stats.rx_errors;
-       ++dev->stats.rx_length_errors;
+       DEV_STATS_INC(dev, rx_errors);
+       DEV_STATS_INC(dev, rx_length_errors);
        goto packet_processed;
 packet_processed:
        dev_kfree_skb(skb);
index 95c853b..0d48e0f 100644 (file)
@@ -333,7 +333,8 @@ err:
 void wg_packet_purge_staged_packets(struct wg_peer *peer)
 {
        spin_lock_bh(&peer->staged_packet_queue.lock);
-       peer->device->dev->stats.tx_dropped += peer->staged_packet_queue.qlen;
+       DEV_STATS_ADD(peer->device->dev, tx_dropped,
+                     peer->staged_packet_queue.qlen);
        __skb_queue_purge(&peer->staged_packet_queue);
        spin_unlock_bh(&peer->staged_packet_queue.lock);
 }
index b027be0..590b038 100644 (file)
@@ -26,10 +26,14 @@ struct virtual_nci_dev {
        struct mutex mtx;
        struct sk_buff *send_buff;
        struct wait_queue_head wq;
+       bool running;
 };
 
 static int virtual_nci_open(struct nci_dev *ndev)
 {
+       struct virtual_nci_dev *vdev = nci_get_drvdata(ndev);
+
+       vdev->running = true;
        return 0;
 }
 
@@ -40,6 +44,7 @@ static int virtual_nci_close(struct nci_dev *ndev)
        mutex_lock(&vdev->mtx);
        kfree_skb(vdev->send_buff);
        vdev->send_buff = NULL;
+       vdev->running = false;
        mutex_unlock(&vdev->mtx);
 
        return 0;
@@ -50,7 +55,7 @@ static int virtual_nci_send(struct nci_dev *ndev, struct sk_buff *skb)
        struct virtual_nci_dev *vdev = nci_get_drvdata(ndev);
 
        mutex_lock(&vdev->mtx);
-       if (vdev->send_buff) {
+       if (vdev->send_buff || !vdev->running) {
                mutex_unlock(&vdev->mtx);
                kfree_skb(skb);
                return -1;
index 48328e3..72c0525 100644 (file)
@@ -757,12 +757,11 @@ static void nvme_queue_auth_work(struct work_struct *work)
                __func__, chap->qid);
        mutex_lock(&ctrl->dhchap_auth_mutex);
        ret = nvme_auth_dhchap_setup_host_response(ctrl, chap);
+       mutex_unlock(&ctrl->dhchap_auth_mutex);
        if (ret) {
-               mutex_unlock(&ctrl->dhchap_auth_mutex);
                chap->error = ret;
                goto fail2;
        }
-       mutex_unlock(&ctrl->dhchap_auth_mutex);
 
        /* DH-HMAC-CHAP Step 3: send reply */
        dev_dbg(ctrl->device, "%s: qid %d send reply\n",
@@ -839,6 +838,8 @@ static void nvme_queue_auth_work(struct work_struct *work)
        }
 
 fail2:
+       if (chap->status == 0)
+               chap->status = NVME_AUTH_DHCHAP_FAILURE_FAILED;
        dev_dbg(ctrl->device, "%s: qid %d send failure2, status %x\n",
                __func__, chap->qid, chap->status);
        tl = nvme_auth_set_dhchap_failure2_data(ctrl, chap);
index 88b54cd..46a4c9c 100644 (file)
@@ -482,7 +482,6 @@ EXPORT_SYMBOL_GPL(nvme_cancel_tagset);
 
 void nvme_cancel_admin_tagset(struct nvme_ctrl *ctrl)
 {
-       nvme_stop_keep_alive(ctrl);
        if (ctrl->admin_tagset) {
                blk_mq_tagset_busy_iter(ctrl->admin_tagset,
                                nvme_cancel_request, ctrl);
@@ -1814,16 +1813,18 @@ set_pi:
        return ret;
 }
 
-static void nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
+static int nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
 {
        struct nvme_ctrl *ctrl = ns->ctrl;
+       int ret;
 
-       if (nvme_init_ms(ns, id))
-               return;
+       ret = nvme_init_ms(ns, id);
+       if (ret)
+               return ret;
 
        ns->features &= ~(NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS);
        if (!ns->ms || !(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
-               return;
+               return 0;
 
        if (ctrl->ops->flags & NVME_F_FABRICS) {
                /*
@@ -1832,7 +1833,7 @@ static void nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
                 * remap the separate metadata buffer from the block layer.
                 */
                if (WARN_ON_ONCE(!(id->flbas & NVME_NS_FLBAS_META_EXT)))
-                       return;
+                       return 0;
 
                ns->features |= NVME_NS_EXT_LBAS;
 
@@ -1859,6 +1860,7 @@ static void nvme_configure_metadata(struct nvme_ns *ns, struct nvme_id_ns *id)
                else
                        ns->features |= NVME_NS_METADATA_SUPPORTED;
        }
+       return 0;
 }
 
 static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
@@ -2032,7 +2034,11 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
        ns->lba_shift = id->lbaf[lbaf].ds;
        nvme_set_queue_limits(ns->ctrl, ns->queue);
 
-       nvme_configure_metadata(ns, id);
+       ret = nvme_configure_metadata(ns, id);
+       if (ret < 0) {
+               blk_mq_unfreeze_queue(ns->disk->queue);
+               goto out;
+       }
        nvme_set_chunk_sectors(ns, id);
        nvme_update_disk_info(ns->disk, ns, id);
 
@@ -4348,6 +4354,7 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
 {
        nvme_mpath_stop(ctrl);
        nvme_auth_stop(ctrl);
+       nvme_stop_keep_alive(ctrl);
        nvme_stop_failfast_work(ctrl);
        flush_work(&ctrl->async_event_work);
        cancel_work_sync(&ctrl->fw_act_work);
index 4673ead..aa88606 100644 (file)
@@ -667,8 +667,10 @@ static const match_table_t opt_tokens = {
 #endif
        { NVMF_OPT_FAIL_FAST_TMO,       "fast_io_fail_tmo=%d"   },
        { NVMF_OPT_DISCOVERY,           "discovery"             },
+#ifdef CONFIG_NVME_HOST_AUTH
        { NVMF_OPT_DHCHAP_SECRET,       "dhchap_secret=%s"      },
        { NVMF_OPT_DHCHAP_CTRL_SECRET,  "dhchap_ctrl_secret=%s" },
+#endif
 #ifdef CONFIG_NVME_TCP_TLS
        { NVMF_OPT_TLS,                 "tls"                   },
 #endif
index 49c3e46..9f9a3b3 100644 (file)
@@ -2530,12 +2530,6 @@ __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues)
         * clean up the admin queue. Same thing as above.
         */
        nvme_quiesce_admin_queue(&ctrl->ctrl);
-
-       /*
-        * Open-coding nvme_cancel_admin_tagset() as fc
-        * is not using nvme_cancel_request().
-        */
-       nvme_stop_keep_alive(&ctrl->ctrl);
        blk_sync_queue(ctrl->ctrl.admin_q);
        blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
                                nvme_fc_terminate_exchange, &ctrl->ctrl);
@@ -3138,11 +3132,12 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
        nvme_unquiesce_admin_queue(&ctrl->ctrl);
 
        ret = nvme_init_ctrl_finish(&ctrl->ctrl, false);
-       if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags))
-               ret = -EIO;
        if (ret)
                goto out_disconnect_admin_queue;
-
+       if (test_bit(ASSOC_FAILED, &ctrl->flags)) {
+               ret = -EIO;
+               goto out_stop_keep_alive;
+       }
        /* sanity checks */
 
        /* FC-NVME does not have other data in the capsule */
@@ -3150,7 +3145,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
                dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n",
                                ctrl->ctrl.icdoff);
                ret = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
-               goto out_disconnect_admin_queue;
+               goto out_stop_keep_alive;
        }
 
        /* FC-NVME supports normal SGL Data Block Descriptors */
@@ -3158,7 +3153,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
                dev_err(ctrl->ctrl.device,
                        "Mandatory sgls are not supported!\n");
                ret = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
-               goto out_disconnect_admin_queue;
+               goto out_stop_keep_alive;
        }
 
        if (opts->queue_size > ctrl->ctrl.maxcmd) {
@@ -3205,6 +3200,8 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 
 out_term_aen_ops:
        nvme_fc_term_aen_ops(ctrl);
+out_stop_keep_alive:
+       nvme_stop_keep_alive(&ctrl->ctrl);
 out_disconnect_admin_queue:
        dev_warn(ctrl->ctrl.device,
                "NVME-FC{%d}: create_assoc failed, assoc_id %llx ret %d\n",
index a7fea4c..6d178d5 100644 (file)
@@ -1080,6 +1080,7 @@ destroy_io:
                nvme_rdma_free_io_queues(ctrl);
        }
 destroy_admin:
+       nvme_stop_keep_alive(&ctrl->ctrl);
        nvme_quiesce_admin_queue(&ctrl->ctrl);
        blk_sync_queue(ctrl->ctrl.admin_q);
        nvme_rdma_stop_queue(&ctrl->queues[0]);
index 89661a9..d79811c 100644 (file)
@@ -36,11 +36,11 @@ static int so_priority;
 module_param(so_priority, int, 0644);
 MODULE_PARM_DESC(so_priority, "nvme tcp socket optimize priority");
 
-#ifdef CONFIG_NVME_TCP_TLS
 /*
  * TLS handshake timeout
  */
 static int tls_handshake_timeout = 10;
+#ifdef CONFIG_NVME_TCP_TLS
 module_param(tls_handshake_timeout, int, 0644);
 MODULE_PARM_DESC(tls_handshake_timeout,
                 "nvme TLS handshake timeout in seconds (default 10)");
@@ -161,10 +161,8 @@ struct nvme_tcp_queue {
        struct ahash_request    *snd_hash;
        __le32                  exp_ddgst;
        __le32                  recv_ddgst;
-#ifdef CONFIG_NVME_TCP_TLS
        struct completion       tls_complete;
        int                     tls_err;
-#endif
        struct page_frag_cache  pf_cache;
 
        void (*state_change)(struct sock *);
@@ -207,6 +205,14 @@ static inline int nvme_tcp_queue_id(struct nvme_tcp_queue *queue)
        return queue - queue->ctrl->queues;
 }
 
+static inline bool nvme_tcp_tls(struct nvme_ctrl *ctrl)
+{
+       if (!IS_ENABLED(CONFIG_NVME_TCP_TLS))
+               return 0;
+
+       return ctrl->opts->tls;
+}
+
 static inline struct blk_mq_tags *nvme_tcp_tagset(struct nvme_tcp_queue *queue)
 {
        u32 queue_idx = nvme_tcp_queue_id(queue);
@@ -1412,7 +1418,7 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue)
        memset(&msg, 0, sizeof(msg));
        iov.iov_base = icresp;
        iov.iov_len = sizeof(*icresp);
-       if (queue->ctrl->ctrl.opts->tls) {
+       if (nvme_tcp_tls(&queue->ctrl->ctrl)) {
                msg.msg_control = cbuf;
                msg.msg_controllen = sizeof(cbuf);
        }
@@ -1424,7 +1430,7 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue)
                goto free_icresp;
        }
        ret = -ENOTCONN;
-       if (queue->ctrl->ctrl.opts->tls) {
+       if (nvme_tcp_tls(&queue->ctrl->ctrl)) {
                ctype = tls_get_record_type(queue->sock->sk,
                                            (struct cmsghdr *)cbuf);
                if (ctype != TLS_RECORD_TYPE_DATA) {
@@ -1548,7 +1554,6 @@ static void nvme_tcp_set_queue_io_cpu(struct nvme_tcp_queue *queue)
        queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false);
 }
 
-#ifdef CONFIG_NVME_TCP_TLS
 static void nvme_tcp_tls_done(void *data, int status, key_serial_t pskid)
 {
        struct nvme_tcp_queue *queue = data;
@@ -1625,14 +1630,6 @@ static int nvme_tcp_start_tls(struct nvme_ctrl *nctrl,
        }
        return ret;
 }
-#else
-static int nvme_tcp_start_tls(struct nvme_ctrl *nctrl,
-                             struct nvme_tcp_queue *queue,
-                             key_serial_t pskid)
-{
-       return -EPROTONOSUPPORT;
-}
-#endif
 
 static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid,
                                key_serial_t pskid)
@@ -1759,7 +1756,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid,
        }
 
        /* If PSKs are configured try to start TLS */
-       if (pskid) {
+       if (IS_ENABLED(CONFIG_NVME_TCP_TLS) && pskid) {
                ret = nvme_tcp_start_tls(nctrl, queue, pskid);
                if (ret)
                        goto err_init_connect;
@@ -1916,7 +1913,7 @@ static int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *ctrl)
        int ret;
        key_serial_t pskid = 0;
 
-       if (ctrl->opts->tls) {
+       if (nvme_tcp_tls(ctrl)) {
                if (ctrl->opts->tls_key)
                        pskid = key_serial(ctrl->opts->tls_key);
                else
@@ -1949,7 +1946,7 @@ static int __nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
 {
        int i, ret;
 
-       if (ctrl->opts->tls && !ctrl->tls_key) {
+       if (nvme_tcp_tls(ctrl) && !ctrl->tls_key) {
                dev_err(ctrl->device, "no PSK negotiated\n");
                return -ENOKEY;
        }
@@ -2237,6 +2234,7 @@ destroy_io:
                nvme_tcp_destroy_io_queues(ctrl, new);
        }
 destroy_admin:
+       nvme_stop_keep_alive(ctrl);
        nvme_tcp_teardown_admin_queue(ctrl, false);
        return ret;
 }
index 31633da..e1ebc73 100644 (file)
@@ -4,6 +4,8 @@ config NVME_TARGET
        tristate "NVMe Target support"
        depends on BLOCK
        depends on CONFIGFS_FS
+       select NVME_KEYRING if NVME_TARGET_TCP_TLS
+       select KEYS if NVME_TARGET_TCP_TLS
        select BLK_DEV_INTEGRITY_T10 if BLK_DEV_INTEGRITY
        select SGL_ALLOC
        help
@@ -87,9 +89,7 @@ config NVME_TARGET_TCP
 config NVME_TARGET_TCP_TLS
        bool "NVMe over Fabrics TCP target TLS encryption support"
        depends on NVME_TARGET_TCP
-       select NVME_KEYRING
        select NET_HANDSHAKE
-       select KEYS
        help
          Enables TLS encryption for the NVMe TCP target using the netlink handshake API.
 
index 9eed6e6..e307a04 100644 (file)
@@ -1893,7 +1893,7 @@ static struct config_group *nvmet_ports_make(struct config_group *group,
                return ERR_PTR(-ENOMEM);
        }
 
-       if (nvme_keyring_id()) {
+       if (IS_ENABLED(CONFIG_NVME_TARGET_TCP_TLS) && nvme_keyring_id()) {
                port->keyring = key_lookup(nvme_keyring_id());
                if (IS_ERR(port->keyring)) {
                        pr_warn("NVMe keyring not available, disabling TLS\n");
index 43b5bd8..d8da840 100644 (file)
@@ -244,6 +244,8 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
                goto out;
        }
 
+       d->subsysnqn[NVMF_NQN_FIELD_LEN - 1] = '\0';
+       d->hostnqn[NVMF_NQN_FIELD_LEN - 1] = '\0';
        status = nvmet_alloc_ctrl(d->subsysnqn, d->hostnqn, req,
                                  le32_to_cpu(c->kato), &ctrl);
        if (status)
@@ -313,6 +315,8 @@ static void nvmet_execute_io_connect(struct nvmet_req *req)
                goto out;
        }
 
+       d->subsysnqn[NVMF_NQN_FIELD_LEN - 1] = '\0';
+       d->hostnqn[NVMF_NQN_FIELD_LEN - 1] = '\0';
        ctrl = nvmet_ctrl_find_get(d->subsysnqn, d->hostnqn,
                                   le16_to_cpu(d->cntlid), req);
        if (!ctrl) {
index 92b74d0..4cc2785 100644 (file)
@@ -1854,6 +1854,8 @@ static int nvmet_tcp_tls_handshake(struct nvmet_tcp_queue *queue)
        }
        return ret;
 }
+#else
+static void nvmet_tcp_tls_handshake_timeout(struct work_struct *w) {}
 #endif
 
 static void nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
@@ -1911,9 +1913,9 @@ static void nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
        list_add_tail(&queue->queue_list, &nvmet_tcp_queue_list);
        mutex_unlock(&nvmet_tcp_queue_mutex);
 
-#ifdef CONFIG_NVME_TARGET_TCP_TLS
        INIT_DELAYED_WORK(&queue->tls_handshake_tmo_work,
                          nvmet_tcp_tls_handshake_timeout);
+#ifdef CONFIG_NVME_TARGET_TCP_TLS
        if (queue->state == NVMET_TCP_Q_TLS_HANDSHAKE) {
                struct sock *sk = queue->sock->sk;
 
index 539d892..bb0d924 100644 (file)
@@ -176,7 +176,7 @@ static struct notifier_block parisc_panic_block = {
 static int qemu_power_off(struct sys_off_data *data)
 {
        /* this turns the system off via SeaBIOS */
-       *(int *)data->cb_data = 0;
+       gsc_writel(0, (unsigned long) data->cb_data);
        pdc_soft_power_button(1);
        return NOTIFY_DONE;
 }
index 787354b..4cef568 100644 (file)
@@ -87,7 +87,6 @@ source "drivers/phy/motorola/Kconfig"
 source "drivers/phy/mscc/Kconfig"
 source "drivers/phy/qualcomm/Kconfig"
 source "drivers/phy/ralink/Kconfig"
-source "drivers/phy/realtek/Kconfig"
 source "drivers/phy/renesas/Kconfig"
 source "drivers/phy/rockchip/Kconfig"
 source "drivers/phy/samsung/Kconfig"
index 868a220..fb3dc9d 100644 (file)
@@ -26,7 +26,6 @@ obj-y                                 += allwinner/   \
                                           mscc/        \
                                           qualcomm/    \
                                           ralink/      \
-                                          realtek/     \
                                           renesas/     \
                                           rockchip/    \
                                           samsung/     \
diff --git a/drivers/phy/realtek/Kconfig b/drivers/phy/realtek/Kconfig
deleted file mode 100644 (file)
index 75ac7e7..0000000
+++ /dev/null
@@ -1,32 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Phy drivers for Realtek platforms
-#
-
-if ARCH_REALTEK || COMPILE_TEST
-
-config PHY_RTK_RTD_USB2PHY
-       tristate "Realtek RTD USB2 PHY Transceiver Driver"
-       depends on USB_SUPPORT
-       select GENERIC_PHY
-       select USB_PHY
-       select USB_COMMON
-       help
-         Enable this to support Realtek SoC USB2 phy transceiver.
-         The DHC (digital home center) RTD series SoCs used the Synopsys
-         DWC3 USB IP. This driver will do the PHY initialization
-         of the parameters.
-
-config PHY_RTK_RTD_USB3PHY
-       tristate "Realtek RTD USB3 PHY Transceiver Driver"
-       depends on USB_SUPPORT
-       select GENERIC_PHY
-       select USB_PHY
-       select USB_COMMON
-       help
-         Enable this to support Realtek SoC USB3 phy transceiver.
-         The DHC (digital home center) RTD series SoCs used the Synopsys
-         DWC3 USB IP. This driver will do the PHY initialization
-         of the parameters.
-
-endif # ARCH_REALTEK || COMPILE_TEST
diff --git a/drivers/phy/realtek/Makefile b/drivers/phy/realtek/Makefile
deleted file mode 100644 (file)
index ed7b47f..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_PHY_RTK_RTD_USB2PHY)      += phy-rtk-usb2.o
-obj-$(CONFIG_PHY_RTK_RTD_USB3PHY)      += phy-rtk-usb3.o
diff --git a/drivers/phy/realtek/phy-rtk-usb2.c b/drivers/phy/realtek/phy-rtk-usb2.c
deleted file mode 100644 (file)
index 0a64262..0000000
+++ /dev/null
@@ -1,1325 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- *  phy-rtk-usb2.c RTK usb2.0 PHY driver
- *
- * Copyright (C) 2023 Realtek Semiconductor Corporation
- *
- */
-
-#include <linux/module.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/platform_device.h>
-#include <linux/uaccess.h>
-#include <linux/debugfs.h>
-#include <linux/nvmem-consumer.h>
-#include <linux/regmap.h>
-#include <linux/sys_soc.h>
-#include <linux/mfd/syscon.h>
-#include <linux/phy/phy.h>
-#include <linux/usb.h>
-#include <linux/usb/phy.h>
-#include <linux/usb/hcd.h>
-
-/* GUSB2PHYACCn register */
-#define PHY_NEW_REG_REQ BIT(25)
-#define PHY_VSTS_BUSY   BIT(23)
-#define PHY_VCTRL_SHIFT 8
-#define PHY_REG_DATA_MASK 0xff
-
-#define GET_LOW_NIBBLE(addr) ((addr) & 0x0f)
-#define GET_HIGH_NIBBLE(addr) (((addr) & 0xf0) >> 4)
-
-#define EFUS_USB_DC_CAL_RATE 2
-#define EFUS_USB_DC_CAL_MAX 7
-
-#define EFUS_USB_DC_DIS_RATE 1
-#define EFUS_USB_DC_DIS_MAX 7
-
-#define MAX_PHY_DATA_SIZE 20
-#define OFFEST_PHY_READ 0x20
-
-#define MAX_USB_PHY_NUM 4
-#define MAX_USB_PHY_PAGE0_DATA_SIZE 16
-#define MAX_USB_PHY_PAGE1_DATA_SIZE 16
-#define MAX_USB_PHY_PAGE2_DATA_SIZE 8
-
-#define SET_PAGE_OFFSET 0xf4
-#define SET_PAGE_0 0x9b
-#define SET_PAGE_1 0xbb
-#define SET_PAGE_2 0xdb
-
-#define PAGE_START 0xe0
-#define PAGE0_0XE4 0xe4
-#define PAGE0_0XE6 0xe6
-#define PAGE0_0XE7 0xe7
-#define PAGE1_0XE0 0xe0
-#define PAGE1_0XE2 0xe2
-
-#define SENSITIVITY_CTRL (BIT(4) | BIT(5) | BIT(6))
-#define ENABLE_AUTO_SENSITIVITY_CALIBRATION BIT(2)
-#define DEFAULT_DC_DRIVING_VALUE (0x8)
-#define DEFAULT_DC_DISCONNECTION_VALUE (0x6)
-#define HS_CLK_SELECT BIT(6)
-
-struct phy_reg {
-       void __iomem *reg_wrap_vstatus;
-       void __iomem *reg_gusb2phyacc0;
-       int vstatus_index;
-};
-
-struct phy_data {
-       u8 addr;
-       u8 data;
-};
-
-struct phy_cfg {
-       int page0_size;
-       struct phy_data page0[MAX_USB_PHY_PAGE0_DATA_SIZE];
-       int page1_size;
-       struct phy_data page1[MAX_USB_PHY_PAGE1_DATA_SIZE];
-       int page2_size;
-       struct phy_data page2[MAX_USB_PHY_PAGE2_DATA_SIZE];
-
-       int num_phy;
-
-       bool check_efuse;
-       int check_efuse_version;
-#define CHECK_EFUSE_V1 1
-#define CHECK_EFUSE_V2 2
-       int efuse_dc_driving_rate;
-       int efuse_dc_disconnect_rate;
-       int dc_driving_mask;
-       int dc_disconnect_mask;
-       bool usb_dc_disconnect_at_page0;
-       int driving_updated_for_dev_dis;
-
-       bool do_toggle;
-       bool do_toggle_driving;
-       bool use_default_parameter;
-       bool is_double_sensitivity_mode;
-};
-
-struct phy_parameter {
-       struct phy_reg phy_reg;
-
-       /* Get from efuse */
-       s8 efuse_usb_dc_cal;
-       s8 efuse_usb_dc_dis;
-
-       /* Get from dts */
-       bool inverse_hstx_sync_clock;
-       u32 driving_level;
-       s32 driving_level_compensate;
-       s32 disconnection_compensate;
-};
-
-struct rtk_phy {
-       struct usb_phy phy;
-       struct device *dev;
-
-       struct phy_cfg *phy_cfg;
-       int num_phy;
-       struct phy_parameter *phy_parameter;
-
-       struct dentry *debug_dir;
-};
-
-/* mapping 0xE0 to 0 ... 0xE7 to 7, 0xF0 to 8 ,,, 0xF7 to 15 */
-static inline int page_addr_to_array_index(u8 addr)
-{
-       return (int)((((addr) - PAGE_START) & 0x7) +
-               ((((addr) - PAGE_START) & 0x10) >> 1));
-}
-
-static inline u8 array_index_to_page_addr(int index)
-{
-       return ((((index) + PAGE_START) & 0x7) +
-               ((((index) & 0x8) << 1) + PAGE_START));
-}
-
-#define PHY_IO_TIMEOUT_USEC            (50000)
-#define PHY_IO_DELAY_US                        (100)
-
-static inline int utmi_wait_register(void __iomem *reg, u32 mask, u32 result)
-{
-       int ret;
-       unsigned int val;
-
-       ret = read_poll_timeout(readl, val, ((val & mask) == result),
-                               PHY_IO_DELAY_US, PHY_IO_TIMEOUT_USEC, false, reg);
-       if (ret) {
-               pr_err("%s can't program USB phy\n", __func__);
-               return -ETIMEDOUT;
-       }
-
-       return 0;
-}
-
-static char rtk_phy_read(struct phy_reg *phy_reg, char addr)
-{
-       void __iomem *reg_gusb2phyacc0 = phy_reg->reg_gusb2phyacc0;
-       unsigned int val;
-       int ret = 0;
-
-       addr -= OFFEST_PHY_READ;
-
-       /* polling until VBusy == 0 */
-       ret = utmi_wait_register(reg_gusb2phyacc0, PHY_VSTS_BUSY, 0);
-       if (ret)
-               return (char)ret;
-
-       /* VCtrl = low nibble of addr, and set PHY_NEW_REG_REQ */
-       val = PHY_NEW_REG_REQ | (GET_LOW_NIBBLE(addr) << PHY_VCTRL_SHIFT);
-       writel(val, reg_gusb2phyacc0);
-       ret = utmi_wait_register(reg_gusb2phyacc0, PHY_VSTS_BUSY, 0);
-       if (ret)
-               return (char)ret;
-
-       /* VCtrl = high nibble of addr, and set PHY_NEW_REG_REQ */
-       val = PHY_NEW_REG_REQ | (GET_HIGH_NIBBLE(addr) << PHY_VCTRL_SHIFT);
-       writel(val, reg_gusb2phyacc0);
-       ret = utmi_wait_register(reg_gusb2phyacc0, PHY_VSTS_BUSY, 0);
-       if (ret)
-               return (char)ret;
-
-       val = readl(reg_gusb2phyacc0);
-
-       return (char)(val & PHY_REG_DATA_MASK);
-}
-
-static int rtk_phy_write(struct phy_reg *phy_reg, char addr, char data)
-{
-       unsigned int val;
-       void __iomem *reg_wrap_vstatus = phy_reg->reg_wrap_vstatus;
-       void __iomem *reg_gusb2phyacc0 = phy_reg->reg_gusb2phyacc0;
-       int shift_bits = phy_reg->vstatus_index * 8;
-       int ret = 0;
-
-       /* write data to VStatusOut2 (data output to phy) */
-       writel((u32)data << shift_bits, reg_wrap_vstatus);
-
-       ret = utmi_wait_register(reg_gusb2phyacc0, PHY_VSTS_BUSY, 0);
-       if (ret)
-               return ret;
-
-       /* VCtrl = low nibble of addr, set PHY_NEW_REG_REQ */
-       val = PHY_NEW_REG_REQ | (GET_LOW_NIBBLE(addr) << PHY_VCTRL_SHIFT);
-
-       writel(val, reg_gusb2phyacc0);
-       ret = utmi_wait_register(reg_gusb2phyacc0, PHY_VSTS_BUSY, 0);
-       if (ret)
-               return ret;
-
-       /* VCtrl = high nibble of addr, set PHY_NEW_REG_REQ */
-       val = PHY_NEW_REG_REQ | (GET_HIGH_NIBBLE(addr) << PHY_VCTRL_SHIFT);
-
-       writel(val, reg_gusb2phyacc0);
-       ret = utmi_wait_register(reg_gusb2phyacc0, PHY_VSTS_BUSY, 0);
-       if (ret)
-               return ret;
-
-       return 0;
-}
-
-static int rtk_phy_set_page(struct phy_reg *phy_reg, int page)
-{
-       switch (page) {
-       case 0:
-               return rtk_phy_write(phy_reg, SET_PAGE_OFFSET, SET_PAGE_0);
-       case 1:
-               return rtk_phy_write(phy_reg, SET_PAGE_OFFSET, SET_PAGE_1);
-       case 2:
-               return rtk_phy_write(phy_reg, SET_PAGE_OFFSET, SET_PAGE_2);
-       default:
-               pr_err("%s error page=%d\n", __func__, page);
-       }
-
-       return -EINVAL;
-}
-
-static u8 __updated_dc_disconnect_level_page0_0xe4(struct phy_cfg *phy_cfg,
-                                                  struct phy_parameter *phy_parameter, u8 data)
-{
-       u8 ret;
-       s32 val;
-       s32 dc_disconnect_mask = phy_cfg->dc_disconnect_mask;
-       int offset = 4;
-
-       val = (s32)((data >> offset) & dc_disconnect_mask)
-                    + phy_parameter->efuse_usb_dc_dis
-                    + phy_parameter->disconnection_compensate;
-
-       if (val > dc_disconnect_mask)
-               val = dc_disconnect_mask;
-       else if (val < 0)
-               val = 0;
-
-       ret = (data & (~(dc_disconnect_mask << offset))) |
-                   (val & dc_disconnect_mask) << offset;
-
-       return ret;
-}
-
-/* updated disconnect level at page0 */
-static void update_dc_disconnect_level_at_page0(struct rtk_phy *rtk_phy,
-                                               struct phy_parameter *phy_parameter, bool update)
-{
-       struct phy_cfg *phy_cfg;
-       struct phy_reg *phy_reg;
-       struct phy_data *phy_data_page;
-       struct phy_data *phy_data;
-       u8 addr, data;
-       int offset = 4;
-       s32 dc_disconnect_mask;
-       int i;
-
-       phy_cfg = rtk_phy->phy_cfg;
-       phy_reg = &phy_parameter->phy_reg;
-
-       /* Set page 0 */
-       phy_data_page = phy_cfg->page0;
-       rtk_phy_set_page(phy_reg, 0);
-
-       i = page_addr_to_array_index(PAGE0_0XE4);
-       phy_data = phy_data_page + i;
-       if (!phy_data->addr) {
-               phy_data->addr = PAGE0_0XE4;
-               phy_data->data = rtk_phy_read(phy_reg, PAGE0_0XE4);
-       }
-
-       addr = phy_data->addr;
-       data = phy_data->data;
-       dc_disconnect_mask = phy_cfg->dc_disconnect_mask;
-
-       if (update)
-               data = __updated_dc_disconnect_level_page0_0xe4(phy_cfg, phy_parameter, data);
-       else
-               data = (data & ~(dc_disconnect_mask << offset)) |
-                       (DEFAULT_DC_DISCONNECTION_VALUE << offset);
-
-       if (rtk_phy_write(phy_reg, addr, data))
-               dev_err(rtk_phy->dev,
-                       "%s: Error to set page1 parameter addr=0x%x value=0x%x\n",
-                       __func__, addr, data);
-}
-
-static u8 __updated_dc_disconnect_level_page1_0xe2(struct phy_cfg *phy_cfg,
-                                                  struct phy_parameter *phy_parameter, u8 data)
-{
-       u8 ret;
-       s32 val;
-       s32 dc_disconnect_mask = phy_cfg->dc_disconnect_mask;
-
-       if (phy_cfg->check_efuse_version == CHECK_EFUSE_V1) {
-               val = (s32)(data & dc_disconnect_mask)
-                           + phy_parameter->efuse_usb_dc_dis
-                           + phy_parameter->disconnection_compensate;
-       } else { /* for CHECK_EFUSE_V2 or no efuse */
-               if (phy_parameter->efuse_usb_dc_dis)
-                       val = (s32)(phy_parameter->efuse_usb_dc_dis +
-                                   phy_parameter->disconnection_compensate);
-               else
-                       val = (s32)((data & dc_disconnect_mask) +
-                                   phy_parameter->disconnection_compensate);
-       }
-
-       if (val > dc_disconnect_mask)
-               val = dc_disconnect_mask;
-       else if (val < 0)
-               val = 0;
-
-       ret = (data & (~dc_disconnect_mask)) | (val & dc_disconnect_mask);
-
-       return ret;
-}
-
-/* updated disconnect level at page1 */
-static void update_dc_disconnect_level_at_page1(struct rtk_phy *rtk_phy,
-                                               struct phy_parameter *phy_parameter, bool update)
-{
-       struct phy_cfg *phy_cfg;
-       struct phy_data *phy_data_page;
-       struct phy_data *phy_data;
-       struct phy_reg *phy_reg;
-       u8 addr, data;
-       s32 dc_disconnect_mask;
-       int i;
-
-       phy_cfg = rtk_phy->phy_cfg;
-       phy_reg = &phy_parameter->phy_reg;
-
-       /* Set page 1 */
-       phy_data_page = phy_cfg->page1;
-       rtk_phy_set_page(phy_reg, 1);
-
-       i = page_addr_to_array_index(PAGE1_0XE2);
-       phy_data = phy_data_page + i;
-       if (!phy_data->addr) {
-               phy_data->addr = PAGE1_0XE2;
-               phy_data->data = rtk_phy_read(phy_reg, PAGE1_0XE2);
-       }
-
-       addr = phy_data->addr;
-       data = phy_data->data;
-       dc_disconnect_mask = phy_cfg->dc_disconnect_mask;
-
-       if (update)
-               data = __updated_dc_disconnect_level_page1_0xe2(phy_cfg, phy_parameter, data);
-       else
-               data = (data & ~dc_disconnect_mask) | DEFAULT_DC_DISCONNECTION_VALUE;
-
-       if (rtk_phy_write(phy_reg, addr, data))
-               dev_err(rtk_phy->dev,
-                       "%s: Error to set page1 parameter addr=0x%x value=0x%x\n",
-                       __func__, addr, data);
-}
-
-static void update_dc_disconnect_level(struct rtk_phy *rtk_phy,
-                                      struct phy_parameter *phy_parameter, bool update)
-{
-       struct phy_cfg *phy_cfg = rtk_phy->phy_cfg;
-
-       if (phy_cfg->usb_dc_disconnect_at_page0)
-               update_dc_disconnect_level_at_page0(rtk_phy, phy_parameter, update);
-       else
-               update_dc_disconnect_level_at_page1(rtk_phy, phy_parameter, update);
-}
-
-static u8 __update_dc_driving_page0_0xe4(struct phy_cfg *phy_cfg,
-                                        struct phy_parameter *phy_parameter, u8 data)
-{
-       s32 driving_level_compensate = phy_parameter->driving_level_compensate;
-       s32 dc_driving_mask = phy_cfg->dc_driving_mask;
-       s32 val;
-       u8 ret;
-
-       if (phy_cfg->check_efuse_version == CHECK_EFUSE_V1) {
-               val = (s32)(data & dc_driving_mask) + driving_level_compensate
-                           + phy_parameter->efuse_usb_dc_cal;
-       } else { /* for CHECK_EFUSE_V2 or no efuse */
-               if (phy_parameter->efuse_usb_dc_cal)
-                       val = (s32)((phy_parameter->efuse_usb_dc_cal & dc_driving_mask)
-                                   + driving_level_compensate);
-               else
-                       val = (s32)(data & dc_driving_mask);
-       }
-
-       if (val > dc_driving_mask)
-               val = dc_driving_mask;
-       else if (val < 0)
-               val = 0;
-
-       ret = (data & (~dc_driving_mask)) | (val & dc_driving_mask);
-
-       return ret;
-}
-
-static void update_dc_driving_level(struct rtk_phy *rtk_phy,
-                                   struct phy_parameter *phy_parameter)
-{
-       struct phy_cfg *phy_cfg;
-       struct phy_reg *phy_reg;
-
-       phy_reg = &phy_parameter->phy_reg;
-       phy_cfg = rtk_phy->phy_cfg;
-       if (!phy_cfg->page0[4].addr) {
-               rtk_phy_set_page(phy_reg, 0);
-               phy_cfg->page0[4].addr = PAGE0_0XE4;
-               phy_cfg->page0[4].data = rtk_phy_read(phy_reg, PAGE0_0XE4);
-       }
-
-       if (phy_parameter->driving_level != DEFAULT_DC_DRIVING_VALUE) {
-               u32 dc_driving_mask;
-               u8 driving_level;
-               u8 data;
-
-               data = phy_cfg->page0[4].data;
-               dc_driving_mask = phy_cfg->dc_driving_mask;
-               driving_level = data & dc_driving_mask;
-
-               dev_dbg(rtk_phy->dev, "%s driving_level=%d => dts driving_level=%d\n",
-                       __func__, driving_level, phy_parameter->driving_level);
-
-               phy_cfg->page0[4].data = (data & (~dc_driving_mask)) |
-                           (phy_parameter->driving_level & dc_driving_mask);
-       }
-
-       phy_cfg->page0[4].data = __update_dc_driving_page0_0xe4(phy_cfg,
-                                                               phy_parameter,
-                                                               phy_cfg->page0[4].data);
-}
-
-static void update_hs_clk_select(struct rtk_phy *rtk_phy,
-                                struct phy_parameter *phy_parameter)
-{
-       struct phy_cfg *phy_cfg;
-       struct phy_reg *phy_reg;
-
-       phy_cfg = rtk_phy->phy_cfg;
-       phy_reg = &phy_parameter->phy_reg;
-
-       if (phy_parameter->inverse_hstx_sync_clock) {
-               if (!phy_cfg->page0[6].addr) {
-                       rtk_phy_set_page(phy_reg, 0);
-                       phy_cfg->page0[6].addr = PAGE0_0XE6;
-                       phy_cfg->page0[6].data = rtk_phy_read(phy_reg, PAGE0_0XE6);
-               }
-
-               phy_cfg->page0[6].data = phy_cfg->page0[6].data | HS_CLK_SELECT;
-       }
-}
-
-static void do_rtk_phy_toggle(struct rtk_phy *rtk_phy,
-                             int index, bool connect)
-{
-       struct phy_parameter *phy_parameter;
-       struct phy_cfg *phy_cfg;
-       struct phy_reg *phy_reg;
-       struct phy_data *phy_data_page;
-       u8 addr, data;
-       int i;
-
-       phy_cfg = rtk_phy->phy_cfg;
-       phy_parameter = &((struct phy_parameter *)rtk_phy->phy_parameter)[index];
-       phy_reg = &phy_parameter->phy_reg;
-
-       if (!phy_cfg->do_toggle)
-               goto out;
-
-       if (phy_cfg->is_double_sensitivity_mode)
-               goto do_toggle_driving;
-
-       /* Set page 0 */
-       rtk_phy_set_page(phy_reg, 0);
-
-       addr = PAGE0_0XE7;
-       data = rtk_phy_read(phy_reg, addr);
-
-       if (connect)
-               rtk_phy_write(phy_reg, addr, data & (~SENSITIVITY_CTRL));
-       else
-               rtk_phy_write(phy_reg, addr, data | (SENSITIVITY_CTRL));
-
-do_toggle_driving:
-
-       if (!phy_cfg->do_toggle_driving)
-               goto do_toggle;
-
-       /* Page 0 addr 0xE4 driving capability */
-
-       /* Set page 0 */
-       phy_data_page = phy_cfg->page0;
-       rtk_phy_set_page(phy_reg, 0);
-
-       i = page_addr_to_array_index(PAGE0_0XE4);
-       addr = phy_data_page[i].addr;
-       data = phy_data_page[i].data;
-
-       if (connect) {
-               rtk_phy_write(phy_reg, addr, data);
-       } else {
-               u8 value;
-               s32 tmp;
-               s32 driving_updated =
-                           phy_cfg->driving_updated_for_dev_dis;
-               s32 dc_driving_mask = phy_cfg->dc_driving_mask;
-
-               tmp = (s32)(data & dc_driving_mask) + driving_updated;
-
-               if (tmp > dc_driving_mask)
-                       tmp = dc_driving_mask;
-               else if (tmp < 0)
-                       tmp = 0;
-
-               value = (data & (~dc_driving_mask)) | (tmp & dc_driving_mask);
-
-               rtk_phy_write(phy_reg, addr, value);
-       }
-
-do_toggle:
-       /* restore dc disconnect level before toggle */
-       update_dc_disconnect_level(rtk_phy, phy_parameter, false);
-
-       /* Set page 1 */
-       rtk_phy_set_page(phy_reg, 1);
-
-       addr = PAGE1_0XE0;
-       data = rtk_phy_read(phy_reg, addr);
-
-       rtk_phy_write(phy_reg, addr, data &
-                     (~ENABLE_AUTO_SENSITIVITY_CALIBRATION));
-       mdelay(1);
-       rtk_phy_write(phy_reg, addr, data |
-                     (ENABLE_AUTO_SENSITIVITY_CALIBRATION));
-
-       /* update dc disconnect level after toggle */
-       update_dc_disconnect_level(rtk_phy, phy_parameter, true);
-
-out:
-       return;
-}
-
-static int do_rtk_phy_init(struct rtk_phy *rtk_phy, int index)
-{
-       struct phy_parameter *phy_parameter;
-       struct phy_cfg *phy_cfg;
-       struct phy_data *phy_data_page;
-       struct phy_reg *phy_reg;
-       int i;
-
-       phy_cfg = rtk_phy->phy_cfg;
-       phy_parameter = &((struct phy_parameter *)rtk_phy->phy_parameter)[index];
-       phy_reg = &phy_parameter->phy_reg;
-
-       if (phy_cfg->use_default_parameter) {
-               dev_dbg(rtk_phy->dev, "%s phy#%d use default parameter\n",
-                       __func__, index);
-               goto do_toggle;
-       }
-
-       /* Set page 0 */
-       phy_data_page = phy_cfg->page0;
-       rtk_phy_set_page(phy_reg, 0);
-
-       for (i = 0; i < phy_cfg->page0_size; i++) {
-               struct phy_data *phy_data = phy_data_page + i;
-               u8 addr = phy_data->addr;
-               u8 data = phy_data->data;
-
-               if (!addr)
-                       continue;
-
-               if (rtk_phy_write(phy_reg, addr, data)) {
-                       dev_err(rtk_phy->dev,
-                               "%s: Error to set page0 parameter addr=0x%x value=0x%x\n",
-                               __func__, addr, data);
-                       return -EINVAL;
-               }
-       }
-
-       /* Set page 1 */
-       phy_data_page = phy_cfg->page1;
-       rtk_phy_set_page(phy_reg, 1);
-
-       for (i = 0; i < phy_cfg->page1_size; i++) {
-               struct phy_data *phy_data = phy_data_page + i;
-               u8 addr = phy_data->addr;
-               u8 data = phy_data->data;
-
-               if (!addr)
-                       continue;
-
-               if (rtk_phy_write(phy_reg, addr, data)) {
-                       dev_err(rtk_phy->dev,
-                               "%s: Error to set page1 parameter addr=0x%x value=0x%x\n",
-                               __func__, addr, data);
-                       return -EINVAL;
-               }
-       }
-
-       if (phy_cfg->page2_size == 0)
-               goto do_toggle;
-
-       /* Set page 2 */
-       phy_data_page = phy_cfg->page2;
-       rtk_phy_set_page(phy_reg, 2);
-
-       for (i = 0; i < phy_cfg->page2_size; i++) {
-               struct phy_data *phy_data = phy_data_page + i;
-               u8 addr = phy_data->addr;
-               u8 data = phy_data->data;
-
-               if (!addr)
-                       continue;
-
-               if (rtk_phy_write(phy_reg, addr, data)) {
-                       dev_err(rtk_phy->dev,
-                               "%s: Error to set page2 parameter addr=0x%x value=0x%x\n",
-                               __func__, addr, data);
-                       return -EINVAL;
-               }
-       }
-
-do_toggle:
-       do_rtk_phy_toggle(rtk_phy, index, false);
-
-       return 0;
-}
-
-static int rtk_phy_init(struct phy *phy)
-{
-       struct rtk_phy *rtk_phy = phy_get_drvdata(phy);
-       unsigned long phy_init_time = jiffies;
-       int i, ret = 0;
-
-       if (!rtk_phy)
-               return -EINVAL;
-
-       for (i = 0; i < rtk_phy->num_phy; i++)
-               ret = do_rtk_phy_init(rtk_phy, i);
-
-       dev_dbg(rtk_phy->dev, "Initialized RTK USB 2.0 PHY (take %dms)\n",
-               jiffies_to_msecs(jiffies - phy_init_time));
-       return ret;
-}
-
-static int rtk_phy_exit(struct phy *phy)
-{
-       return 0;
-}
-
-static const struct phy_ops ops = {
-       .init           = rtk_phy_init,
-       .exit           = rtk_phy_exit,
-       .owner          = THIS_MODULE,
-};
-
-static void rtk_phy_toggle(struct usb_phy *usb2_phy, bool connect, int port)
-{
-       int index = port;
-       struct rtk_phy *rtk_phy = NULL;
-
-       rtk_phy = dev_get_drvdata(usb2_phy->dev);
-
-       if (index > rtk_phy->num_phy) {
-               dev_err(rtk_phy->dev, "%s: The port=%d is not in usb phy (num_phy=%d)\n",
-                       __func__, index, rtk_phy->num_phy);
-               return;
-       }
-
-       do_rtk_phy_toggle(rtk_phy, index, connect);
-}
-
-static int rtk_phy_notify_port_status(struct usb_phy *x, int port,
-                                     u16 portstatus, u16 portchange)
-{
-       bool connect = false;
-
-       pr_debug("%s port=%d portstatus=0x%x portchange=0x%x\n",
-                __func__, port, (int)portstatus, (int)portchange);
-       if (portstatus & USB_PORT_STAT_CONNECTION)
-               connect = true;
-
-       if (portchange & USB_PORT_STAT_C_CONNECTION)
-               rtk_phy_toggle(x, connect, port);
-
-       return 0;
-}
-
-#ifdef CONFIG_DEBUG_FS
-static struct dentry *create_phy_debug_root(void)
-{
-       struct dentry *phy_debug_root;
-
-       phy_debug_root = debugfs_lookup("phy", usb_debug_root);
-       if (!phy_debug_root)
-               phy_debug_root = debugfs_create_dir("phy", usb_debug_root);
-
-       return phy_debug_root;
-}
-
-static int rtk_usb2_parameter_show(struct seq_file *s, void *unused)
-{
-       struct rtk_phy *rtk_phy = s->private;
-       struct phy_cfg *phy_cfg;
-       int i, index;
-
-       phy_cfg = rtk_phy->phy_cfg;
-
-       seq_puts(s, "Property:\n");
-       seq_printf(s, "  check_efuse: %s\n",
-                  phy_cfg->check_efuse ? "Enable" : "Disable");
-       seq_printf(s, "  check_efuse_version: %d\n",
-                  phy_cfg->check_efuse_version);
-       seq_printf(s, "  efuse_dc_driving_rate: %d\n",
-                  phy_cfg->efuse_dc_driving_rate);
-       seq_printf(s, "  dc_driving_mask: 0x%x\n",
-                  phy_cfg->dc_driving_mask);
-       seq_printf(s, "  efuse_dc_disconnect_rate: %d\n",
-                  phy_cfg->efuse_dc_disconnect_rate);
-       seq_printf(s, "  dc_disconnect_mask: 0x%x\n",
-                  phy_cfg->dc_disconnect_mask);
-       seq_printf(s, "  usb_dc_disconnect_at_page0: %s\n",
-                  phy_cfg->usb_dc_disconnect_at_page0 ? "true" : "false");
-       seq_printf(s, "  do_toggle: %s\n",
-                  phy_cfg->do_toggle ? "Enable" : "Disable");
-       seq_printf(s, "  do_toggle_driving: %s\n",
-                  phy_cfg->do_toggle_driving ? "Enable" : "Disable");
-       seq_printf(s, "  driving_updated_for_dev_dis: 0x%x\n",
-                  phy_cfg->driving_updated_for_dev_dis);
-       seq_printf(s, "  use_default_parameter: %s\n",
-                  phy_cfg->use_default_parameter ? "Enable" : "Disable");
-       seq_printf(s, "  is_double_sensitivity_mode: %s\n",
-                  phy_cfg->is_double_sensitivity_mode ? "Enable" : "Disable");
-
-       for (index = 0; index < rtk_phy->num_phy; index++) {
-               struct phy_parameter *phy_parameter;
-               struct phy_reg *phy_reg;
-               struct phy_data *phy_data_page;
-
-               phy_parameter =  &((struct phy_parameter *)rtk_phy->phy_parameter)[index];
-               phy_reg = &phy_parameter->phy_reg;
-
-               seq_printf(s, "PHY %d:\n", index);
-
-               seq_puts(s, "Page 0:\n");
-               /* Set page 0 */
-               phy_data_page = phy_cfg->page0;
-               rtk_phy_set_page(phy_reg, 0);
-
-               for (i = 0; i < phy_cfg->page0_size; i++) {
-                       struct phy_data *phy_data = phy_data_page + i;
-                       u8 addr = array_index_to_page_addr(i);
-                       u8 data = phy_data->data;
-                       u8 value = rtk_phy_read(phy_reg, addr);
-
-                       if (phy_data->addr)
-                               seq_printf(s, "  Page 0: addr=0x%x data=0x%02x ==> read value=0x%02x\n",
-                                          addr, data, value);
-                       else
-                               seq_printf(s, "  Page 0: addr=0x%x data=none ==> read value=0x%02x\n",
-                                          addr, value);
-               }
-
-               seq_puts(s, "Page 1:\n");
-               /* Set page 1 */
-               phy_data_page = phy_cfg->page1;
-               rtk_phy_set_page(phy_reg, 1);
-
-               for (i = 0; i < phy_cfg->page1_size; i++) {
-                       struct phy_data *phy_data = phy_data_page + i;
-                       u8 addr = array_index_to_page_addr(i);
-                       u8 data = phy_data->data;
-                       u8 value = rtk_phy_read(phy_reg, addr);
-
-                       if (phy_data->addr)
-                               seq_printf(s, "  Page 1: addr=0x%x data=0x%02x ==> read value=0x%02x\n",
-                                          addr, data, value);
-                       else
-                               seq_printf(s, "  Page 1: addr=0x%x data=none ==> read value=0x%02x\n",
-                                          addr, value);
-               }
-
-               if (phy_cfg->page2_size == 0)
-                       goto out;
-
-               seq_puts(s, "Page 2:\n");
-               /* Set page 2 */
-               phy_data_page = phy_cfg->page2;
-               rtk_phy_set_page(phy_reg, 2);
-
-               for (i = 0; i < phy_cfg->page2_size; i++) {
-                       struct phy_data *phy_data = phy_data_page + i;
-                       u8 addr = array_index_to_page_addr(i);
-                       u8 data = phy_data->data;
-                       u8 value = rtk_phy_read(phy_reg, addr);
-
-                       if (phy_data->addr)
-                               seq_printf(s, "  Page 2: addr=0x%x data=0x%02x ==> read value=0x%02x\n",
-                                          addr, data, value);
-                       else
-                               seq_printf(s, "  Page 2: addr=0x%x data=none ==> read value=0x%02x\n",
-                                          addr, value);
-               }
-
-out:
-               seq_puts(s, "PHY Property:\n");
-               seq_printf(s, "  efuse_usb_dc_cal: %d\n",
-                          (int)phy_parameter->efuse_usb_dc_cal);
-               seq_printf(s, "  efuse_usb_dc_dis: %d\n",
-                          (int)phy_parameter->efuse_usb_dc_dis);
-               seq_printf(s, "  inverse_hstx_sync_clock: %s\n",
-                          phy_parameter->inverse_hstx_sync_clock ? "Enable" : "Disable");
-               seq_printf(s, "  driving_level: %d\n",
-                          phy_parameter->driving_level);
-               seq_printf(s, "  driving_level_compensate: %d\n",
-                          phy_parameter->driving_level_compensate);
-               seq_printf(s, "  disconnection_compensate: %d\n",
-                          phy_parameter->disconnection_compensate);
-       }
-
-       return 0;
-}
-DEFINE_SHOW_ATTRIBUTE(rtk_usb2_parameter);
-
-static inline void create_debug_files(struct rtk_phy *rtk_phy)
-{
-       struct dentry *phy_debug_root = NULL;
-
-       phy_debug_root = create_phy_debug_root();
-       if (!phy_debug_root)
-               return;
-
-       rtk_phy->debug_dir = debugfs_create_dir(dev_name(rtk_phy->dev),
-                                               phy_debug_root);
-
-       debugfs_create_file("parameter", 0444, rtk_phy->debug_dir, rtk_phy,
-                           &rtk_usb2_parameter_fops);
-
-       return;
-}
-
-static inline void remove_debug_files(struct rtk_phy *rtk_phy)
-{
-       debugfs_remove_recursive(rtk_phy->debug_dir);
-}
-#else
-static inline void create_debug_files(struct rtk_phy *rtk_phy) { }
-static inline void remove_debug_files(struct rtk_phy *rtk_phy) { }
-#endif /* CONFIG_DEBUG_FS */
-
-static int get_phy_data_by_efuse(struct rtk_phy *rtk_phy,
-                                struct phy_parameter *phy_parameter, int index)
-{
-       struct phy_cfg *phy_cfg = rtk_phy->phy_cfg;
-       u8 value = 0;
-       struct nvmem_cell *cell;
-       struct soc_device_attribute rtk_soc_groot[] = {
-                   { .family = "Realtek Groot",},
-                   { /* empty */ } };
-
-       if (!phy_cfg->check_efuse)
-               goto out;
-
-       /* Read efuse for usb dc cal */
-       cell = nvmem_cell_get(rtk_phy->dev, "usb-dc-cal");
-       if (IS_ERR(cell)) {
-               dev_dbg(rtk_phy->dev, "%s no usb-dc-cal: %ld\n",
-                       __func__, PTR_ERR(cell));
-       } else {
-               unsigned char *buf;
-               size_t buf_size;
-
-               buf = nvmem_cell_read(cell, &buf_size);
-               if (!IS_ERR(buf)) {
-                       value = buf[0] & phy_cfg->dc_driving_mask;
-                       kfree(buf);
-               }
-               nvmem_cell_put(cell);
-       }
-
-       if (phy_cfg->check_efuse_version == CHECK_EFUSE_V1) {
-               int rate = phy_cfg->efuse_dc_driving_rate;
-
-               if (value <= EFUS_USB_DC_CAL_MAX)
-                       phy_parameter->efuse_usb_dc_cal = (int8_t)(value * rate);
-               else
-                       phy_parameter->efuse_usb_dc_cal = -(int8_t)
-                                   ((EFUS_USB_DC_CAL_MAX & value) * rate);
-
-               if (soc_device_match(rtk_soc_groot)) {
-                       dev_dbg(rtk_phy->dev, "For groot IC we need a workaround to adjust efuse_usb_dc_cal\n");
-
-                       /* We don't multiple dc_cal_rate=2 for positive dc cal compensate */
-                       if (value <= EFUS_USB_DC_CAL_MAX)
-                               phy_parameter->efuse_usb_dc_cal = (int8_t)(value);
-
-                       /* We set max dc cal compensate is 0x8 if otp is 0x7 */
-                       if (value == 0x7)
-                               phy_parameter->efuse_usb_dc_cal = (int8_t)(value + 1);
-               }
-       } else { /* for CHECK_EFUSE_V2 */
-               phy_parameter->efuse_usb_dc_cal = value & phy_cfg->dc_driving_mask;
-       }
-
-       /* Read efuse for usb dc disconnect level */
-       value = 0;
-       cell = nvmem_cell_get(rtk_phy->dev, "usb-dc-dis");
-       if (IS_ERR(cell)) {
-               dev_dbg(rtk_phy->dev, "%s no usb-dc-dis: %ld\n",
-                       __func__, PTR_ERR(cell));
-       } else {
-               unsigned char *buf;
-               size_t buf_size;
-
-               buf = nvmem_cell_read(cell, &buf_size);
-               if (!IS_ERR(buf)) {
-                       value = buf[0] & phy_cfg->dc_disconnect_mask;
-                       kfree(buf);
-               }
-               nvmem_cell_put(cell);
-       }
-
-       if (phy_cfg->check_efuse_version == CHECK_EFUSE_V1) {
-               int rate = phy_cfg->efuse_dc_disconnect_rate;
-
-               if (value <= EFUS_USB_DC_DIS_MAX)
-                       phy_parameter->efuse_usb_dc_dis = (int8_t)(value * rate);
-               else
-                       phy_parameter->efuse_usb_dc_dis = -(int8_t)
-                                   ((EFUS_USB_DC_DIS_MAX & value) * rate);
-       } else { /* for CHECK_EFUSE_V2 */
-               phy_parameter->efuse_usb_dc_dis = value & phy_cfg->dc_disconnect_mask;
-       }
-
-out:
-       return 0;
-}
-
-static int parse_phy_data(struct rtk_phy *rtk_phy)
-{
-       struct device *dev = rtk_phy->dev;
-       struct device_node *np = dev->of_node;
-       struct phy_parameter *phy_parameter;
-       int ret = 0;
-       int index;
-
-       rtk_phy->phy_parameter = devm_kzalloc(dev, sizeof(struct phy_parameter) *
-                                               rtk_phy->num_phy, GFP_KERNEL);
-       if (!rtk_phy->phy_parameter)
-               return -ENOMEM;
-
-       for (index = 0; index < rtk_phy->num_phy; index++) {
-               phy_parameter = &((struct phy_parameter *)rtk_phy->phy_parameter)[index];
-
-               phy_parameter->phy_reg.reg_wrap_vstatus = of_iomap(np, 0);
-               phy_parameter->phy_reg.reg_gusb2phyacc0 = of_iomap(np, 1) + index;
-               phy_parameter->phy_reg.vstatus_index = index;
-
-               if (of_property_read_bool(np, "realtek,inverse-hstx-sync-clock"))
-                       phy_parameter->inverse_hstx_sync_clock = true;
-               else
-                       phy_parameter->inverse_hstx_sync_clock = false;
-
-               if (of_property_read_u32_index(np, "realtek,driving-level",
-                                              index, &phy_parameter->driving_level))
-                       phy_parameter->driving_level = DEFAULT_DC_DRIVING_VALUE;
-
-               if (of_property_read_u32_index(np, "realtek,driving-level-compensate",
-                                              index, &phy_parameter->driving_level_compensate))
-                       phy_parameter->driving_level_compensate = 0;
-
-               if (of_property_read_u32_index(np, "realtek,disconnection-compensate",
-                                              index, &phy_parameter->disconnection_compensate))
-                       phy_parameter->disconnection_compensate = 0;
-
-               get_phy_data_by_efuse(rtk_phy, phy_parameter, index);
-
-               update_dc_driving_level(rtk_phy, phy_parameter);
-
-               update_hs_clk_select(rtk_phy, phy_parameter);
-       }
-
-       return ret;
-}
-
-static int rtk_usb2phy_probe(struct platform_device *pdev)
-{
-       struct rtk_phy *rtk_phy;
-       struct device *dev = &pdev->dev;
-       struct phy *generic_phy;
-       struct phy_provider *phy_provider;
-       const struct phy_cfg *phy_cfg;
-       int ret = 0;
-
-       phy_cfg = of_device_get_match_data(dev);
-       if (!phy_cfg) {
-               dev_err(dev, "phy config are not assigned!\n");
-               return -EINVAL;
-       }
-
-       rtk_phy = devm_kzalloc(dev, sizeof(*rtk_phy), GFP_KERNEL);
-       if (!rtk_phy)
-               return -ENOMEM;
-
-       rtk_phy->dev                    = &pdev->dev;
-       rtk_phy->phy.dev                = rtk_phy->dev;
-       rtk_phy->phy.label              = "rtk-usb2phy";
-       rtk_phy->phy.notify_port_status = rtk_phy_notify_port_status;
-
-       rtk_phy->phy_cfg = devm_kzalloc(dev, sizeof(*phy_cfg), GFP_KERNEL);
-
-       memcpy(rtk_phy->phy_cfg, phy_cfg, sizeof(*phy_cfg));
-
-       rtk_phy->num_phy = phy_cfg->num_phy;
-
-       ret = parse_phy_data(rtk_phy);
-       if (ret)
-               goto err;
-
-       platform_set_drvdata(pdev, rtk_phy);
-
-       generic_phy = devm_phy_create(rtk_phy->dev, NULL, &ops);
-       if (IS_ERR(generic_phy))
-               return PTR_ERR(generic_phy);
-
-       phy_set_drvdata(generic_phy, rtk_phy);
-
-       phy_provider = devm_of_phy_provider_register(rtk_phy->dev,
-                                                    of_phy_simple_xlate);
-       if (IS_ERR(phy_provider))
-               return PTR_ERR(phy_provider);
-
-       ret = usb_add_phy_dev(&rtk_phy->phy);
-       if (ret)
-               goto err;
-
-       create_debug_files(rtk_phy);
-
-err:
-       return ret;
-}
-
-static void rtk_usb2phy_remove(struct platform_device *pdev)
-{
-       struct rtk_phy *rtk_phy = platform_get_drvdata(pdev);
-
-       remove_debug_files(rtk_phy);
-
-       usb_remove_phy(&rtk_phy->phy);
-}
-
-static const struct phy_cfg rtd1295_phy_cfg = {
-       .page0_size = MAX_USB_PHY_PAGE0_DATA_SIZE,
-       .page0 = { [0] = {0xe0, 0x90},
-                  [3] = {0xe3, 0x3a},
-                  [4] = {0xe4, 0x68},
-                  [6] = {0xe6, 0x91},
-                 [13] = {0xf5, 0x81},
-                 [15] = {0xf7, 0x02}, },
-       .page1_size = 8,
-       .page1 = { /* default parameter */ },
-       .page2_size = 0,
-       .page2 = { /* no parameter */ },
-       .num_phy = 1,
-       .check_efuse = false,
-       .check_efuse_version = CHECK_EFUSE_V1,
-       .efuse_dc_driving_rate = 1,
-       .dc_driving_mask = 0xf,
-       .efuse_dc_disconnect_rate = EFUS_USB_DC_DIS_RATE,
-       .dc_disconnect_mask = 0xf,
-       .usb_dc_disconnect_at_page0 = true,
-       .do_toggle = true,
-       .do_toggle_driving = false,
-       .driving_updated_for_dev_dis = 0xf,
-       .use_default_parameter = false,
-       .is_double_sensitivity_mode = false,
-};
-
-static const struct phy_cfg rtd1395_phy_cfg = {
-       .page0_size = MAX_USB_PHY_PAGE0_DATA_SIZE,
-       .page0 = { [4] = {0xe4, 0xac},
-                 [13] = {0xf5, 0x00},
-                 [15] = {0xf7, 0x02}, },
-       .page1_size = 8,
-       .page1 = { /* default parameter */ },
-       .page2_size = 0,
-       .page2 = { /* no parameter */ },
-       .num_phy = 1,
-       .check_efuse = false,
-       .check_efuse_version = CHECK_EFUSE_V1,
-       .efuse_dc_driving_rate = 1,
-       .dc_driving_mask = 0xf,
-       .efuse_dc_disconnect_rate = EFUS_USB_DC_DIS_RATE,
-       .dc_disconnect_mask = 0xf,
-       .usb_dc_disconnect_at_page0 = true,
-       .do_toggle = true,
-       .do_toggle_driving = false,
-       .driving_updated_for_dev_dis = 0xf,
-       .use_default_parameter = false,
-       .is_double_sensitivity_mode = false,
-};
-
-static const struct phy_cfg rtd1395_phy_cfg_2port = {
-       .page0_size = MAX_USB_PHY_PAGE0_DATA_SIZE,
-       .page0 = { [4] = {0xe4, 0xac},
-                 [13] = {0xf5, 0x00},
-                 [15] = {0xf7, 0x02}, },
-       .page1_size = 8,
-       .page1 = { /* default parameter */ },
-       .page2_size = 0,
-       .page2 = { /* no parameter */ },
-       .num_phy = 2,
-       .check_efuse = false,
-       .check_efuse_version = CHECK_EFUSE_V1,
-       .efuse_dc_driving_rate = 1,
-       .dc_driving_mask = 0xf,
-       .efuse_dc_disconnect_rate = EFUS_USB_DC_DIS_RATE,
-       .dc_disconnect_mask = 0xf,
-       .usb_dc_disconnect_at_page0 = true,
-       .do_toggle = true,
-       .do_toggle_driving = false,
-       .driving_updated_for_dev_dis = 0xf,
-       .use_default_parameter = false,
-       .is_double_sensitivity_mode = false,
-};
-
-static const struct phy_cfg rtd1619_phy_cfg = {
-       .page0_size = MAX_USB_PHY_PAGE0_DATA_SIZE,
-       .page0 = { [4] = {0xe4, 0x68}, },
-       .page1_size = 8,
-       .page1 = { /* default parameter */ },
-       .page2_size = 0,
-       .page2 = { /* no parameter */ },
-       .num_phy = 1,
-       .check_efuse = true,
-       .check_efuse_version = CHECK_EFUSE_V1,
-       .efuse_dc_driving_rate = 1,
-       .dc_driving_mask = 0xf,
-       .efuse_dc_disconnect_rate = EFUS_USB_DC_DIS_RATE,
-       .dc_disconnect_mask = 0xf,
-       .usb_dc_disconnect_at_page0 = true,
-       .do_toggle = true,
-       .do_toggle_driving = false,
-       .driving_updated_for_dev_dis = 0xf,
-       .use_default_parameter = false,
-       .is_double_sensitivity_mode = false,
-};
-
-static const struct phy_cfg rtd1319_phy_cfg = {
-       .page0_size = MAX_USB_PHY_PAGE0_DATA_SIZE,
-       .page0 = { [0] = {0xe0, 0x18},
-                  [4] = {0xe4, 0x6a},
-                  [7] = {0xe7, 0x71},
-                 [13] = {0xf5, 0x15},
-                 [15] = {0xf7, 0x32}, },
-       .page1_size = 8,
-       .page1 = { [3] = {0xe3, 0x44}, },
-       .page2_size = MAX_USB_PHY_PAGE2_DATA_SIZE,
-       .page2 = { [0] = {0xe0, 0x01}, },
-       .num_phy = 1,
-       .check_efuse = true,
-       .check_efuse_version = CHECK_EFUSE_V1,
-       .efuse_dc_driving_rate = 1,
-       .dc_driving_mask = 0xf,
-       .efuse_dc_disconnect_rate = EFUS_USB_DC_DIS_RATE,
-       .dc_disconnect_mask = 0xf,
-       .usb_dc_disconnect_at_page0 = true,
-       .do_toggle = true,
-       .do_toggle_driving = true,
-       .driving_updated_for_dev_dis = 0xf,
-       .use_default_parameter = false,
-       .is_double_sensitivity_mode = true,
-};
-
-static const struct phy_cfg rtd1312c_phy_cfg = {
-       .page0_size = MAX_USB_PHY_PAGE0_DATA_SIZE,
-       .page0 = { [0] = {0xe0, 0x14},
-                  [4] = {0xe4, 0x67},
-                  [5] = {0xe5, 0x55}, },
-       .page1_size = 8,
-       .page1 = { [3] = {0xe3, 0x23},
-                  [6] = {0xe6, 0x58}, },
-       .page2_size = MAX_USB_PHY_PAGE2_DATA_SIZE,
-       .page2 = { /* default parameter */ },
-       .num_phy = 1,
-       .check_efuse = true,
-       .check_efuse_version = CHECK_EFUSE_V1,
-       .efuse_dc_driving_rate = 1,
-       .dc_driving_mask = 0xf,
-       .efuse_dc_disconnect_rate = EFUS_USB_DC_DIS_RATE,
-       .dc_disconnect_mask = 0xf,
-       .usb_dc_disconnect_at_page0 = true,
-       .do_toggle = true,
-       .do_toggle_driving = true,
-       .driving_updated_for_dev_dis = 0xf,
-       .use_default_parameter = false,
-       .is_double_sensitivity_mode = true,
-};
-
-static const struct phy_cfg rtd1619b_phy_cfg = {
-       .page0_size = MAX_USB_PHY_PAGE0_DATA_SIZE,
-       .page0 = { [0] = {0xe0, 0xa3},
-                  [4] = {0xe4, 0x88},
-                  [5] = {0xe5, 0x4f},
-                  [6] = {0xe6, 0x02}, },
-       .page1_size = 8,
-       .page1 = { [3] = {0xe3, 0x64}, },
-       .page2_size = MAX_USB_PHY_PAGE2_DATA_SIZE,
-       .page2 = { [7] = {0xe7, 0x45}, },
-       .num_phy = 1,
-       .check_efuse = true,
-       .check_efuse_version = CHECK_EFUSE_V1,
-       .efuse_dc_driving_rate = EFUS_USB_DC_CAL_RATE,
-       .dc_driving_mask = 0x1f,
-       .efuse_dc_disconnect_rate = EFUS_USB_DC_DIS_RATE,
-       .dc_disconnect_mask = 0xf,
-       .usb_dc_disconnect_at_page0 = false,
-       .do_toggle = true,
-       .do_toggle_driving = true,
-       .driving_updated_for_dev_dis = 0x8,
-       .use_default_parameter = false,
-       .is_double_sensitivity_mode = true,
-};
-
-static const struct phy_cfg rtd1319d_phy_cfg = {
-       .page0_size = MAX_USB_PHY_PAGE0_DATA_SIZE,
-       .page0 = { [0] = {0xe0, 0xa3},
-                  [4] = {0xe4, 0x8e},
-                  [5] = {0xe5, 0x4f},
-                  [6] = {0xe6, 0x02}, },
-       .page1_size = MAX_USB_PHY_PAGE1_DATA_SIZE,
-       .page1 = { [14] = {0xf5, 0x1}, },
-       .page2_size = MAX_USB_PHY_PAGE2_DATA_SIZE,
-       .page2 = { [7] = {0xe7, 0x44}, },
-       .check_efuse = true,
-       .num_phy = 1,
-       .check_efuse_version = CHECK_EFUSE_V1,
-       .efuse_dc_driving_rate = EFUS_USB_DC_CAL_RATE,
-       .dc_driving_mask = 0x1f,
-       .efuse_dc_disconnect_rate = EFUS_USB_DC_DIS_RATE,
-       .dc_disconnect_mask = 0xf,
-       .usb_dc_disconnect_at_page0 = false,
-       .do_toggle = true,
-       .do_toggle_driving = false,
-       .driving_updated_for_dev_dis = 0x8,
-       .use_default_parameter = false,
-       .is_double_sensitivity_mode = true,
-};
-
-static const struct phy_cfg rtd1315e_phy_cfg = {
-       .page0_size = MAX_USB_PHY_PAGE0_DATA_SIZE,
-       .page0 = { [0] = {0xe0, 0xa3},
-                  [4] = {0xe4, 0x8c},
-                  [5] = {0xe5, 0x4f},
-                  [6] = {0xe6, 0x02}, },
-       .page1_size = MAX_USB_PHY_PAGE1_DATA_SIZE,
-       .page1 = { [3] = {0xe3, 0x7f},
-                 [14] = {0xf5, 0x01}, },
-       .page2_size = MAX_USB_PHY_PAGE2_DATA_SIZE,
-       .page2 = { [7] = {0xe7, 0x44}, },
-       .num_phy = 1,
-       .check_efuse = true,
-       .check_efuse_version = CHECK_EFUSE_V2,
-       .efuse_dc_driving_rate = EFUS_USB_DC_CAL_RATE,
-       .dc_driving_mask = 0x1f,
-       .efuse_dc_disconnect_rate = EFUS_USB_DC_DIS_RATE,
-       .dc_disconnect_mask = 0xf,
-       .usb_dc_disconnect_at_page0 = false,
-       .do_toggle = true,
-       .do_toggle_driving = false,
-       .driving_updated_for_dev_dis = 0x8,
-       .use_default_parameter = false,
-       .is_double_sensitivity_mode = true,
-};
-
-static const struct of_device_id usbphy_rtk_dt_match[] = {
-       { .compatible = "realtek,rtd1295-usb2phy", .data = &rtd1295_phy_cfg },
-       { .compatible = "realtek,rtd1312c-usb2phy", .data = &rtd1312c_phy_cfg },
-       { .compatible = "realtek,rtd1315e-usb2phy", .data = &rtd1315e_phy_cfg },
-       { .compatible = "realtek,rtd1319-usb2phy", .data = &rtd1319_phy_cfg },
-       { .compatible = "realtek,rtd1319d-usb2phy", .data = &rtd1319d_phy_cfg },
-       { .compatible = "realtek,rtd1395-usb2phy", .data = &rtd1395_phy_cfg },
-       { .compatible = "realtek,rtd1395-usb2phy-2port", .data = &rtd1395_phy_cfg_2port },
-       { .compatible = "realtek,rtd1619-usb2phy", .data = &rtd1619_phy_cfg },
-       { .compatible = "realtek,rtd1619b-usb2phy", .data = &rtd1619b_phy_cfg },
-       {},
-};
-MODULE_DEVICE_TABLE(of, usbphy_rtk_dt_match);
-
-static struct platform_driver rtk_usb2phy_driver = {
-       .probe          = rtk_usb2phy_probe,
-       .remove_new     = rtk_usb2phy_remove,
-       .driver         = {
-               .name   = "rtk-usb2phy",
-               .of_match_table = usbphy_rtk_dt_match,
-       },
-};
-
-module_platform_driver(rtk_usb2phy_driver);
-
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform: rtk-usb2phy");
-MODULE_AUTHOR("Stanley Chang <stanley_chang@realtek.com>");
-MODULE_DESCRIPTION("Realtek usb 2.0 phy driver");
diff --git a/drivers/phy/realtek/phy-rtk-usb3.c b/drivers/phy/realtek/phy-rtk-usb3.c
deleted file mode 100644 (file)
index 67446a8..0000000
+++ /dev/null
@@ -1,761 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- *  phy-rtk-usb3.c RTK usb3.0 phy driver
- *
- * copyright (c) 2023 realtek semiconductor corporation
- *
- */
-
-#include <linux/module.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/platform_device.h>
-#include <linux/uaccess.h>
-#include <linux/debugfs.h>
-#include <linux/nvmem-consumer.h>
-#include <linux/regmap.h>
-#include <linux/sys_soc.h>
-#include <linux/mfd/syscon.h>
-#include <linux/phy/phy.h>
-#include <linux/usb.h>
-#include <linux/usb/hcd.h>
-#include <linux/usb/phy.h>
-
-#define USB_MDIO_CTRL_PHY_BUSY BIT(7)
-#define USB_MDIO_CTRL_PHY_WRITE BIT(0)
-#define USB_MDIO_CTRL_PHY_ADDR_SHIFT 8
-#define USB_MDIO_CTRL_PHY_DATA_SHIFT 16
-
-#define MAX_USB_PHY_DATA_SIZE 0x30
-#define PHY_ADDR_0X09 0x09
-#define PHY_ADDR_0X0B 0x0b
-#define PHY_ADDR_0X0D 0x0d
-#define PHY_ADDR_0X10 0x10
-#define PHY_ADDR_0X1F 0x1f
-#define PHY_ADDR_0X20 0x20
-#define PHY_ADDR_0X21 0x21
-#define PHY_ADDR_0X30 0x30
-
-#define REG_0X09_FORCE_CALIBRATION BIT(9)
-#define REG_0X0B_RX_OFFSET_RANGE_MASK 0xc
-#define REG_0X0D_RX_DEBUG_TEST_EN BIT(6)
-#define REG_0X10_DEBUG_MODE_SETTING 0x3c0
-#define REG_0X10_DEBUG_MODE_SETTING_MASK 0x3f8
-#define REG_0X1F_RX_OFFSET_CODE_MASK 0x1e
-
-#define USB_U3_TX_LFPS_SWING_TRIM_SHIFT 4
-#define USB_U3_TX_LFPS_SWING_TRIM_MASK 0xf
-#define AMPLITUDE_CONTROL_COARSE_MASK 0xff
-#define AMPLITUDE_CONTROL_FINE_MASK 0xffff
-#define AMPLITUDE_CONTROL_COARSE_DEFAULT 0xff
-#define AMPLITUDE_CONTROL_FINE_DEFAULT 0xffff
-
-#define PHY_ADDR_MAP_ARRAY_INDEX(addr) (addr)
-#define ARRAY_INDEX_MAP_PHY_ADDR(index) (index)
-
-struct phy_reg {
-       void __iomem *reg_mdio_ctl;
-};
-
-struct phy_data {
-       u8 addr;
-       u16 data;
-};
-
-struct phy_cfg {
-       int param_size;
-       struct phy_data param[MAX_USB_PHY_DATA_SIZE];
-
-       bool check_efuse;
-       bool do_toggle;
-       bool do_toggle_once;
-       bool use_default_parameter;
-       bool check_rx_front_end_offset;
-};
-
-struct phy_parameter {
-       struct phy_reg phy_reg;
-
-       /* Get from efuse */
-       u8 efuse_usb_u3_tx_lfps_swing_trim;
-
-       /* Get from dts */
-       u32 amplitude_control_coarse;
-       u32 amplitude_control_fine;
-};
-
-struct rtk_phy {
-       struct usb_phy phy;
-       struct device *dev;
-
-       struct phy_cfg *phy_cfg;
-       int num_phy;
-       struct phy_parameter *phy_parameter;
-
-       struct dentry *debug_dir;
-};
-
-#define PHY_IO_TIMEOUT_USEC            (50000)
-#define PHY_IO_DELAY_US                        (100)
-
-static inline int utmi_wait_register(void __iomem *reg, u32 mask, u32 result)
-{
-       int ret;
-       unsigned int val;
-
-       ret = read_poll_timeout(readl, val, ((val & mask) == result),
-                               PHY_IO_DELAY_US, PHY_IO_TIMEOUT_USEC, false, reg);
-       if (ret) {
-               pr_err("%s can't program USB phy\n", __func__);
-               return -ETIMEDOUT;
-       }
-
-       return 0;
-}
-
-static int rtk_phy3_wait_vbusy(struct phy_reg *phy_reg)
-{
-       return utmi_wait_register(phy_reg->reg_mdio_ctl, USB_MDIO_CTRL_PHY_BUSY, 0);
-}
-
-static u16 rtk_phy_read(struct phy_reg *phy_reg, char addr)
-{
-       unsigned int tmp;
-       u32 value;
-
-       tmp = (addr << USB_MDIO_CTRL_PHY_ADDR_SHIFT);
-
-       writel(tmp, phy_reg->reg_mdio_ctl);
-
-       rtk_phy3_wait_vbusy(phy_reg);
-
-       value = readl(phy_reg->reg_mdio_ctl);
-       value = value >> USB_MDIO_CTRL_PHY_DATA_SHIFT;
-
-       return (u16)value;
-}
-
-static int rtk_phy_write(struct phy_reg *phy_reg, char addr, u16 data)
-{
-       unsigned int val;
-
-       val = USB_MDIO_CTRL_PHY_WRITE |
-                   (addr << USB_MDIO_CTRL_PHY_ADDR_SHIFT) |
-                   (data << USB_MDIO_CTRL_PHY_DATA_SHIFT);
-
-       writel(val, phy_reg->reg_mdio_ctl);
-
-       rtk_phy3_wait_vbusy(phy_reg);
-
-       return 0;
-}
-
-static void do_rtk_usb3_phy_toggle(struct rtk_phy *rtk_phy, int index, bool connect)
-{
-       struct phy_cfg *phy_cfg = rtk_phy->phy_cfg;
-       struct phy_reg *phy_reg;
-       struct phy_parameter *phy_parameter;
-       struct phy_data *phy_data;
-       u8 addr;
-       u16 data;
-       int i;
-
-       phy_parameter = &((struct phy_parameter *)rtk_phy->phy_parameter)[index];
-       phy_reg = &phy_parameter->phy_reg;
-
-       if (!phy_cfg->do_toggle)
-               return;
-
-       i = PHY_ADDR_MAP_ARRAY_INDEX(PHY_ADDR_0X09);
-       phy_data = phy_cfg->param + i;
-       addr = phy_data->addr;
-       data = phy_data->data;
-
-       if (!addr && !data) {
-               addr = PHY_ADDR_0X09;
-               data = rtk_phy_read(phy_reg, addr);
-               phy_data->addr = addr;
-               phy_data->data = data;
-       }
-
-       rtk_phy_write(phy_reg, addr, data & (~REG_0X09_FORCE_CALIBRATION));
-       mdelay(1);
-       rtk_phy_write(phy_reg, addr, data | REG_0X09_FORCE_CALIBRATION);
-}
-
-static int do_rtk_phy_init(struct rtk_phy *rtk_phy, int index)
-{
-       struct phy_cfg *phy_cfg;
-       struct phy_reg *phy_reg;
-       struct phy_parameter *phy_parameter;
-       int i = 0;
-
-       phy_cfg = rtk_phy->phy_cfg;
-       phy_parameter = &((struct phy_parameter *)rtk_phy->phy_parameter)[index];
-       phy_reg = &phy_parameter->phy_reg;
-
-       if (phy_cfg->use_default_parameter)
-               goto do_toggle;
-
-       for (i = 0; i < phy_cfg->param_size; i++) {
-               struct phy_data *phy_data = phy_cfg->param + i;
-               u8 addr = phy_data->addr;
-               u16 data = phy_data->data;
-
-               if (!addr && !data)
-                       continue;
-
-               rtk_phy_write(phy_reg, addr, data);
-       }
-
-do_toggle:
-       if (phy_cfg->do_toggle_once)
-               phy_cfg->do_toggle = true;
-
-       do_rtk_usb3_phy_toggle(rtk_phy, index, false);
-
-       if (phy_cfg->do_toggle_once) {
-               u16 check_value = 0;
-               int count = 10;
-               u16 value_0x0d, value_0x10;
-
-               /* Enable Debug mode by set 0x0D and 0x10 */
-               value_0x0d = rtk_phy_read(phy_reg, PHY_ADDR_0X0D);
-               value_0x10 = rtk_phy_read(phy_reg, PHY_ADDR_0X10);
-
-               rtk_phy_write(phy_reg, PHY_ADDR_0X0D,
-                             value_0x0d | REG_0X0D_RX_DEBUG_TEST_EN);
-               rtk_phy_write(phy_reg, PHY_ADDR_0X10,
-                             (value_0x10 & ~REG_0X10_DEBUG_MODE_SETTING_MASK) |
-                             REG_0X10_DEBUG_MODE_SETTING);
-
-               check_value = rtk_phy_read(phy_reg, PHY_ADDR_0X30);
-
-               while (!(check_value & BIT(15))) {
-                       check_value = rtk_phy_read(phy_reg, PHY_ADDR_0X30);
-                       mdelay(1);
-                       if (count-- < 0)
-                               break;
-               }
-
-               if (!(check_value & BIT(15)))
-                       dev_info(rtk_phy->dev, "toggle fail addr=0x%02x, data=0x%04x\n",
-                                PHY_ADDR_0X30, check_value);
-
-               /* Disable Debug mode by set 0x0D and 0x10 to default*/
-               rtk_phy_write(phy_reg, PHY_ADDR_0X0D, value_0x0d);
-               rtk_phy_write(phy_reg, PHY_ADDR_0X10, value_0x10);
-
-               phy_cfg->do_toggle = false;
-       }
-
-       if (phy_cfg->check_rx_front_end_offset) {
-               u16 rx_offset_code, rx_offset_range;
-               u16 code_mask = REG_0X1F_RX_OFFSET_CODE_MASK;
-               u16 range_mask = REG_0X0B_RX_OFFSET_RANGE_MASK;
-               bool do_update = false;
-
-               rx_offset_code = rtk_phy_read(phy_reg, PHY_ADDR_0X1F);
-               if (((rx_offset_code & code_mask) == 0x0) ||
-                   ((rx_offset_code & code_mask) == code_mask))
-                       do_update = true;
-
-               rx_offset_range = rtk_phy_read(phy_reg, PHY_ADDR_0X0B);
-               if (((rx_offset_range & range_mask) == range_mask) && do_update) {
-                       dev_warn(rtk_phy->dev, "Don't update rx_offset_range (rx_offset_code=0x%x, rx_offset_range=0x%x)\n",
-                                rx_offset_code, rx_offset_range);
-                       do_update = false;
-               }
-
-               if (do_update) {
-                       u16 tmp1, tmp2;
-
-                       tmp1 = rx_offset_range & (~range_mask);
-                       tmp2 = rx_offset_range & range_mask;
-                       tmp2 += (1 << 2);
-                       rx_offset_range = tmp1 | (tmp2 & range_mask);
-                       rtk_phy_write(phy_reg, PHY_ADDR_0X0B, rx_offset_range);
-                       goto do_toggle;
-               }
-       }
-
-       return 0;
-}
-
-static int rtk_phy_init(struct phy *phy)
-{
-       struct rtk_phy *rtk_phy = phy_get_drvdata(phy);
-       int ret = 0;
-       int i;
-       unsigned long phy_init_time = jiffies;
-
-       for (i = 0; i < rtk_phy->num_phy; i++)
-               ret = do_rtk_phy_init(rtk_phy, i);
-
-       dev_dbg(rtk_phy->dev, "Initialized RTK USB 3.0 PHY (take %dms)\n",
-               jiffies_to_msecs(jiffies - phy_init_time));
-
-       return ret;
-}
-
-static int rtk_phy_exit(struct phy *phy)
-{
-       return 0;
-}
-
-static const struct phy_ops ops = {
-       .init           = rtk_phy_init,
-       .exit           = rtk_phy_exit,
-       .owner          = THIS_MODULE,
-};
-
-static void rtk_phy_toggle(struct usb_phy *usb3_phy, bool connect, int port)
-{
-       int index = port;
-       struct rtk_phy *rtk_phy = NULL;
-
-       rtk_phy = dev_get_drvdata(usb3_phy->dev);
-
-       if (index > rtk_phy->num_phy) {
-               dev_err(rtk_phy->dev, "%s: The port=%d is not in usb phy (num_phy=%d)\n",
-                       __func__, index, rtk_phy->num_phy);
-               return;
-       }
-
-       do_rtk_usb3_phy_toggle(rtk_phy, index, connect);
-}
-
-static int rtk_phy_notify_port_status(struct usb_phy *x, int port,
-                                     u16 portstatus, u16 portchange)
-{
-       bool connect = false;
-
-       pr_debug("%s port=%d portstatus=0x%x portchange=0x%x\n",
-                __func__, port, (int)portstatus, (int)portchange);
-       if (portstatus & USB_PORT_STAT_CONNECTION)
-               connect = true;
-
-       if (portchange & USB_PORT_STAT_C_CONNECTION)
-               rtk_phy_toggle(x, connect, port);
-
-       return 0;
-}
-
-#ifdef CONFIG_DEBUG_FS
-static struct dentry *create_phy_debug_root(void)
-{
-       struct dentry *phy_debug_root;
-
-       phy_debug_root = debugfs_lookup("phy", usb_debug_root);
-       if (!phy_debug_root)
-               phy_debug_root = debugfs_create_dir("phy", usb_debug_root);
-
-       return phy_debug_root;
-}
-
-static int rtk_usb3_parameter_show(struct seq_file *s, void *unused)
-{
-       struct rtk_phy *rtk_phy = s->private;
-       struct phy_cfg *phy_cfg;
-       int i, index;
-
-       phy_cfg = rtk_phy->phy_cfg;
-
-       seq_puts(s, "Property:\n");
-       seq_printf(s, "  check_efuse: %s\n",
-                  phy_cfg->check_efuse ? "Enable" : "Disable");
-       seq_printf(s, "  do_toggle: %s\n",
-                  phy_cfg->do_toggle ? "Enable" : "Disable");
-       seq_printf(s, "  do_toggle_once: %s\n",
-                  phy_cfg->do_toggle_once ? "Enable" : "Disable");
-       seq_printf(s, "  use_default_parameter: %s\n",
-                  phy_cfg->use_default_parameter ? "Enable" : "Disable");
-
-       for (index = 0; index < rtk_phy->num_phy; index++) {
-               struct phy_reg *phy_reg;
-               struct phy_parameter *phy_parameter;
-
-               phy_parameter = &((struct phy_parameter *)rtk_phy->phy_parameter)[index];
-               phy_reg = &phy_parameter->phy_reg;
-
-               seq_printf(s, "PHY %d:\n", index);
-
-               for (i = 0; i < phy_cfg->param_size; i++) {
-                       struct phy_data *phy_data = phy_cfg->param + i;
-                       u8 addr = ARRAY_INDEX_MAP_PHY_ADDR(i);
-                       u16 data = phy_data->data;
-
-                       if (!phy_data->addr && !data)
-                               seq_printf(s, "  addr = 0x%02x, data = none   ==> read value = 0x%04x\n",
-                                          addr, rtk_phy_read(phy_reg, addr));
-                       else
-                               seq_printf(s, "  addr = 0x%02x, data = 0x%04x ==> read value = 0x%04x\n",
-                                          addr, data, rtk_phy_read(phy_reg, addr));
-               }
-
-               seq_puts(s, "PHY Property:\n");
-               seq_printf(s, "  efuse_usb_u3_tx_lfps_swing_trim: 0x%x\n",
-                          (int)phy_parameter->efuse_usb_u3_tx_lfps_swing_trim);
-               seq_printf(s, "  amplitude_control_coarse: 0x%x\n",
-                          (int)phy_parameter->amplitude_control_coarse);
-               seq_printf(s, "  amplitude_control_fine: 0x%x\n",
-                          (int)phy_parameter->amplitude_control_fine);
-       }
-
-       return 0;
-}
-DEFINE_SHOW_ATTRIBUTE(rtk_usb3_parameter);
-
-static inline void create_debug_files(struct rtk_phy *rtk_phy)
-{
-       struct dentry *phy_debug_root = NULL;
-
-       phy_debug_root = create_phy_debug_root();
-
-       if (!phy_debug_root)
-               return;
-
-       rtk_phy->debug_dir = debugfs_create_dir(dev_name(rtk_phy->dev), phy_debug_root);
-
-       debugfs_create_file("parameter", 0444, rtk_phy->debug_dir, rtk_phy,
-                           &rtk_usb3_parameter_fops);
-
-       return;
-}
-
-static inline void remove_debug_files(struct rtk_phy *rtk_phy)
-{
-       debugfs_remove_recursive(rtk_phy->debug_dir);
-}
-#else
-static inline void create_debug_files(struct rtk_phy *rtk_phy) { }
-static inline void remove_debug_files(struct rtk_phy *rtk_phy) { }
-#endif /* CONFIG_DEBUG_FS */
-
-static int get_phy_data_by_efuse(struct rtk_phy *rtk_phy,
-                                struct phy_parameter *phy_parameter, int index)
-{
-       struct phy_cfg *phy_cfg = rtk_phy->phy_cfg;
-       u8 value = 0;
-       struct nvmem_cell *cell;
-
-       if (!phy_cfg->check_efuse)
-               goto out;
-
-       cell = nvmem_cell_get(rtk_phy->dev, "usb_u3_tx_lfps_swing_trim");
-       if (IS_ERR(cell)) {
-               dev_dbg(rtk_phy->dev, "%s no usb_u3_tx_lfps_swing_trim: %ld\n",
-                       __func__, PTR_ERR(cell));
-       } else {
-               unsigned char *buf;
-               size_t buf_size;
-
-               buf = nvmem_cell_read(cell, &buf_size);
-               if (!IS_ERR(buf)) {
-                       value = buf[0] & USB_U3_TX_LFPS_SWING_TRIM_MASK;
-                       kfree(buf);
-               }
-               nvmem_cell_put(cell);
-       }
-
-       if (value > 0 && value < 0x8)
-               phy_parameter->efuse_usb_u3_tx_lfps_swing_trim = 0x8;
-       else
-               phy_parameter->efuse_usb_u3_tx_lfps_swing_trim = (u8)value;
-
-out:
-       return 0;
-}
-
-static void update_amplitude_control_value(struct rtk_phy *rtk_phy,
-                                          struct phy_parameter *phy_parameter)
-{
-       struct phy_cfg *phy_cfg;
-       struct phy_reg *phy_reg;
-
-       phy_reg = &phy_parameter->phy_reg;
-       phy_cfg = rtk_phy->phy_cfg;
-
-       if (phy_parameter->amplitude_control_coarse != AMPLITUDE_CONTROL_COARSE_DEFAULT) {
-               u16 val_mask = AMPLITUDE_CONTROL_COARSE_MASK;
-               u16 data;
-
-               if (!phy_cfg->param[PHY_ADDR_0X20].addr && !phy_cfg->param[PHY_ADDR_0X20].data) {
-                       phy_cfg->param[PHY_ADDR_0X20].addr = PHY_ADDR_0X20;
-                       data = rtk_phy_read(phy_reg, PHY_ADDR_0X20);
-               } else {
-                       data = phy_cfg->param[PHY_ADDR_0X20].data;
-               }
-
-               data &= (~val_mask);
-               data |= (phy_parameter->amplitude_control_coarse & val_mask);
-
-               phy_cfg->param[PHY_ADDR_0X20].data = data;
-       }
-
-       if (phy_parameter->efuse_usb_u3_tx_lfps_swing_trim) {
-               u8 efuse_val = phy_parameter->efuse_usb_u3_tx_lfps_swing_trim;
-               u16 val_mask = USB_U3_TX_LFPS_SWING_TRIM_MASK;
-               int val_shift = USB_U3_TX_LFPS_SWING_TRIM_SHIFT;
-               u16 data;
-
-               if (!phy_cfg->param[PHY_ADDR_0X20].addr && !phy_cfg->param[PHY_ADDR_0X20].data) {
-                       phy_cfg->param[PHY_ADDR_0X20].addr = PHY_ADDR_0X20;
-                       data = rtk_phy_read(phy_reg, PHY_ADDR_0X20);
-               } else {
-                       data = phy_cfg->param[PHY_ADDR_0X20].data;
-               }
-
-               data &= ~(val_mask << val_shift);
-               data |= ((efuse_val & val_mask) << val_shift);
-
-               phy_cfg->param[PHY_ADDR_0X20].data = data;
-       }
-
-       if (phy_parameter->amplitude_control_fine != AMPLITUDE_CONTROL_FINE_DEFAULT) {
-               u16 val_mask = AMPLITUDE_CONTROL_FINE_MASK;
-
-               if (!phy_cfg->param[PHY_ADDR_0X21].addr && !phy_cfg->param[PHY_ADDR_0X21].data)
-                       phy_cfg->param[PHY_ADDR_0X21].addr = PHY_ADDR_0X21;
-
-               phy_cfg->param[PHY_ADDR_0X21].data =
-                           phy_parameter->amplitude_control_fine & val_mask;
-       }
-}
-
-static int parse_phy_data(struct rtk_phy *rtk_phy)
-{
-       struct device *dev = rtk_phy->dev;
-       struct phy_parameter *phy_parameter;
-       int ret = 0;
-       int index;
-
-       rtk_phy->phy_parameter = devm_kzalloc(dev, sizeof(struct phy_parameter) *
-                                             rtk_phy->num_phy, GFP_KERNEL);
-       if (!rtk_phy->phy_parameter)
-               return -ENOMEM;
-
-       for (index = 0; index < rtk_phy->num_phy; index++) {
-               phy_parameter = &((struct phy_parameter *)rtk_phy->phy_parameter)[index];
-
-               phy_parameter->phy_reg.reg_mdio_ctl = of_iomap(dev->of_node, 0) + index;
-
-               /* Amplitude control address 0x20 bit 0 to bit 7 */
-               if (of_property_read_u32(dev->of_node, "realtek,amplitude-control-coarse-tuning",
-                                        &phy_parameter->amplitude_control_coarse))
-                       phy_parameter->amplitude_control_coarse = AMPLITUDE_CONTROL_COARSE_DEFAULT;
-
-               /* Amplitude control address 0x21 bit 0 to bit 16 */
-               if (of_property_read_u32(dev->of_node, "realtek,amplitude-control-fine-tuning",
-                                        &phy_parameter->amplitude_control_fine))
-                       phy_parameter->amplitude_control_fine = AMPLITUDE_CONTROL_FINE_DEFAULT;
-
-               get_phy_data_by_efuse(rtk_phy, phy_parameter, index);
-
-               update_amplitude_control_value(rtk_phy, phy_parameter);
-       }
-
-       return ret;
-}
-
-static int rtk_usb3phy_probe(struct platform_device *pdev)
-{
-       struct rtk_phy *rtk_phy;
-       struct device *dev = &pdev->dev;
-       struct phy *generic_phy;
-       struct phy_provider *phy_provider;
-       const struct phy_cfg *phy_cfg;
-       int ret;
-
-       phy_cfg = of_device_get_match_data(dev);
-       if (!phy_cfg) {
-               dev_err(dev, "phy config are not assigned!\n");
-               return -EINVAL;
-       }
-
-       rtk_phy = devm_kzalloc(dev, sizeof(*rtk_phy), GFP_KERNEL);
-       if (!rtk_phy)
-               return -ENOMEM;
-
-       rtk_phy->dev                    = &pdev->dev;
-       rtk_phy->phy.dev                = rtk_phy->dev;
-       rtk_phy->phy.label              = "rtk-usb3phy";
-       rtk_phy->phy.notify_port_status = rtk_phy_notify_port_status;
-
-       rtk_phy->phy_cfg = devm_kzalloc(dev, sizeof(*phy_cfg), GFP_KERNEL);
-
-       memcpy(rtk_phy->phy_cfg, phy_cfg, sizeof(*phy_cfg));
-
-       rtk_phy->num_phy = 1;
-
-       ret = parse_phy_data(rtk_phy);
-       if (ret)
-               goto err;
-
-       platform_set_drvdata(pdev, rtk_phy);
-
-       generic_phy = devm_phy_create(rtk_phy->dev, NULL, &ops);
-       if (IS_ERR(generic_phy))
-               return PTR_ERR(generic_phy);
-
-       phy_set_drvdata(generic_phy, rtk_phy);
-
-       phy_provider = devm_of_phy_provider_register(rtk_phy->dev, of_phy_simple_xlate);
-       if (IS_ERR(phy_provider))
-               return PTR_ERR(phy_provider);
-
-       ret = usb_add_phy_dev(&rtk_phy->phy);
-       if (ret)
-               goto err;
-
-       create_debug_files(rtk_phy);
-
-err:
-       return ret;
-}
-
-static void rtk_usb3phy_remove(struct platform_device *pdev)
-{
-       struct rtk_phy *rtk_phy = platform_get_drvdata(pdev);
-
-       remove_debug_files(rtk_phy);
-
-       usb_remove_phy(&rtk_phy->phy);
-}
-
-static const struct phy_cfg rtd1295_phy_cfg = {
-       .param_size = MAX_USB_PHY_DATA_SIZE,
-       .param = {  [0] = {0x01, 0x4008},  [1] = {0x01, 0xe046},
-                   [2] = {0x02, 0x6046},  [3] = {0x03, 0x2779},
-                   [4] = {0x04, 0x72f5},  [5] = {0x05, 0x2ad3},
-                   [6] = {0x06, 0x000e},  [7] = {0x07, 0x2e00},
-                   [8] = {0x08, 0x3591},  [9] = {0x09, 0x525c},
-                  [10] = {0x0a, 0xa600}, [11] = {0x0b, 0xa904},
-                  [12] = {0x0c, 0xc000}, [13] = {0x0d, 0xef1c},
-                  [14] = {0x0e, 0x2000}, [15] = {0x0f, 0x0000},
-                  [16] = {0x10, 0x000c}, [17] = {0x11, 0x4c00},
-                  [18] = {0x12, 0xfc00}, [19] = {0x13, 0x0c81},
-                  [20] = {0x14, 0xde01}, [21] = {0x15, 0x0000},
-                  [22] = {0x16, 0x0000}, [23] = {0x17, 0x0000},
-                  [24] = {0x18, 0x0000}, [25] = {0x19, 0x4004},
-                  [26] = {0x1a, 0x1260}, [27] = {0x1b, 0xff00},
-                  [28] = {0x1c, 0xcb00}, [29] = {0x1d, 0xa03f},
-                  [30] = {0x1e, 0xc2e0}, [31] = {0x1f, 0x2807},
-                  [32] = {0x20, 0x947a}, [33] = {0x21, 0x88aa},
-                  [34] = {0x22, 0x0057}, [35] = {0x23, 0xab66},
-                  [36] = {0x24, 0x0800}, [37] = {0x25, 0x0000},
-                  [38] = {0x26, 0x040a}, [39] = {0x27, 0x01d6},
-                  [40] = {0x28, 0xf8c2}, [41] = {0x29, 0x3080},
-                  [42] = {0x2a, 0x3082}, [43] = {0x2b, 0x2078},
-                  [44] = {0x2c, 0xffff}, [45] = {0x2d, 0xffff},
-                  [46] = {0x2e, 0x0000}, [47] = {0x2f, 0x0040}, },
-       .check_efuse = false,
-       .do_toggle = true,
-       .do_toggle_once = false,
-       .use_default_parameter = false,
-       .check_rx_front_end_offset = false,
-};
-
-static const struct phy_cfg rtd1619_phy_cfg = {
-       .param_size = MAX_USB_PHY_DATA_SIZE,
-       .param = {  [8] = {0x08, 0x3591},
-                  [38] = {0x26, 0x840b},
-                  [40] = {0x28, 0xf842}, },
-       .check_efuse = false,
-       .do_toggle = true,
-       .do_toggle_once = false,
-       .use_default_parameter = false,
-       .check_rx_front_end_offset = false,
-};
-
-static const struct phy_cfg rtd1319_phy_cfg = {
-       .param_size = MAX_USB_PHY_DATA_SIZE,
-       .param = {  [1] = {0x01, 0xac86},
-                   [6] = {0x06, 0x0003},
-                   [9] = {0x09, 0x924c},
-                  [10] = {0x0a, 0xa608},
-                  [11] = {0x0b, 0xb905},
-                  [14] = {0x0e, 0x2010},
-                  [32] = {0x20, 0x705a},
-                  [33] = {0x21, 0xf645},
-                  [34] = {0x22, 0x0013},
-                  [35] = {0x23, 0xcb66},
-                  [41] = {0x29, 0xff00}, },
-       .check_efuse = true,
-       .do_toggle = true,
-       .do_toggle_once = false,
-       .use_default_parameter = false,
-       .check_rx_front_end_offset = false,
-};
-
-static const struct phy_cfg rtd1619b_phy_cfg = {
-       .param_size = MAX_USB_PHY_DATA_SIZE,
-       .param = {  [1] = {0x01, 0xac8c},
-                   [6] = {0x06, 0x0017},
-                   [9] = {0x09, 0x724c},
-                  [10] = {0x0a, 0xb610},
-                  [11] = {0x0b, 0xb90d},
-                  [13] = {0x0d, 0xef2a},
-                  [15] = {0x0f, 0x9050},
-                  [16] = {0x10, 0x000c},
-                  [32] = {0x20, 0x70ff},
-                  [34] = {0x22, 0x0013},
-                  [35] = {0x23, 0xdb66},
-                  [38] = {0x26, 0x8609},
-                  [41] = {0x29, 0xff13},
-                  [42] = {0x2a, 0x3070}, },
-       .check_efuse = true,
-       .do_toggle = false,
-       .do_toggle_once = true,
-       .use_default_parameter = false,
-       .check_rx_front_end_offset = false,
-};
-
-static const  struct phy_cfg rtd1319d_phy_cfg = {
-       .param_size = MAX_USB_PHY_DATA_SIZE,
-       .param = {  [1] = {0x01, 0xac89},
-                   [4] = {0x04, 0xf2f5},
-                   [6] = {0x06, 0x0017},
-                   [9] = {0x09, 0x424c},
-                  [10] = {0x0a, 0x9610},
-                  [11] = {0x0b, 0x9901},
-                  [12] = {0x0c, 0xf000},
-                  [13] = {0x0d, 0xef2a},
-                  [14] = {0x0e, 0x1000},
-                  [15] = {0x0f, 0x9050},
-                  [32] = {0x20, 0x7077},
-                  [35] = {0x23, 0x0b62},
-                  [37] = {0x25, 0x10ec},
-                  [42] = {0x2a, 0x3070}, },
-       .check_efuse = true,
-       .do_toggle = false,
-       .do_toggle_once = true,
-       .use_default_parameter = false,
-       .check_rx_front_end_offset = true,
-};
-
-static const struct of_device_id usbphy_rtk_dt_match[] = {
-       { .compatible = "realtek,rtd1295-usb3phy", .data = &rtd1295_phy_cfg },
-       { .compatible = "realtek,rtd1319-usb3phy", .data = &rtd1319_phy_cfg },
-       { .compatible = "realtek,rtd1319d-usb3phy", .data = &rtd1319d_phy_cfg },
-       { .compatible = "realtek,rtd1619-usb3phy", .data = &rtd1619_phy_cfg },
-       { .compatible = "realtek,rtd1619b-usb3phy", .data = &rtd1619b_phy_cfg },
-       {},
-};
-MODULE_DEVICE_TABLE(of, usbphy_rtk_dt_match);
-
-static struct platform_driver rtk_usb3phy_driver = {
-       .probe          = rtk_usb3phy_probe,
-       .remove_new     = rtk_usb3phy_remove,
-       .driver         = {
-               .name   = "rtk-usb3phy",
-               .of_match_table = usbphy_rtk_dt_match,
-       },
-};
-
-module_platform_driver(rtk_usb3phy_driver);
-
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform: rtk-usb3phy");
-MODULE_AUTHOR("Stanley Chang <stanley_chang@realtek.com>");
-MODULE_DESCRIPTION("Realtek usb 3.0 phy driver");
index cd6ac04..c310471 100644 (file)
@@ -964,33 +964,6 @@ static const struct pci_device_id pmc_pci_ids[] = {
        { }
 };
 
-static int amd_pmc_get_dram_size(struct amd_pmc_dev *dev)
-{
-       int ret;
-
-       switch (dev->cpu_id) {
-       case AMD_CPU_ID_YC:
-               if (!(dev->major > 90 || (dev->major == 90 && dev->minor > 39))) {
-                       ret = -EINVAL;
-                       goto err_dram_size;
-               }
-               break;
-       default:
-               ret = -EINVAL;
-               goto err_dram_size;
-       }
-
-       ret = amd_pmc_send_cmd(dev, S2D_DRAM_SIZE, &dev->dram_size, dev->s2d_msg_id, true);
-       if (ret || !dev->dram_size)
-               goto err_dram_size;
-
-       return 0;
-
-err_dram_size:
-       dev_err(dev->dev, "DRAM size command not supported for this platform\n");
-       return ret;
-}
-
 static int amd_pmc_s2d_init(struct amd_pmc_dev *dev)
 {
        u32 phys_addr_low, phys_addr_hi;
@@ -1009,8 +982,8 @@ static int amd_pmc_s2d_init(struct amd_pmc_dev *dev)
                return -EIO;
 
        /* Get DRAM size */
-       ret = amd_pmc_get_dram_size(dev);
-       if (ret)
+       ret = amd_pmc_send_cmd(dev, S2D_DRAM_SIZE, &dev->dram_size, dev->s2d_msg_id, true);
+       if (ret || !dev->dram_size)
                dev->dram_size = S2D_TELEMETRY_DRAMBYTES_MAX;
 
        /* Get STB DRAM address */
index 5798b49..8c9f4f3 100644 (file)
@@ -588,17 +588,14 @@ static void release_attributes_data(void)
 static int hp_add_other_attributes(int attr_type)
 {
        struct kobject *attr_name_kobj;
-       union acpi_object *obj = NULL;
        int ret;
        char *attr_name;
 
-       mutex_lock(&bioscfg_drv.mutex);
-
        attr_name_kobj = kzalloc(sizeof(*attr_name_kobj), GFP_KERNEL);
-       if (!attr_name_kobj) {
-               ret = -ENOMEM;
-               goto err_other_attr_init;
-       }
+       if (!attr_name_kobj)
+               return -ENOMEM;
+
+       mutex_lock(&bioscfg_drv.mutex);
 
        /* Check if attribute type is supported */
        switch (attr_type) {
@@ -615,14 +612,14 @@ static int hp_add_other_attributes(int attr_type)
        default:
                pr_err("Error: Unknown attr_type: %d\n", attr_type);
                ret = -EINVAL;
-               goto err_other_attr_init;
+               kfree(attr_name_kobj);
+               goto unlock_drv_mutex;
        }
 
        ret = kobject_init_and_add(attr_name_kobj, &attr_name_ktype,
                                   NULL, "%s", attr_name);
        if (ret) {
                pr_err("Error encountered [%d]\n", ret);
-               kobject_put(attr_name_kobj);
                goto err_other_attr_init;
        }
 
@@ -630,27 +627,26 @@ static int hp_add_other_attributes(int attr_type)
        switch (attr_type) {
        case HPWMI_SECURE_PLATFORM_TYPE:
                ret = hp_populate_secure_platform_data(attr_name_kobj);
-               if (ret)
-                       goto err_other_attr_init;
                break;
 
        case HPWMI_SURE_START_TYPE:
                ret = hp_populate_sure_start_data(attr_name_kobj);
-               if (ret)
-                       goto err_other_attr_init;
                break;
 
        default:
                ret = -EINVAL;
-               goto err_other_attr_init;
        }
 
+       if (ret)
+               goto err_other_attr_init;
+
        mutex_unlock(&bioscfg_drv.mutex);
        return 0;
 
 err_other_attr_init:
+       kobject_put(attr_name_kobj);
+unlock_drv_mutex:
        mutex_unlock(&bioscfg_drv.mutex);
-       kfree(obj);
        return ret;
 }
 
index ac03754..88eefcc 100644 (file)
@@ -1425,18 +1425,17 @@ static int ideapad_kbd_bl_init(struct ideapad_private *priv)
        if (WARN_ON(priv->kbd_bl.initialized))
                return -EEXIST;
 
-       brightness = ideapad_kbd_bl_brightness_get(priv);
-       if (brightness < 0)
-               return brightness;
-
-       priv->kbd_bl.last_brightness = brightness;
-
        if (ideapad_kbd_bl_check_tristate(priv->kbd_bl.type)) {
                priv->kbd_bl.led.max_brightness = 2;
        } else {
                priv->kbd_bl.led.max_brightness = 1;
        }
 
+       brightness = ideapad_kbd_bl_brightness_get(priv);
+       if (brightness < 0)
+               return brightness;
+
+       priv->kbd_bl.last_brightness = brightness;
        priv->kbd_bl.led.name                    = "platform::" LED_FUNCTION_KBD_BACKLIGHT;
        priv->kbd_bl.led.brightness_get          = ideapad_kbd_bl_led_cdev_brightness_get;
        priv->kbd_bl.led.brightness_set_blocking = ideapad_kbd_bl_led_cdev_brightness_set;
index fdf55b5..e4be40f 100644 (file)
@@ -102,7 +102,7 @@ static const struct telemetry_core_ops telm_defpltops = {
 /**
  * telemetry_update_events() - Update telemetry Configuration
  * @pss_evtconfig: PSS related config. No change if num_evts = 0.
- * @pss_evtconfig: IOSS related config. No change if num_evts = 0.
+ * @ioss_evtconfig: IOSS related config. No change if num_evts = 0.
  *
  * This API updates the IOSS & PSS Telemetry configuration. Old config
  * is overwritten. Call telemetry_reset_events when logging is over
@@ -176,7 +176,7 @@ EXPORT_SYMBOL_GPL(telemetry_reset_events);
 /**
  * telemetry_get_eventconfig() - Returns the pss and ioss events enabled
  * @pss_evtconfig: Pointer to PSS related configuration.
- * @pss_evtconfig: Pointer to IOSS related configuration.
+ * @ioss_evtconfig: Pointer to IOSS related configuration.
  * @pss_len:      Number of u32 elements allocated for pss_evtconfig array
  * @ioss_len:     Number of u32 elements allocated for ioss_evtconfig array
  *
index 3f7a747..7513018 100644 (file)
@@ -572,7 +572,8 @@ ssize_t ptp_read(struct posix_clock_context *pccontext, uint rdflags,
 
        for (i = 0; i < cnt; i++) {
                event[i] = queue->buf[queue->head];
-               queue->head = (queue->head + 1) % PTP_MAX_TIMESTAMPS;
+               /* Paired with READ_ONCE() in queue_cnt() */
+               WRITE_ONCE(queue->head, (queue->head + 1) % PTP_MAX_TIMESTAMPS);
        }
 
        spin_unlock_irqrestore(&queue->lock, flags);
index 3134568..15b804b 100644 (file)
@@ -57,10 +57,11 @@ static void enqueue_external_timestamp(struct timestamp_event_queue *queue,
        dst->t.sec = seconds;
        dst->t.nsec = remainder;
 
+       /* Both WRITE_ONCE() are paired with READ_ONCE() in queue_cnt() */
        if (!queue_free(queue))
-               queue->head = (queue->head + 1) % PTP_MAX_TIMESTAMPS;
+               WRITE_ONCE(queue->head, (queue->head + 1) % PTP_MAX_TIMESTAMPS);
 
-       queue->tail = (queue->tail + 1) % PTP_MAX_TIMESTAMPS;
+       WRITE_ONCE(queue->tail, (queue->tail + 1) % PTP_MAX_TIMESTAMPS);
 
        spin_unlock_irqrestore(&queue->lock, flags);
 }
index 35fde0a..45f9002 100644 (file)
@@ -85,9 +85,13 @@ struct ptp_vclock {
  * that a writer might concurrently increment the tail does not
  * matter, since the queue remains nonempty nonetheless.
  */
-static inline int queue_cnt(struct timestamp_event_queue *q)
+static inline int queue_cnt(const struct timestamp_event_queue *q)
 {
-       int cnt = q->tail - q->head;
+       /*
+        * Paired with WRITE_ONCE() in enqueue_external_timestamp(),
+        * ptp_read(), extts_fifo_show().
+        */
+       int cnt = READ_ONCE(q->tail) - READ_ONCE(q->head);
        return cnt < 0 ? PTP_MAX_TIMESTAMPS + cnt : cnt;
 }
 
index 7d023d9..f7a499a 100644 (file)
@@ -94,7 +94,8 @@ static ssize_t extts_fifo_show(struct device *dev,
        qcnt = queue_cnt(queue);
        if (qcnt) {
                event = queue->buf[queue->head];
-               queue->head = (queue->head + 1) % PTP_MAX_TIMESTAMPS;
+               /* Paired with READ_ONCE() in queue_cnt() */
+               WRITE_ONCE(queue->head, (queue->head + 1) % PTP_MAX_TIMESTAMPS);
        }
        spin_unlock_irqrestore(&queue->lock, flags);
 
index d440319..833cfab 100644 (file)
@@ -676,18 +676,20 @@ static void dasd_profile_start(struct dasd_block *block,
         * we count each request only once.
         */
        device = cqr->startdev;
-       if (device->profile.data) {
-               counter = 1; /* request is not yet queued on the start device */
-               list_for_each(l, &device->ccw_queue)
-                       if (++counter >= 31)
-                               break;
-       }
+       if (!device->profile.data)
+               return;
+
+       spin_lock(get_ccwdev_lock(device->cdev));
+       counter = 1; /* request is not yet queued on the start device */
+       list_for_each(l, &device->ccw_queue)
+               if (++counter >= 31)
+                       break;
+       spin_unlock(get_ccwdev_lock(device->cdev));
+
        spin_lock(&device->profile.lock);
-       if (device->profile.data) {
-               device->profile.data->dasd_io_nr_req[counter]++;
-               if (rq_data_dir(req) == READ)
-                       device->profile.data->dasd_read_nr_req[counter]++;
-       }
+       device->profile.data->dasd_io_nr_req[counter]++;
+       if (rq_data_dir(req) == READ)
+               device->profile.data->dasd_read_nr_req[counter]++;
        spin_unlock(&device->profile.lock);
 }
 
index 2e66313..1b1b8a4 100644 (file)
@@ -283,7 +283,7 @@ struct dasd_pprc_dev_info {
        __u8 secondary;         /* 7       Secondary device address */
        __u16 pprc_id;          /* 8-9     Peer-to-Peer Remote Copy ID */
        __u8 reserved2[12];     /* 10-21   reserved */
-       __u16 prim_cu_ssid;     /* 22-23   Pimary Control Unit SSID */
+       __u16 prim_cu_ssid;     /* 22-23   Primary Control Unit SSID */
        __u8 reserved3[12];     /* 24-35   reserved */
        __u16 sec_cu_ssid;      /* 36-37   Secondary Control Unit SSID */
        __u8 reserved4[90];     /* 38-127  reserved */
index 4902d45..c61e642 100644 (file)
@@ -103,10 +103,11 @@ config CCWGROUP
 config ISM
        tristate "Support for ISM vPCI Adapter"
        depends on PCI
+       imply SMC
        default n
        help
          Select this option if you want to use the Internal Shared Memory
-         vPCI Adapter.
+         vPCI Adapter. The adapter can be used with the SMC network protocol.
 
          To compile as a module choose M. The module name is ism.
          If unsure, choose N.
index 6df7f37..81aabbf 100644 (file)
@@ -30,7 +30,6 @@ static const struct pci_device_id ism_device_table[] = {
 MODULE_DEVICE_TABLE(pci, ism_device_table);
 
 static debug_info_t *ism_debug_info;
-static const struct smcd_ops ism_ops;
 
 #define NO_CLIENT              0xff            /* must be >= MAX_CLIENTS */
 static struct ism_client *clients[MAX_CLIENTS];        /* use an array rather than */
@@ -289,22 +288,6 @@ out:
        return ret;
 }
 
-static int ism_query_rgid(struct ism_dev *ism, u64 rgid, u32 vid_valid,
-                         u32 vid)
-{
-       union ism_query_rgid cmd;
-
-       memset(&cmd, 0, sizeof(cmd));
-       cmd.request.hdr.cmd = ISM_QUERY_RGID;
-       cmd.request.hdr.len = sizeof(cmd.request);
-
-       cmd.request.rgid = rgid;
-       cmd.request.vlan_valid = vid_valid;
-       cmd.request.vlan_id = vid;
-
-       return ism_cmd(ism, &cmd);
-}
-
 static void ism_free_dmb(struct ism_dev *ism, struct ism_dmb *dmb)
 {
        clear_bit(dmb->sba_idx, ism->sba_bitmap);
@@ -429,23 +412,6 @@ static int ism_del_vlan_id(struct ism_dev *ism, u64 vlan_id)
        return ism_cmd(ism, &cmd);
 }
 
-static int ism_signal_ieq(struct ism_dev *ism, u64 rgid, u32 trigger_irq,
-                         u32 event_code, u64 info)
-{
-       union ism_sig_ieq cmd;
-
-       memset(&cmd, 0, sizeof(cmd));
-       cmd.request.hdr.cmd = ISM_SIGNAL_IEQ;
-       cmd.request.hdr.len = sizeof(cmd.request);
-
-       cmd.request.rgid = rgid;
-       cmd.request.trigger_irq = trigger_irq;
-       cmd.request.event_code = event_code;
-       cmd.request.info = info;
-
-       return ism_cmd(ism, &cmd);
-}
-
 static unsigned int max_bytes(unsigned int start, unsigned int len,
                              unsigned int boundary)
 {
@@ -503,14 +469,6 @@ u8 *ism_get_seid(void)
 }
 EXPORT_SYMBOL_GPL(ism_get_seid);
 
-static u16 ism_get_chid(struct ism_dev *ism)
-{
-       if (!ism || !ism->pdev)
-               return 0;
-
-       return to_zpci(ism->pdev)->pchid;
-}
-
 static void ism_handle_event(struct ism_dev *ism)
 {
        struct ism_event *entry;
@@ -569,11 +527,6 @@ static irqreturn_t ism_handle_irq(int irq, void *data)
        return IRQ_HANDLED;
 }
 
-static u64 ism_get_local_gid(struct ism_dev *ism)
-{
-       return ism->local_gid;
-}
-
 static int ism_dev_init(struct ism_dev *ism)
 {
        struct pci_dev *pdev = ism->pdev;
@@ -774,6 +727,22 @@ module_exit(ism_exit);
 /*************************** SMC-D Implementation *****************************/
 
 #if IS_ENABLED(CONFIG_SMC)
+static int ism_query_rgid(struct ism_dev *ism, u64 rgid, u32 vid_valid,
+                         u32 vid)
+{
+       union ism_query_rgid cmd;
+
+       memset(&cmd, 0, sizeof(cmd));
+       cmd.request.hdr.cmd = ISM_QUERY_RGID;
+       cmd.request.hdr.len = sizeof(cmd.request);
+
+       cmd.request.rgid = rgid;
+       cmd.request.vlan_valid = vid_valid;
+       cmd.request.vlan_id = vid;
+
+       return ism_cmd(ism, &cmd);
+}
+
 static int smcd_query_rgid(struct smcd_dev *smcd, u64 rgid, u32 vid_valid,
                           u32 vid)
 {
@@ -811,6 +780,23 @@ static int smcd_reset_vlan_required(struct smcd_dev *smcd)
        return ism_cmd_simple(smcd->priv, ISM_RESET_VLAN);
 }
 
+static int ism_signal_ieq(struct ism_dev *ism, u64 rgid, u32 trigger_irq,
+                         u32 event_code, u64 info)
+{
+       union ism_sig_ieq cmd;
+
+       memset(&cmd, 0, sizeof(cmd));
+       cmd.request.hdr.cmd = ISM_SIGNAL_IEQ;
+       cmd.request.hdr.len = sizeof(cmd.request);
+
+       cmd.request.rgid = rgid;
+       cmd.request.trigger_irq = trigger_irq;
+       cmd.request.event_code = event_code;
+       cmd.request.info = info;
+
+       return ism_cmd(ism, &cmd);
+}
+
 static int smcd_signal_ieq(struct smcd_dev *smcd, u64 rgid, u32 trigger_irq,
                           u32 event_code, u64 info)
 {
@@ -830,11 +816,24 @@ static int smcd_supports_v2(void)
                SYSTEM_EID.type[0] != '0';
 }
 
+static u64 ism_get_local_gid(struct ism_dev *ism)
+{
+       return ism->local_gid;
+}
+
 static u64 smcd_get_local_gid(struct smcd_dev *smcd)
 {
        return ism_get_local_gid(smcd->priv);
 }
 
+static u16 ism_get_chid(struct ism_dev *ism)
+{
+       if (!ism || !ism->pdev)
+               return 0;
+
+       return to_zpci(ism->pdev)->pchid;
+}
+
 static u16 smcd_get_chid(struct smcd_dev *smcd)
 {
        return ism_get_chid(smcd->priv);
index 32d1e73..03348f6 100644 (file)
@@ -1837,8 +1837,16 @@ static void qla2x00_abort_srb(struct qla_qpair *qp, srb_t *sp, const int res,
                }
 
                spin_lock_irqsave(qp->qp_lock_ptr, *flags);
-               if (ret_cmd && blk_mq_request_started(scsi_cmd_to_rq(cmd)))
-                       sp->done(sp, res);
+               switch (sp->type) {
+               case SRB_SCSI_CMD:
+                       if (ret_cmd && blk_mq_request_started(scsi_cmd_to_rq(cmd)))
+                               sp->done(sp, res);
+                       break;
+               default:
+                       if (ret_cmd)
+                               sp->done(sp, res);
+                       break;
+               }
        } else {
                sp->done(sp, res);
        }
index 67922e2..6d8218a 100644 (file)
@@ -1019,7 +1019,7 @@ static ssize_t sdebug_error_write(struct file *file, const char __user *ubuf,
        struct sdebug_err_inject *inject;
        struct scsi_device *sdev = (struct scsi_device *)file->f_inode->i_private;
 
-       buf = kmalloc(count, GFP_KERNEL);
+       buf = kzalloc(count + 1, GFP_KERNEL);
        if (!buf)
                return -ENOMEM;
 
@@ -1132,7 +1132,6 @@ static const struct file_operations sdebug_target_reset_fail_fops = {
 static int sdebug_target_alloc(struct scsi_target *starget)
 {
        struct sdebug_target_info *targetip;
-       struct dentry *dentry;
 
        targetip = kzalloc(sizeof(struct sdebug_target_info), GFP_KERNEL);
        if (!targetip)
@@ -1140,15 +1139,9 @@ static int sdebug_target_alloc(struct scsi_target *starget)
 
        targetip->debugfs_entry = debugfs_create_dir(dev_name(&starget->dev),
                                sdebug_debugfs_root);
-       if (IS_ERR_OR_NULL(targetip->debugfs_entry))
-               pr_info("%s: failed to create debugfs directory for target %s\n",
-                       __func__, dev_name(&starget->dev));
 
        debugfs_create_file("fail_reset", 0600, targetip->debugfs_entry, starget,
                                &sdebug_target_reset_fail_fops);
-       if (IS_ERR_OR_NULL(dentry))
-               pr_info("%s: failed to create fail_reset file for target %s\n",
-                       __func__, dev_name(&starget->dev));
 
        starget->hostdata = targetip;
 
index 530918c..fa00dd5 100644 (file)
@@ -1643,24 +1643,21 @@ out:
        return disk_changed ? DISK_EVENT_MEDIA_CHANGE : 0;
 }
 
-static int sd_sync_cache(struct scsi_disk *sdkp, struct scsi_sense_hdr *sshdr)
+static int sd_sync_cache(struct scsi_disk *sdkp)
 {
        int retries, res;
        struct scsi_device *sdp = sdkp->device;
        const int timeout = sdp->request_queue->rq_timeout
                * SD_FLUSH_TIMEOUT_MULTIPLIER;
-       struct scsi_sense_hdr my_sshdr;
+       struct scsi_sense_hdr sshdr;
        const struct scsi_exec_args exec_args = {
                .req_flags = BLK_MQ_REQ_PM,
-               /* caller might not be interested in sense, but we need it */
-               .sshdr = sshdr ? : &my_sshdr,
+               .sshdr = &sshdr,
        };
 
        if (!scsi_device_online(sdp))
                return -ENODEV;
 
-       sshdr = exec_args.sshdr;
-
        for (retries = 3; retries > 0; --retries) {
                unsigned char cmd[16] = { 0 };
 
@@ -1685,15 +1682,23 @@ static int sd_sync_cache(struct scsi_disk *sdkp, struct scsi_sense_hdr *sshdr)
                        return res;
 
                if (scsi_status_is_check_condition(res) &&
-                   scsi_sense_valid(sshdr)) {
-                       sd_print_sense_hdr(sdkp, sshdr);
+                   scsi_sense_valid(&sshdr)) {
+                       sd_print_sense_hdr(sdkp, &sshdr);
 
                        /* we need to evaluate the error return  */
-                       if (sshdr->asc == 0x3a ||       /* medium not present */
-                           sshdr->asc == 0x20 ||       /* invalid command */
-                           (sshdr->asc == 0x74 && sshdr->ascq == 0x71))        /* drive is password locked */
+                       if (sshdr.asc == 0x3a ||        /* medium not present */
+                           sshdr.asc == 0x20 ||        /* invalid command */
+                           (sshdr.asc == 0x74 && sshdr.ascq == 0x71))  /* drive is password locked */
                                /* this is no error here */
                                return 0;
+                       /*
+                        * This drive doesn't support sync and there's not much
+                        * we can do because this is called during shutdown
+                        * or suspend so just return success so those operations
+                        * can proceed.
+                        */
+                       if (sshdr.sense_key == ILLEGAL_REQUEST)
+                               return 0;
                }
 
                switch (host_byte(res)) {
@@ -3853,7 +3858,7 @@ static void sd_shutdown(struct device *dev)
 
        if (sdkp->WCE && sdkp->media_present) {
                sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n");
-               sd_sync_cache(sdkp, NULL);
+               sd_sync_cache(sdkp);
        }
 
        if ((system_state != SYSTEM_RESTART &&
@@ -3874,7 +3879,6 @@ static inline bool sd_do_start_stop(struct scsi_device *sdev, bool runtime)
 static int sd_suspend_common(struct device *dev, bool runtime)
 {
        struct scsi_disk *sdkp = dev_get_drvdata(dev);
-       struct scsi_sense_hdr sshdr;
        int ret = 0;
 
        if (!sdkp)      /* E.g.: runtime suspend following sd_remove() */
@@ -3883,24 +3887,13 @@ static int sd_suspend_common(struct device *dev, bool runtime)
        if (sdkp->WCE && sdkp->media_present) {
                if (!sdkp->device->silence_suspend)
                        sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n");
-               ret = sd_sync_cache(sdkp, &sshdr);
-
-               if (ret) {
-                       /* ignore OFFLINE device */
-                       if (ret == -ENODEV)
-                               return 0;
-
-                       if (!scsi_sense_valid(&sshdr) ||
-                           sshdr.sense_key != ILLEGAL_REQUEST)
-                               return ret;
+               ret = sd_sync_cache(sdkp);
+               /* ignore OFFLINE device */
+               if (ret == -ENODEV)
+                       return 0;
 
-                       /*
-                        * sshdr.sense_key == ILLEGAL_REQUEST means this drive
-                        * doesn't support sync. There's not much to do and
-                        * suspend shouldn't fail.
-                        */
-                       ret = 0;
-               }
+               if (ret)
+                       return ret;
        }
 
        if (sd_do_start_stop(sdkp->device, runtime)) {
index 1e15ffa..44e9b09 100644 (file)
@@ -1143,7 +1143,7 @@ int tb_port_lane_bonding_enable(struct tb_port *port)
         * Only set bonding if the link was not already bonded. This
         * avoids the lane adapter to re-enter bonding state.
         */
-       if (width == TB_LINK_WIDTH_SINGLE) {
+       if (width == TB_LINK_WIDTH_SINGLE && !tb_is_upstream_port(port)) {
                ret = tb_port_set_lane_bonding(port, true);
                if (ret)
                        goto err_lane1;
@@ -2880,6 +2880,7 @@ static int tb_switch_lane_bonding_disable(struct tb_switch *sw)
        return tb_port_wait_for_link_width(down, TB_LINK_WIDTH_SINGLE, 100);
 }
 
+/* Note updating sw->link_width done in tb_switch_update_link_attributes() */
 static int tb_switch_asym_enable(struct tb_switch *sw, enum tb_link_width width)
 {
        struct tb_port *up, *down, *port;
@@ -2919,10 +2920,10 @@ static int tb_switch_asym_enable(struct tb_switch *sw, enum tb_link_width width)
                        return ret;
        }
 
-       sw->link_width = width;
        return 0;
 }
 
+/* Note updating sw->link_width done in tb_switch_update_link_attributes() */
 static int tb_switch_asym_disable(struct tb_switch *sw)
 {
        struct tb_port *up, *down;
@@ -2957,7 +2958,6 @@ static int tb_switch_asym_disable(struct tb_switch *sw)
                        return ret;
        }
 
-       sw->link_width = TB_LINK_WIDTH_DUAL;
        return 0;
 }
 
index 5acdeb7..fd49f86 100644 (file)
@@ -213,7 +213,17 @@ static void tb_add_dp_resources(struct tb_switch *sw)
                if (!tb_switch_query_dp_resource(sw, port))
                        continue;
 
-               list_add(&port->list, &tcm->dp_resources);
+               /*
+                * If DP IN on device router exist, position it at the
+                * beginning of the DP resources list, so that it is used
+                * before DP IN of the host router. This way external GPU(s)
+                * will be prioritized when pairing DP IN to a DP OUT.
+                */
+               if (tb_route(sw))
+                       list_add(&port->list, &tcm->dp_resources);
+               else
+                       list_add_tail(&port->list, &tcm->dp_resources);
+
                tb_port_dbg(port, "DP IN resource available\n");
        }
 }
index 2ba8ec2..0787456 100644 (file)
@@ -436,7 +436,7 @@ int ufshcd_mcq_init(struct ufs_hba *hba)
 
        for (i = 0; i < hba->nr_hw_queues; i++) {
                hwq = &hba->uhq[i];
-               hwq->max_entries = hba->nutrs;
+               hwq->max_entries = hba->nutrs + 1;
                spin_lock_init(&hwq->sq_lock);
                spin_lock_init(&hwq->cq_lock);
                mutex_init(&hwq->sq_mutex);
@@ -630,6 +630,7 @@ int ufshcd_mcq_abort(struct scsi_cmnd *cmd)
        int tag = scsi_cmd_to_rq(cmd)->tag;
        struct ufshcd_lrb *lrbp = &hba->lrb[tag];
        struct ufs_hw_queue *hwq;
+       unsigned long flags;
        int err = FAILED;
 
        if (!ufshcd_cmd_inflight(lrbp->cmd)) {
@@ -670,8 +671,10 @@ int ufshcd_mcq_abort(struct scsi_cmnd *cmd)
        }
 
        err = SUCCESS;
+       spin_lock_irqsave(&hwq->cq_lock, flags);
        if (ufshcd_cmd_inflight(lrbp->cmd))
                ufshcd_release_scsi_cmd(hba, lrbp);
+       spin_unlock_irqrestore(&hwq->cq_lock, flags);
 
 out:
        return err;
index af98177..02f297f 100644 (file)
@@ -1529,6 +1529,7 @@ irqreturn_t cdnsp_thread_irq_handler(int irq, void *data)
        unsigned long flags;
        int counter = 0;
 
+       local_bh_disable();
        spin_lock_irqsave(&pdev->lock, flags);
 
        if (pdev->cdnsp_state & (CDNSP_STATE_HALTED | CDNSP_STATE_DYING)) {
@@ -1541,6 +1542,7 @@ irqreturn_t cdnsp_thread_irq_handler(int irq, void *data)
                        cdnsp_died(pdev);
 
                spin_unlock_irqrestore(&pdev->lock, flags);
+               local_bh_enable();
                return IRQ_HANDLED;
        }
 
@@ -1557,6 +1559,7 @@ irqreturn_t cdnsp_thread_irq_handler(int irq, void *data)
        cdnsp_update_erst_dequeue(pdev, event_ring_deq, 1);
 
        spin_unlock_irqrestore(&pdev->lock, flags);
+       local_bh_enable();
 
        return IRQ_HANDLED;
 }
index b19e38d..7f8d33f 100644 (file)
@@ -1047,7 +1047,7 @@ int usb_get_bos_descriptor(struct usb_device *dev)
 
                if (cap->bDescriptorType != USB_DT_DEVICE_CAPABILITY) {
                        dev_notice(ddev, "descriptor type invalid, skip\n");
-                       continue;
+                       goto skip_to_next_descriptor;
                }
 
                switch (cap_type) {
@@ -1078,6 +1078,7 @@ int usb_get_bos_descriptor(struct usb_device *dev)
                        break;
                }
 
+skip_to_next_descriptor:
                total_len -= length;
                buffer += length;
        }
index b4584a0..87480a6 100644 (file)
@@ -622,29 +622,6 @@ static int hub_ext_port_status(struct usb_hub *hub, int port1, int type,
                ret = 0;
        }
        mutex_unlock(&hub->status_mutex);
-
-       /*
-        * There is no need to lock status_mutex here, because status_mutex
-        * protects hub->status, and the phy driver only checks the port
-        * status without changing the status.
-        */
-       if (!ret) {
-               struct usb_device *hdev = hub->hdev;
-
-               /*
-                * Only roothub will be notified of port state changes,
-                * since the USB PHY only cares about changes at the next
-                * level.
-                */
-               if (is_root_hub(hdev)) {
-                       struct usb_hcd *hcd = bus_to_hcd(hdev->bus);
-
-                       if (hcd->usb_phy)
-                               usb_phy_notify_port_status(hcd->usb_phy,
-                                                          port1 - 1, *status, *change);
-               }
-       }
-
        return ret;
 }
 
index 0144ca8..5c7538d 100644 (file)
@@ -2015,15 +2015,17 @@ static void dwc2_hc_n_intr(struct dwc2_hsotg *hsotg, int chnum)
 {
        struct dwc2_qtd *qtd;
        struct dwc2_host_chan *chan;
-       u32 hcint, hcintmsk;
+       u32 hcint, hcintraw, hcintmsk;
 
        chan = hsotg->hc_ptr_array[chnum];
 
-       hcint = dwc2_readl(hsotg, HCINT(chnum));
+       hcintraw = dwc2_readl(hsotg, HCINT(chnum));
        hcintmsk = dwc2_readl(hsotg, HCINTMSK(chnum));
+       hcint = hcintraw & hcintmsk;
+       dwc2_writel(hsotg, hcint, HCINT(chnum));
+
        if (!chan) {
                dev_err(hsotg->dev, "## hc_ptr_array for channel is NULL ##\n");
-               dwc2_writel(hsotg, hcint, HCINT(chnum));
                return;
        }
 
@@ -2032,11 +2034,9 @@ static void dwc2_hc_n_intr(struct dwc2_hsotg *hsotg, int chnum)
                         chnum);
                dev_vdbg(hsotg->dev,
                         "  hcint 0x%08x, hcintmsk 0x%08x, hcint&hcintmsk 0x%08x\n",
-                        hcint, hcintmsk, hcint & hcintmsk);
+                        hcintraw, hcintmsk, hcint);
        }
 
-       dwc2_writel(hsotg, hcint, HCINT(chnum));
-
        /*
         * If we got an interrupt after someone called
         * dwc2_hcd_endpoint_disable() we don't want to crash below
@@ -2046,8 +2046,7 @@ static void dwc2_hc_n_intr(struct dwc2_hsotg *hsotg, int chnum)
                return;
        }
 
-       chan->hcint = hcint;
-       hcint &= hcintmsk;
+       chan->hcint = hcintraw;
 
        /*
         * If the channel was halted due to a dequeue, the qtd list might
index 0328c86..b101dbf 100644 (file)
@@ -2034,6 +2034,8 @@ static int dwc3_probe(struct platform_device *pdev)
 
        pm_runtime_put(dev);
 
+       dma_set_max_seg_size(dev, UINT_MAX);
+
        return 0;
 
 err_exit_debugfs:
index 039bf24..57ddd2e 100644 (file)
@@ -505,6 +505,7 @@ static int dwc3_setup_role_switch(struct dwc3 *dwc)
                dwc->role_switch_default_mode = USB_DR_MODE_PERIPHERAL;
                mode = DWC3_GCTL_PRTCAP_DEVICE;
        }
+       dwc3_set_mode(dwc, mode);
 
        dwc3_role_switch.fwnode = dev_fwnode(dwc->dev);
        dwc3_role_switch.set = dwc3_usb_role_switch_set;
@@ -526,7 +527,6 @@ static int dwc3_setup_role_switch(struct dwc3 *dwc)
                }
        }
 
-       dwc3_set_mode(dwc, mode);
        return 0;
 }
 #else
index 3de43df..fdf6d5d 100644 (file)
@@ -546,10 +546,9 @@ static int dwc3_qcom_setup_irq(struct platform_device *pdev)
                                pdata ? pdata->hs_phy_irq_index : -1);
        if (irq > 0) {
                /* Keep wakeup interrupts disabled until suspend */
-               irq_set_status_flags(irq, IRQ_NOAUTOEN);
                ret = devm_request_threaded_irq(qcom->dev, irq, NULL,
                                        qcom_dwc3_resume_irq,
-                                       IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
+                                       IRQF_ONESHOT | IRQF_NO_AUTOEN,
                                        "qcom_dwc3 HS", qcom);
                if (ret) {
                        dev_err(qcom->dev, "hs_phy_irq failed: %d\n", ret);
@@ -561,10 +560,9 @@ static int dwc3_qcom_setup_irq(struct platform_device *pdev)
        irq = dwc3_qcom_get_irq(pdev, "dp_hs_phy_irq",
                                pdata ? pdata->dp_hs_phy_irq_index : -1);
        if (irq > 0) {
-               irq_set_status_flags(irq, IRQ_NOAUTOEN);
                ret = devm_request_threaded_irq(qcom->dev, irq, NULL,
                                        qcom_dwc3_resume_irq,
-                                       IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
+                                       IRQF_ONESHOT | IRQF_NO_AUTOEN,
                                        "qcom_dwc3 DP_HS", qcom);
                if (ret) {
                        dev_err(qcom->dev, "dp_hs_phy_irq failed: %d\n", ret);
@@ -576,10 +574,9 @@ static int dwc3_qcom_setup_irq(struct platform_device *pdev)
        irq = dwc3_qcom_get_irq(pdev, "dm_hs_phy_irq",
                                pdata ? pdata->dm_hs_phy_irq_index : -1);
        if (irq > 0) {
-               irq_set_status_flags(irq, IRQ_NOAUTOEN);
                ret = devm_request_threaded_irq(qcom->dev, irq, NULL,
                                        qcom_dwc3_resume_irq,
-                                       IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
+                                       IRQF_ONESHOT | IRQF_NO_AUTOEN,
                                        "qcom_dwc3 DM_HS", qcom);
                if (ret) {
                        dev_err(qcom->dev, "dm_hs_phy_irq failed: %d\n", ret);
@@ -591,10 +588,9 @@ static int dwc3_qcom_setup_irq(struct platform_device *pdev)
        irq = dwc3_qcom_get_irq(pdev, "ss_phy_irq",
                                pdata ? pdata->ss_phy_irq_index : -1);
        if (irq > 0) {
-               irq_set_status_flags(irq, IRQ_NOAUTOEN);
                ret = devm_request_threaded_irq(qcom->dev, irq, NULL,
                                        qcom_dwc3_resume_irq,
-                                       IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
+                                       IRQF_ONESHOT | IRQF_NO_AUTOEN,
                                        "qcom_dwc3 SS", qcom);
                if (ret) {
                        dev_err(qcom->dev, "ss_phy_irq failed: %d\n", ret);
@@ -758,6 +754,7 @@ static int dwc3_qcom_of_register_core(struct platform_device *pdev)
        if (!qcom->dwc3) {
                ret = -ENODEV;
                dev_err(dev, "failed to get dwc3 platform device\n");
+               of_platform_depopulate(dev);
        }
 
 node_put:
@@ -766,9 +763,9 @@ node_put:
        return ret;
 }
 
-static struct platform_device *
-dwc3_qcom_create_urs_usb_platdev(struct device *dev)
+static struct platform_device *dwc3_qcom_create_urs_usb_platdev(struct device *dev)
 {
+       struct platform_device *urs_usb = NULL;
        struct fwnode_handle *fwh;
        struct acpi_device *adev;
        char name[8];
@@ -788,9 +785,26 @@ dwc3_qcom_create_urs_usb_platdev(struct device *dev)
 
        adev = to_acpi_device_node(fwh);
        if (!adev)
-               return NULL;
+               goto err_put_handle;
+
+       urs_usb = acpi_create_platform_device(adev, NULL);
+       if (IS_ERR_OR_NULL(urs_usb))
+               goto err_put_handle;
+
+       return urs_usb;
+
+err_put_handle:
+       fwnode_handle_put(fwh);
+
+       return urs_usb;
+}
 
-       return acpi_create_platform_device(adev, NULL);
+static void dwc3_qcom_destroy_urs_usb_platdev(struct platform_device *urs_usb)
+{
+       struct fwnode_handle *fwh = urs_usb->dev.fwnode;
+
+       platform_device_unregister(urs_usb);
+       fwnode_handle_put(fwh);
 }
 
 static int dwc3_qcom_probe(struct platform_device *pdev)
@@ -874,13 +888,13 @@ static int dwc3_qcom_probe(struct platform_device *pdev)
        qcom->qscratch_base = devm_ioremap_resource(dev, parent_res);
        if (IS_ERR(qcom->qscratch_base)) {
                ret = PTR_ERR(qcom->qscratch_base);
-               goto clk_disable;
+               goto free_urs;
        }
 
        ret = dwc3_qcom_setup_irq(pdev);
        if (ret) {
                dev_err(dev, "failed to setup IRQs, err=%d\n", ret);
-               goto clk_disable;
+               goto free_urs;
        }
 
        /*
@@ -899,7 +913,7 @@ static int dwc3_qcom_probe(struct platform_device *pdev)
 
        if (ret) {
                dev_err(dev, "failed to register DWC3 Core, err=%d\n", ret);
-               goto depopulate;
+               goto free_urs;
        }
 
        ret = dwc3_qcom_interconnect_init(qcom);
@@ -931,10 +945,16 @@ static int dwc3_qcom_probe(struct platform_device *pdev)
 interconnect_exit:
        dwc3_qcom_interconnect_exit(qcom);
 depopulate:
-       if (np)
+       if (np) {
                of_platform_depopulate(&pdev->dev);
-       else
-               platform_device_put(pdev);
+       } else {
+               device_remove_software_node(&qcom->dwc3->dev);
+               platform_device_del(qcom->dwc3);
+       }
+       platform_device_put(qcom->dwc3);
+free_urs:
+       if (qcom->urs_usb)
+               dwc3_qcom_destroy_urs_usb_platdev(qcom->urs_usb);
 clk_disable:
        for (i = qcom->num_clocks - 1; i >= 0; i--) {
                clk_disable_unprepare(qcom->clks[i]);
@@ -953,11 +973,16 @@ static void dwc3_qcom_remove(struct platform_device *pdev)
        struct device *dev = &pdev->dev;
        int i;
 
-       device_remove_software_node(&qcom->dwc3->dev);
-       if (np)
+       if (np) {
                of_platform_depopulate(&pdev->dev);
-       else
-               platform_device_put(pdev);
+       } else {
+               device_remove_software_node(&qcom->dwc3->dev);
+               platform_device_del(qcom->dwc3);
+       }
+       platform_device_put(qcom->dwc3);
+
+       if (qcom->urs_usb)
+               dwc3_qcom_destroy_urs_usb_platdev(qcom->urs_usb);
 
        for (i = qcom->num_clocks - 1; i >= 0; i--) {
                clk_disable_unprepare(qcom->clks[i]);
index 590028e..3cd6b18 100644 (file)
@@ -183,10 +183,13 @@ static enum usb_device_speed __get_dwc3_maximum_speed(struct device_node *np)
 
        ret = of_property_read_string(dwc3_np, "maximum-speed", &maximum_speed);
        if (ret < 0)
-               return USB_SPEED_UNKNOWN;
+               goto out;
 
        ret = match_string(speed_names, ARRAY_SIZE(speed_names), maximum_speed);
 
+out:
+       of_node_put(dwc3_np);
+
        return (ret < 0) ? USB_SPEED_UNKNOWN : ret;
 }
 
@@ -339,6 +342,9 @@ static int dwc3_rtk_probe_dwc3_core(struct dwc3_rtk *rtk)
 
        switch_usb2_role(rtk, rtk->cur_role);
 
+       platform_device_put(dwc3_pdev);
+       of_node_put(dwc3_node);
+
        return 0;
 
 err_pdev_put:
index 5b3cd45..61f3f8b 100644 (file)
@@ -650,9 +650,8 @@ static int check_isoc_ss_overlap(struct mu3h_sch_ep_info *sch_ep, u32 offset)
 
                if (sch_ep->ep_type == ISOC_OUT_EP) {
                        for (j = 0; j < sch_ep->num_budget_microframes; j++) {
-                               k = XHCI_MTK_BW_INDEX(base + j + CS_OFFSET);
-                               /* use cs to indicate existence of in-ss @(base+j) */
-                               if (tt->fs_bus_bw_in[k])
+                               k = XHCI_MTK_BW_INDEX(base + j);
+                               if (tt->in_ss_cnt[k])
                                        return -ESCH_SS_OVERLAP;
                        }
                } else if (sch_ep->ep_type == ISOC_IN_EP || sch_ep->ep_type == INT_IN_EP) {
@@ -769,6 +768,14 @@ static void update_sch_tt(struct mu3h_sch_ep_info *sch_ep, bool used)
                                tt->fs_frame_bw[f] -= (u16)sch_ep->bw_budget_table[j];
                        }
                }
+
+               if (sch_ep->ep_type == ISOC_IN_EP || sch_ep->ep_type == INT_IN_EP) {
+                       k = XHCI_MTK_BW_INDEX(base);
+                       if (used)
+                               tt->in_ss_cnt[k]++;
+                       else
+                               tt->in_ss_cnt[k]--;
+               }
        }
 
        if (used)
index 865b55e..39f7ae7 100644 (file)
@@ -38,6 +38,7 @@
  * @fs_bus_bw_in: save bandwidth used by FS/LS IN eps in each uframes
  * @ls_bus_bw: save bandwidth used by LS eps in each uframes
  * @fs_frame_bw: save bandwidth used by FS/LS eps in each FS frames
+ * @in_ss_cnt: the count of Start-Split for IN eps
  * @ep_list: Endpoints using this TT
  */
 struct mu3h_sch_tt {
@@ -45,6 +46,7 @@ struct mu3h_sch_tt {
        u16 fs_bus_bw_in[XHCI_MTK_MAX_ESIT];
        u8 ls_bus_bw[XHCI_MTK_MAX_ESIT];
        u16 fs_frame_bw[XHCI_MTK_FRAMES_CNT];
+       u8 in_ss_cnt[XHCI_MTK_MAX_ESIT];
        struct list_head ep_list;
 };
 
index b931613..732cdeb 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/of.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/usb/phy.h>
 #include <linux/slab.h>
@@ -148,7 +149,7 @@ int xhci_plat_probe(struct platform_device *pdev, struct device *sysdev, const s
        int                     ret;
        int                     irq;
        struct xhci_plat_priv   *priv = NULL;
-
+       bool                    of_match;
 
        if (usb_disabled())
                return -ENODEV;
@@ -253,16 +254,23 @@ int xhci_plat_probe(struct platform_device *pdev, struct device *sysdev, const s
                                         &xhci->imod_interval);
        }
 
-       hcd->usb_phy = devm_usb_get_phy_by_phandle(sysdev, "usb-phy", 0);
-       if (IS_ERR(hcd->usb_phy)) {
-               ret = PTR_ERR(hcd->usb_phy);
-               if (ret == -EPROBE_DEFER)
-                       goto disable_clk;
-               hcd->usb_phy = NULL;
-       } else {
-               ret = usb_phy_init(hcd->usb_phy);
-               if (ret)
-                       goto disable_clk;
+       /*
+        * Drivers such as dwc3 manage PHYs themselves (and rely on driver name
+        * matching for the xhci platform device).
+        */
+       of_match = of_match_device(pdev->dev.driver->of_match_table, &pdev->dev);
+       if (of_match) {
+               hcd->usb_phy = devm_usb_get_phy_by_phandle(sysdev, "usb-phy", 0);
+               if (IS_ERR(hcd->usb_phy)) {
+                       ret = PTR_ERR(hcd->usb_phy);
+                       if (ret == -EPROBE_DEFER)
+                               goto disable_clk;
+                       hcd->usb_phy = NULL;
+               } else {
+                       ret = usb_phy_init(hcd->usb_phy);
+                       if (ret)
+                               goto disable_clk;
+               }
        }
 
        hcd->tpl_support = of_usb_host_tpl_support(sysdev->of_node);
@@ -285,15 +293,17 @@ int xhci_plat_probe(struct platform_device *pdev, struct device *sysdev, const s
                        goto dealloc_usb2_hcd;
                }
 
-               xhci->shared_hcd->usb_phy = devm_usb_get_phy_by_phandle(sysdev,
-                           "usb-phy", 1);
-               if (IS_ERR(xhci->shared_hcd->usb_phy)) {
-                       xhci->shared_hcd->usb_phy = NULL;
-               } else {
-                       ret = usb_phy_init(xhci->shared_hcd->usb_phy);
-                       if (ret)
-                               dev_err(sysdev, "%s init usb3phy fail (ret=%d)\n",
-                                           __func__, ret);
+               if (of_match) {
+                       xhci->shared_hcd->usb_phy = devm_usb_get_phy_by_phandle(sysdev,
+                                                                               "usb-phy", 1);
+                       if (IS_ERR(xhci->shared_hcd->usb_phy)) {
+                               xhci->shared_hcd->usb_phy = NULL;
+                       } else {
+                               ret = usb_phy_init(xhci->shared_hcd->usb_phy);
+                               if (ret)
+                                       dev_err(sysdev, "%s init usb3phy fail (ret=%d)\n",
+                                               __func__, ret);
+                       }
                }
 
                xhci->shared_hcd->tpl_support = hcd->tpl_support;
index a341b2f..2b45404 100644 (file)
@@ -432,6 +432,8 @@ static const struct usb_device_id onboard_hub_id_table[] = {
        { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2412) }, /* USB2412 USB 2.0 */
        { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2514) }, /* USB2514B USB 2.0 */
        { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2517) }, /* USB2517 USB 2.0 */
+       { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2744) }, /* USB5744 USB 2.0 */
+       { USB_DEVICE(VENDOR_ID_MICROCHIP, 0x5744) }, /* USB5744 USB 3.0 */
        { USB_DEVICE(VENDOR_ID_REALTEK, 0x0411) }, /* RTS5411 USB 3.1 */
        { USB_DEVICE(VENDOR_ID_REALTEK, 0x5411) }, /* RTS5411 USB 2.1 */
        { USB_DEVICE(VENDOR_ID_REALTEK, 0x0414) }, /* RTS5414 USB 3.2 */
index c4e24a7..292110e 100644 (file)
@@ -16,6 +16,11 @@ static const struct onboard_hub_pdata microchip_usb424_data = {
        .num_supplies = 1,
 };
 
+static const struct onboard_hub_pdata microchip_usb5744_data = {
+       .reset_us = 0,
+       .num_supplies = 2,
+};
+
 static const struct onboard_hub_pdata realtek_rts5411_data = {
        .reset_us = 0,
        .num_supplies = 1,
@@ -50,6 +55,8 @@ static const struct of_device_id onboard_hub_match[] = {
        { .compatible = "usb424,2412", .data = &microchip_usb424_data, },
        { .compatible = "usb424,2514", .data = &microchip_usb424_data, },
        { .compatible = "usb424,2517", .data = &microchip_usb424_data, },
+       { .compatible = "usb424,2744", .data = &microchip_usb5744_data, },
+       { .compatible = "usb424,5744", .data = &microchip_usb5744_data, },
        { .compatible = "usb451,8140", .data = &ti_tusb8041_data, },
        { .compatible = "usb451,8142", .data = &ti_tusb8041_data, },
        { .compatible = "usb4b4,6504", .data = &cypress_hx3_data, },
index c9decd0..35770e6 100644 (file)
@@ -457,8 +457,8 @@ static void ljca_auxdev_acpi_bind(struct ljca_adapter *adap,
                                  u64 adr, u8 id)
 {
        struct ljca_match_ids_walk_data wd = { 0 };
-       struct acpi_device *parent, *adev;
        struct device *dev = adap->dev;
+       struct acpi_device *parent;
        char uid[4];
 
        parent = ACPI_COMPANION(dev);
@@ -466,17 +466,7 @@ static void ljca_auxdev_acpi_bind(struct ljca_adapter *adap,
                return;
 
        /*
-        * get auxdev ACPI handle from the ACPI device directly
-        * under the parent that matches _ADR.
-        */
-       adev = acpi_find_child_device(parent, adr, false);
-       if (adev) {
-               ACPI_COMPANION_SET(&auxdev->dev, adev);
-               return;
-       }
-
-       /*
-        * _ADR is a grey area in the ACPI specification, some
+        * Currently LJCA hw doesn't use _ADR; instead the shipped
         * platforms use _HID to distinguish children devices.
         */
        switch (adr) {
@@ -656,10 +646,11 @@ static int ljca_enumerate_spi(struct ljca_adapter *adap)
        unsigned int i;
        int ret;
 
+       /* Not all LJCA chips implement SPI; a timeout reading the descriptors is normal */
        ret = ljca_send(adap, LJCA_CLIENT_MNG, LJCA_MNG_ENUM_SPI, NULL, 0, buf,
                        sizeof(buf), true, LJCA_ENUM_CLIENT_TIMEOUT_MS);
        if (ret < 0)
-               return ret;
+               return (ret == -ETIMEDOUT) ? 0 : ret;
 
        /* check firmware response */
        desc = (struct ljca_spi_descriptor *)buf;
index 45dcfaa..4dffcfe 100644 (file)
@@ -203,8 +203,8 @@ static void option_instat_callback(struct urb *urb);
 #define DELL_PRODUCT_5829E_ESIM                        0x81e4
 #define DELL_PRODUCT_5829E                     0x81e6
 
-#define DELL_PRODUCT_FM101R                    0x8213
-#define DELL_PRODUCT_FM101R_ESIM               0x8215
+#define DELL_PRODUCT_FM101R_ESIM               0x8213
+#define DELL_PRODUCT_FM101R                    0x8215
 
 #define KYOCERA_VENDOR_ID                      0x0c88
 #define KYOCERA_PRODUCT_KPC650                 0x17da
@@ -609,6 +609,8 @@ static void option_instat_callback(struct urb *urb);
 #define UNISOC_VENDOR_ID                       0x1782
 /* TOZED LT70-C based on UNISOC SL8563 uses UNISOC's vendor ID */
 #define TOZED_PRODUCT_LT70C                    0x4055
+/* Luat Air72*U series based on UNISOC UIS8910 uses UNISOC's vendor ID */
+#define LUAT_PRODUCT_AIR720U                   0x4e00
 
 /* Device flags */
 
@@ -1546,7 +1548,8 @@ static const struct usb_device_id option_ids[] = {
        { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0165, 0xff, 0xff, 0xff) },
        { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0167, 0xff, 0xff, 0xff),
          .driver_info = RSVD(4) },
-       { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0189, 0xff, 0xff, 0xff) },
+       { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0189, 0xff, 0xff, 0xff),
+         .driver_info = RSVD(4) },
        { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0191, 0xff, 0xff, 0xff), /* ZTE EuFi890 */
          .driver_info = RSVD(4) },
        { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0196, 0xff, 0xff, 0xff) },
@@ -2249,6 +2252,7 @@ static const struct usb_device_id option_ids[] = {
          .driver_info = RSVD(4) | RSVD(5) | RSVD(6) },
        { USB_DEVICE(0x1782, 0x4d10) },                                         /* Fibocom L610 (AT mode) */
        { USB_DEVICE_INTERFACE_CLASS(0x1782, 0x4d11, 0xff) },                   /* Fibocom L610 (ECM/RNDIS mode) */
+       { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x0001, 0xff, 0xff, 0xff) },    /* Fibocom L716-EU (ECM/RNDIS mode) */
        { USB_DEVICE(0x2cb7, 0x0104),                                           /* Fibocom NL678 series */
          .driver_info = RSVD(4) | RSVD(5) },
        { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0105, 0xff),                     /* Fibocom NL678 series */
@@ -2271,6 +2275,7 @@ static const struct usb_device_id option_ids[] = {
        { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x40) },
        { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0, 0) },
        { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, TOZED_PRODUCT_LT70C, 0xff, 0, 0) },
+       { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, LUAT_PRODUCT_AIR720U, 0xff, 0, 0) },
        { } /* Terminating entry */
 };
 MODULE_DEVICE_TABLE(usb, option_ids);
index 058d5b8..bfb6f94 100644 (file)
@@ -4273,7 +4273,8 @@ static void run_state_machine(struct tcpm_port *port)
                                current_lim = PD_P_SNK_STDBY_MW / 5;
                        tcpm_set_current_limit(port, current_lim, 5000);
                        /* Not sink vbus if operational current is 0mA */
-                       tcpm_set_charge(port, !!pdo_max_current(port->snk_pdo[0]));
+                       tcpm_set_charge(port, !port->pd_supported ||
+                                       pdo_max_current(port->snk_pdo[0]));
 
                        if (!port->pd_supported)
                                tcpm_set_state(port, SNK_READY, 0);
@@ -5391,6 +5392,15 @@ static void _tcpm_pd_hard_reset(struct tcpm_port *port)
        if (port->bist_request == BDO_MODE_TESTDATA && port->tcpc->set_bist_data)
                port->tcpc->set_bist_data(port->tcpc, false);
 
+       switch (port->state) {
+       case ERROR_RECOVERY:
+       case PORT_RESET:
+       case PORT_RESET_WAIT_OFF:
+               return;
+       default:
+               break;
+       }
+
        if (port->ams != NONE_AMS)
                port->ams = NONE_AMS;
        if (port->hard_reset_count < PD_N_HARD_RESET_COUNT)
index 0e867f5..196535a 100644 (file)
@@ -968,16 +968,17 @@ static int tps25750_start_patch_burst_mode(struct tps6598x *tps)
        ret = of_property_match_string(np, "reg-names", "patch-address");
        if (ret < 0) {
                dev_err(tps->dev, "failed to get patch-address %d\n", ret);
-               return ret;
+               goto release_fw;
        }
 
        ret = of_property_read_u32_index(np, "reg", ret, &addr);
        if (ret)
-               return ret;
+               goto release_fw;
 
        if (addr == 0 || (addr >= 0x20 && addr <= 0x23)) {
                dev_err(tps->dev, "wrong patch address %u\n", addr);
-               return -EINVAL;
+               ret = -EINVAL;
+               goto release_fw;
        }
 
        bpms_data.addr = (u8)addr;
@@ -1226,7 +1227,10 @@ static int tps6598x_probe(struct i2c_client *client)
                        TPS_REG_INT_PLUG_EVENT;
        }
 
-       tps->data = device_get_match_data(tps->dev);
+       if (dev_fwnode(tps->dev))
+               tps->data = device_get_match_data(tps->dev);
+       else
+               tps->data = i2c_get_match_data(client);
        if (!tps->data)
                return -EINVAL;
 
@@ -1425,7 +1429,7 @@ static const struct of_device_id tps6598x_of_match[] = {
 MODULE_DEVICE_TABLE(of, tps6598x_of_match);
 
 static const struct i2c_device_id tps6598x_id[] = {
-       { "tps6598x" },
+       { "tps6598x", (kernel_ulong_t)&tps6598x_data },
        { }
 };
 MODULE_DEVICE_TABLE(i2c, tps6598x_id);
index b3a3cb1..b137f36 100644 (file)
@@ -437,7 +437,7 @@ static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
        if (blk->shared_backend) {
                blk->buffer = shared_buffer;
        } else {
-               blk->buffer = kvmalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT,
+               blk->buffer = kvzalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT,
                                       GFP_KERNEL);
                if (!blk->buffer) {
                        ret = -ENOMEM;
@@ -495,7 +495,7 @@ static int __init vdpasim_blk_init(void)
                goto parent_err;
 
        if (shared_backend) {
-               shared_buffer = kvmalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT,
+               shared_buffer = kvzalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT,
                                         GFP_KERNEL);
                if (!shared_buffer) {
                        ret = -ENOMEM;
index 30df5c5..da7ec77 100644 (file)
@@ -1582,7 +1582,6 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa)
 
 err:
        put_device(&v->dev);
-       ida_simple_remove(&vhost_vdpa_ida, v->minor);
        return r;
 }
 
index c2524a7..7a55939 100644 (file)
@@ -242,7 +242,7 @@ void vp_del_vqs(struct virtio_device *vdev)
                        if (v != VIRTIO_MSI_NO_VECTOR) {
                                int irq = pci_irq_vector(vp_dev->pci_dev, v);
 
-                               irq_set_affinity_hint(irq, NULL);
+                               irq_update_affinity_hint(irq, NULL);
                                free_irq(irq, vq);
                        }
                }
@@ -443,10 +443,10 @@ int vp_set_vq_affinity(struct virtqueue *vq, const struct cpumask *cpu_mask)
                mask = vp_dev->msix_affinity_masks[info->msix_vector];
                irq = pci_irq_vector(vp_dev->pci_dev, info->msix_vector);
                if (!cpu_mask)
-                       irq_set_affinity_hint(irq, NULL);
+                       irq_update_affinity_hint(irq, NULL);
                else {
                        cpumask_copy(mask, cpu_mask);
-                       irq_set_affinity_hint(irq, mask);
+                       irq_set_affinity_and_hint(irq, mask);
                }
        }
        return 0;
index e2a1fe7..7de8b1e 100644 (file)
@@ -294,9 +294,10 @@ int vp_modern_probe(struct virtio_pci_modern_device *mdev)
 
        err = -EINVAL;
        mdev->common = vp_modern_map_capability(mdev, common,
-                                     sizeof(struct virtio_pci_common_cfg), 4,
-                                     0, sizeof(struct virtio_pci_modern_common_cfg),
-                                     &mdev->common_len, NULL);
+                             sizeof(struct virtio_pci_common_cfg), 4, 0,
+                             offsetofend(struct virtio_pci_modern_common_cfg,
+                                         queue_reset),
+                             &mdev->common_len, NULL);
        if (!mdev->common)
                goto err_map_common;
        mdev->isr = vp_modern_map_capability(mdev, isr, sizeof(u8), 1,
index b8f2f97..e358533 100644 (file)
@@ -171,11 +171,11 @@ static void evtchn_2l_handle_events(unsigned cpu, struct evtchn_loop_ctrl *ctrl)
        int i;
        struct shared_info *s = HYPERVISOR_shared_info;
        struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
+       evtchn_port_t evtchn;
 
        /* Timer interrupt has highest priority. */
-       irq = irq_from_virq(cpu, VIRQ_TIMER);
+       irq = irq_evtchn_from_virq(cpu, VIRQ_TIMER, &evtchn);
        if (irq != -1) {
-               evtchn_port_t evtchn = evtchn_from_irq(irq);
                word_idx = evtchn / BITS_PER_LONG;
                bit_idx = evtchn % BITS_PER_LONG;
                if (active_evtchns(cpu, s, word_idx) & (1ULL << bit_idx))
@@ -328,9 +328,9 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
        for (i = 0; i < EVTCHN_2L_NR_CHANNELS; i++) {
                if (sync_test_bit(i, BM(sh->evtchn_pending))) {
                        int word_idx = i / BITS_PER_EVTCHN_WORD;
-                       printk("  %d: event %d -> irq %d%s%s%s\n",
+                       printk("  %d: event %d -> irq %u%s%s%s\n",
                               cpu_from_evtchn(i), i,
-                              get_evtchn_to_irq(i),
+                              irq_from_evtchn(i),
                               sync_test_bit(word_idx, BM(&v->evtchn_pending_sel))
                               ? "" : " l2-clear",
                               !sync_test_bit(i, BM(sh->evtchn_mask))
index 6de6b08..f5edb9e 100644 (file)
@@ -164,6 +164,8 @@ static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1};
 
 /* IRQ <-> IPI mapping */
 static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1};
+/* Cache for IPI event channels - needed for hot cpu unplug (avoid RCU usage). */
+static DEFINE_PER_CPU(evtchn_port_t [XEN_NR_IPIS], ipi_to_evtchn) = {[0 ... XEN_NR_IPIS-1] = 0};
 
 /* Event channel distribution data */
 static atomic_t channels_on_cpu[NR_CPUS];
@@ -172,7 +174,7 @@ static int **evtchn_to_irq;
 #ifdef CONFIG_X86
 static unsigned long *pirq_eoi_map;
 #endif
-static bool (*pirq_needs_eoi)(unsigned irq);
+static bool (*pirq_needs_eoi)(struct irq_info *info);
 
 #define EVTCHN_ROW(e)  (e / (PAGE_SIZE/sizeof(**evtchn_to_irq)))
 #define EVTCHN_COL(e)  (e % (PAGE_SIZE/sizeof(**evtchn_to_irq)))
@@ -188,7 +190,6 @@ static struct irq_chip xen_lateeoi_chip;
 static struct irq_chip xen_percpu_chip;
 static struct irq_chip xen_pirq_chip;
 static void enable_dynirq(struct irq_data *data);
-static void disable_dynirq(struct irq_data *data);
 
 static DEFINE_PER_CPU(unsigned int, irq_epoch);
 
@@ -246,15 +247,6 @@ static int set_evtchn_to_irq(evtchn_port_t evtchn, unsigned int irq)
        return 0;
 }
 
-int get_evtchn_to_irq(evtchn_port_t evtchn)
-{
-       if (evtchn >= xen_evtchn_max_channels())
-               return -1;
-       if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL)
-               return -1;
-       return READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]);
-}
-
 /* Get info for IRQ */
 static struct irq_info *info_for_irq(unsigned irq)
 {
@@ -272,6 +264,19 @@ static void set_info_for_irq(unsigned int irq, struct irq_info *info)
                irq_set_chip_data(irq, info);
 }
 
+static struct irq_info *evtchn_to_info(evtchn_port_t evtchn)
+{
+       int irq;
+
+       if (evtchn >= xen_evtchn_max_channels())
+               return NULL;
+       if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL)
+               return NULL;
+       irq = READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]);
+
+       return (irq < 0) ? NULL : info_for_irq(irq);
+}
+
 /* Per CPU channel accounting */
 static void channels_on_cpu_dec(struct irq_info *info)
 {
@@ -298,6 +303,13 @@ static void channels_on_cpu_inc(struct irq_info *info)
        info->is_accounted = 1;
 }
 
+static void xen_irq_free_desc(unsigned int irq)
+{
+       /* Legacy IRQ descriptors are managed by the arch. */
+       if (irq >= nr_legacy_irqs())
+               irq_free_desc(irq);
+}
+
 static void delayed_free_irq(struct work_struct *work)
 {
        struct irq_info *info = container_of(to_rcu_work(work), struct irq_info,
@@ -309,14 +321,11 @@ static void delayed_free_irq(struct work_struct *work)
 
        kfree(info);
 
-       /* Legacy IRQ descriptors are managed by the arch. */
-       if (irq >= nr_legacy_irqs())
-               irq_free_desc(irq);
+       xen_irq_free_desc(irq);
 }
 
 /* Constructors for packed IRQ information. */
 static int xen_irq_info_common_setup(struct irq_info *info,
-                                    unsigned irq,
                                     enum xen_irq_type type,
                                     evtchn_port_t evtchn,
                                     unsigned short cpu)
@@ -326,29 +335,27 @@ static int xen_irq_info_common_setup(struct irq_info *info,
        BUG_ON(info->type != IRQT_UNBOUND && info->type != type);
 
        info->type = type;
-       info->irq = irq;
        info->evtchn = evtchn;
        info->cpu = cpu;
        info->mask_reason = EVT_MASK_REASON_EXPLICIT;
        raw_spin_lock_init(&info->lock);
 
-       ret = set_evtchn_to_irq(evtchn, irq);
+       ret = set_evtchn_to_irq(evtchn, info->irq);
        if (ret < 0)
                return ret;
 
-       irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN);
+       irq_clear_status_flags(info->irq, IRQ_NOREQUEST | IRQ_NOAUTOEN);
 
        return xen_evtchn_port_setup(evtchn);
 }
 
-static int xen_irq_info_evtchn_setup(unsigned irq,
+static int xen_irq_info_evtchn_setup(struct irq_info *info,
                                     evtchn_port_t evtchn,
                                     struct xenbus_device *dev)
 {
-       struct irq_info *info = info_for_irq(irq);
        int ret;
 
-       ret = xen_irq_info_common_setup(info, irq, IRQT_EVTCHN, evtchn, 0);
+       ret = xen_irq_info_common_setup(info, IRQT_EVTCHN, evtchn, 0);
        info->u.interdomain = dev;
        if (dev)
                atomic_inc(&dev->event_channels);
@@ -356,49 +363,37 @@ static int xen_irq_info_evtchn_setup(unsigned irq,
        return ret;
 }
 
-static int xen_irq_info_ipi_setup(unsigned cpu,
-                                 unsigned irq,
-                                 evtchn_port_t evtchn,
-                                 enum ipi_vector ipi)
+static int xen_irq_info_ipi_setup(struct irq_info *info, unsigned int cpu,
+                                 evtchn_port_t evtchn, enum ipi_vector ipi)
 {
-       struct irq_info *info = info_for_irq(irq);
-
        info->u.ipi = ipi;
 
-       per_cpu(ipi_to_irq, cpu)[ipi] = irq;
+       per_cpu(ipi_to_irq, cpu)[ipi] = info->irq;
+       per_cpu(ipi_to_evtchn, cpu)[ipi] = evtchn;
 
-       return xen_irq_info_common_setup(info, irq, IRQT_IPI, evtchn, 0);
+       return xen_irq_info_common_setup(info, IRQT_IPI, evtchn, 0);
 }
 
-static int xen_irq_info_virq_setup(unsigned cpu,
-                                  unsigned irq,
-                                  evtchn_port_t evtchn,
-                                  unsigned virq)
+static int xen_irq_info_virq_setup(struct irq_info *info, unsigned int cpu,
+                                  evtchn_port_t evtchn, unsigned int virq)
 {
-       struct irq_info *info = info_for_irq(irq);
-
        info->u.virq = virq;
 
-       per_cpu(virq_to_irq, cpu)[virq] = irq;
+       per_cpu(virq_to_irq, cpu)[virq] = info->irq;
 
-       return xen_irq_info_common_setup(info, irq, IRQT_VIRQ, evtchn, 0);
+       return xen_irq_info_common_setup(info, IRQT_VIRQ, evtchn, 0);
 }
 
-static int xen_irq_info_pirq_setup(unsigned irq,
-                                  evtchn_port_t evtchn,
-                                  unsigned pirq,
-                                  unsigned gsi,
-                                  uint16_t domid,
-                                  unsigned char flags)
+static int xen_irq_info_pirq_setup(struct irq_info *info, evtchn_port_t evtchn,
+                                  unsigned int pirq, unsigned int gsi,
+                                  uint16_t domid, unsigned char flags)
 {
-       struct irq_info *info = info_for_irq(irq);
-
        info->u.pirq.pirq = pirq;
        info->u.pirq.gsi = gsi;
        info->u.pirq.domid = domid;
        info->u.pirq.flags = flags;
 
-       return xen_irq_info_common_setup(info, irq, IRQT_PIRQ, evtchn, 0);
+       return xen_irq_info_common_setup(info, IRQT_PIRQ, evtchn, 0);
 }
 
 static void xen_irq_info_cleanup(struct irq_info *info)
@@ -412,7 +407,7 @@ static void xen_irq_info_cleanup(struct irq_info *info)
 /*
  * Accessors for packed IRQ information.
  */
-evtchn_port_t evtchn_from_irq(unsigned irq)
+static evtchn_port_t evtchn_from_irq(unsigned int irq)
 {
        const struct irq_info *info = NULL;
 
@@ -426,64 +421,51 @@ evtchn_port_t evtchn_from_irq(unsigned irq)
 
 unsigned int irq_from_evtchn(evtchn_port_t evtchn)
 {
-       return get_evtchn_to_irq(evtchn);
+       struct irq_info *info = evtchn_to_info(evtchn);
+
+       return info ? info->irq : -1;
 }
 EXPORT_SYMBOL_GPL(irq_from_evtchn);
 
-int irq_from_virq(unsigned int cpu, unsigned int virq)
+int irq_evtchn_from_virq(unsigned int cpu, unsigned int virq,
+                        evtchn_port_t *evtchn)
 {
-       return per_cpu(virq_to_irq, cpu)[virq];
+       int irq = per_cpu(virq_to_irq, cpu)[virq];
+
+       *evtchn = evtchn_from_irq(irq);
+
+       return irq;
 }
 
-static enum ipi_vector ipi_from_irq(unsigned irq)
+static enum ipi_vector ipi_from_irq(struct irq_info *info)
 {
-       struct irq_info *info = info_for_irq(irq);
-
        BUG_ON(info == NULL);
        BUG_ON(info->type != IRQT_IPI);
 
        return info->u.ipi;
 }
 
-static unsigned virq_from_irq(unsigned irq)
+static unsigned int virq_from_irq(struct irq_info *info)
 {
-       struct irq_info *info = info_for_irq(irq);
-
        BUG_ON(info == NULL);
        BUG_ON(info->type != IRQT_VIRQ);
 
        return info->u.virq;
 }
 
-static unsigned pirq_from_irq(unsigned irq)
+static unsigned int pirq_from_irq(struct irq_info *info)
 {
-       struct irq_info *info = info_for_irq(irq);
-
        BUG_ON(info == NULL);
        BUG_ON(info->type != IRQT_PIRQ);
 
        return info->u.pirq.pirq;
 }
 
-static enum xen_irq_type type_from_irq(unsigned irq)
-{
-       return info_for_irq(irq)->type;
-}
-
-static unsigned cpu_from_irq(unsigned irq)
-{
-       return info_for_irq(irq)->cpu;
-}
-
 unsigned int cpu_from_evtchn(evtchn_port_t evtchn)
 {
-       int irq = get_evtchn_to_irq(evtchn);
-       unsigned ret = 0;
-
-       if (irq != -1)
-               ret = cpu_from_irq(irq);
+       struct irq_info *info = evtchn_to_info(evtchn);
 
-       return ret;
+       return info ? info->cpu : 0;
 }
 
 static void do_mask(struct irq_info *info, u8 reason)
@@ -515,36 +497,30 @@ static void do_unmask(struct irq_info *info, u8 reason)
 }
 
 #ifdef CONFIG_X86
-static bool pirq_check_eoi_map(unsigned irq)
+static bool pirq_check_eoi_map(struct irq_info *info)
 {
-       return test_bit(pirq_from_irq(irq), pirq_eoi_map);
+       return test_bit(pirq_from_irq(info), pirq_eoi_map);
 }
 #endif
 
-static bool pirq_needs_eoi_flag(unsigned irq)
+static bool pirq_needs_eoi_flag(struct irq_info *info)
 {
-       struct irq_info *info = info_for_irq(irq);
        BUG_ON(info->type != IRQT_PIRQ);
 
        return info->u.pirq.flags & PIRQ_NEEDS_EOI;
 }
 
-static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu,
+static void bind_evtchn_to_cpu(struct irq_info *info, unsigned int cpu,
                               bool force_affinity)
 {
-       int irq = get_evtchn_to_irq(evtchn);
-       struct irq_info *info = info_for_irq(irq);
-
-       BUG_ON(irq == -1);
-
        if (IS_ENABLED(CONFIG_SMP) && force_affinity) {
-               struct irq_data *data = irq_get_irq_data(irq);
+               struct irq_data *data = irq_get_irq_data(info->irq);
 
                irq_data_update_affinity(data, cpumask_of(cpu));
                irq_data_update_effective_affinity(data, cpumask_of(cpu));
        }
 
-       xen_evtchn_port_bind_to_cpu(evtchn, cpu, info->cpu);
+       xen_evtchn_port_bind_to_cpu(info->evtchn, cpu, info->cpu);
 
        channels_on_cpu_dec(info);
        info->cpu = cpu;
@@ -601,7 +577,9 @@ static void lateeoi_list_add(struct irq_info *info)
 
        spin_lock_irqsave(&eoi->eoi_list_lock, flags);
 
-       if (list_empty(&eoi->eoi_list)) {
+       elem = list_first_entry_or_null(&eoi->eoi_list, struct irq_info,
+                                       eoi_list);
+       if (!elem || info->eoi_time < elem->eoi_time) {
                list_add(&info->eoi_list, &eoi->eoi_list);
                mod_delayed_work_on(info->eoi_cpu, system_wq,
                                    &eoi->delayed, delay);
@@ -732,50 +710,49 @@ void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags)
 }
 EXPORT_SYMBOL_GPL(xen_irq_lateeoi);
 
-static void xen_irq_init(unsigned irq)
+static struct irq_info *xen_irq_init(unsigned int irq)
 {
        struct irq_info *info;
 
        info = kzalloc(sizeof(*info), GFP_KERNEL);
-       if (info == NULL)
-               panic("Unable to allocate metadata for IRQ%d\n", irq);
+       if (info) {
+               info->irq = irq;
+               info->type = IRQT_UNBOUND;
+               info->refcnt = -1;
+               INIT_RCU_WORK(&info->rwork, delayed_free_irq);
 
-       info->type = IRQT_UNBOUND;
-       info->refcnt = -1;
-       INIT_RCU_WORK(&info->rwork, delayed_free_irq);
+               set_info_for_irq(irq, info);
+               /*
+                * Interrupt affinity setting can be immediate. No point
+                * in delaying it until an interrupt is handled.
+                */
+               irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
 
-       set_info_for_irq(irq, info);
-       /*
-        * Interrupt affinity setting can be immediate. No point
-        * in delaying it until an interrupt is handled.
-        */
-       irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
+               INIT_LIST_HEAD(&info->eoi_list);
+               list_add_tail(&info->list, &xen_irq_list_head);
+       }
 
-       INIT_LIST_HEAD(&info->eoi_list);
-       list_add_tail(&info->list, &xen_irq_list_head);
+       return info;
 }
 
-static int __must_check xen_allocate_irqs_dynamic(int nvec)
+static struct irq_info *xen_allocate_irq_dynamic(void)
 {
-       int i, irq = irq_alloc_descs(-1, 0, nvec, -1);
+       int irq = irq_alloc_desc_from(0, -1);
+       struct irq_info *info = NULL;
 
        if (irq >= 0) {
-               for (i = 0; i < nvec; i++)
-                       xen_irq_init(irq + i);
+               info = xen_irq_init(irq);
+               if (!info)
+                       xen_irq_free_desc(irq);
        }
 
-       return irq;
-}
-
-static inline int __must_check xen_allocate_irq_dynamic(void)
-{
-
-       return xen_allocate_irqs_dynamic(1);
+       return info;
 }
 
-static int __must_check xen_allocate_irq_gsi(unsigned gsi)
+static struct irq_info *xen_allocate_irq_gsi(unsigned int gsi)
 {
        int irq;
+       struct irq_info *info;
 
        /*
         * A PV guest has no concept of a GSI (since it has no ACPI
@@ -792,15 +769,15 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi)
        else
                irq = irq_alloc_desc_at(gsi, -1);
 
-       xen_irq_init(irq);
+       info = xen_irq_init(irq);
+       if (!info)
+               xen_irq_free_desc(irq);
 
-       return irq;
+       return info;
 }
 
-static void xen_free_irq(unsigned irq)
+static void xen_free_irq(struct irq_info *info)
 {
-       struct irq_info *info = info_for_irq(irq);
-
        if (WARN_ON(!info))
                return;
 
@@ -821,14 +798,11 @@ static void event_handler_exit(struct irq_info *info)
        clear_evtchn(info->evtchn);
 }
 
-static void pirq_query_unmask(int irq)
+static void pirq_query_unmask(struct irq_info *info)
 {
        struct physdev_irq_status_query irq_status;
-       struct irq_info *info = info_for_irq(irq);
-
-       BUG_ON(info->type != IRQT_PIRQ);
 
-       irq_status.irq = pirq_from_irq(irq);
+       irq_status.irq = pirq_from_irq(info);
        if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
                irq_status.flags = 0;
 
@@ -837,61 +811,81 @@ static void pirq_query_unmask(int irq)
                info->u.pirq.flags |= PIRQ_NEEDS_EOI;
 }
 
-static void eoi_pirq(struct irq_data *data)
+static void do_eoi_pirq(struct irq_info *info)
 {
-       struct irq_info *info = info_for_irq(data->irq);
-       evtchn_port_t evtchn = info ? info->evtchn : 0;
-       struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) };
+       struct physdev_eoi eoi = { .irq = pirq_from_irq(info) };
        int rc = 0;
 
-       if (!VALID_EVTCHN(evtchn))
+       if (!VALID_EVTCHN(info->evtchn))
                return;
 
        event_handler_exit(info);
 
-       if (pirq_needs_eoi(data->irq)) {
+       if (pirq_needs_eoi(info)) {
                rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
                WARN_ON(rc);
        }
 }
 
+static void eoi_pirq(struct irq_data *data)
+{
+       struct irq_info *info = info_for_irq(data->irq);
+
+       do_eoi_pirq(info);
+}
+
+static void do_disable_dynirq(struct irq_info *info)
+{
+       if (VALID_EVTCHN(info->evtchn))
+               do_mask(info, EVT_MASK_REASON_EXPLICIT);
+}
+
+static void disable_dynirq(struct irq_data *data)
+{
+       struct irq_info *info = info_for_irq(data->irq);
+
+       if (info)
+               do_disable_dynirq(info);
+}
+
 static void mask_ack_pirq(struct irq_data *data)
 {
-       disable_dynirq(data);
-       eoi_pirq(data);
+       struct irq_info *info = info_for_irq(data->irq);
+
+       if (info) {
+               do_disable_dynirq(info);
+               do_eoi_pirq(info);
+       }
 }
 
-static unsigned int __startup_pirq(unsigned int irq)
+static unsigned int __startup_pirq(struct irq_info *info)
 {
        struct evtchn_bind_pirq bind_pirq;
-       struct irq_info *info = info_for_irq(irq);
-       evtchn_port_t evtchn = evtchn_from_irq(irq);
+       evtchn_port_t evtchn = info->evtchn;
        int rc;
 
-       BUG_ON(info->type != IRQT_PIRQ);
-
        if (VALID_EVTCHN(evtchn))
                goto out;
 
-       bind_pirq.pirq = pirq_from_irq(irq);
+       bind_pirq.pirq = pirq_from_irq(info);
        /* NB. We are happy to share unless we are probing. */
        bind_pirq.flags = info->u.pirq.flags & PIRQ_SHAREABLE ?
                                        BIND_PIRQ__WILL_SHARE : 0;
        rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq);
        if (rc != 0) {
-               pr_warn("Failed to obtain physical IRQ %d\n", irq);
+               pr_warn("Failed to obtain physical IRQ %d\n", info->irq);
                return 0;
        }
        evtchn = bind_pirq.port;
 
-       pirq_query_unmask(irq);
+       pirq_query_unmask(info);
 
-       rc = set_evtchn_to_irq(evtchn, irq);
+       rc = set_evtchn_to_irq(evtchn, info->irq);
        if (rc)
                goto err;
 
        info->evtchn = evtchn;
-       bind_evtchn_to_cpu(evtchn, 0, false);
+       bind_evtchn_to_cpu(info, 0, false);
 
        rc = xen_evtchn_port_setup(evtchn);
        if (rc)
@@ -900,26 +894,28 @@ static unsigned int __startup_pirq(unsigned int irq)
 out:
        do_unmask(info, EVT_MASK_REASON_EXPLICIT);
 
-       eoi_pirq(irq_get_irq_data(irq));
+       do_eoi_pirq(info);
 
        return 0;
 
 err:
-       pr_err("irq%d: Failed to set port to irq mapping (%d)\n", irq, rc);
+       pr_err("irq%d: Failed to set port to irq mapping (%d)\n", info->irq,
+              rc);
        xen_evtchn_close(evtchn);
        return 0;
 }
 
 static unsigned int startup_pirq(struct irq_data *data)
 {
-       return __startup_pirq(data->irq);
+       struct irq_info *info = info_for_irq(data->irq);
+
+       return __startup_pirq(info);
 }
 
 static void shutdown_pirq(struct irq_data *data)
 {
-       unsigned int irq = data->irq;
-       struct irq_info *info = info_for_irq(irq);
-       evtchn_port_t evtchn = evtchn_from_irq(irq);
+       struct irq_info *info = info_for_irq(data->irq);
+       evtchn_port_t evtchn = info->evtchn;
 
        BUG_ON(info->type != IRQT_PIRQ);
 
@@ -957,10 +953,14 @@ int xen_irq_from_gsi(unsigned gsi)
 }
 EXPORT_SYMBOL_GPL(xen_irq_from_gsi);
 
-static void __unbind_from_irq(unsigned int irq)
+static void __unbind_from_irq(struct irq_info *info, unsigned int irq)
 {
-       evtchn_port_t evtchn = evtchn_from_irq(irq);
-       struct irq_info *info = info_for_irq(irq);
+       evtchn_port_t evtchn;
+
+       if (!info) {
+               xen_irq_free_desc(irq);
+               return;
+       }
 
        if (info->refcnt > 0) {
                info->refcnt--;
@@ -968,19 +968,22 @@ static void __unbind_from_irq(unsigned int irq)
                        return;
        }
 
+       evtchn = info->evtchn;
+
        if (VALID_EVTCHN(evtchn)) {
-               unsigned int cpu = cpu_from_irq(irq);
+               unsigned int cpu = info->cpu;
                struct xenbus_device *dev;
 
                if (!info->is_static)
                        xen_evtchn_close(evtchn);
 
-               switch (type_from_irq(irq)) {
+               switch (info->type) {
                case IRQT_VIRQ:
-                       per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1;
+                       per_cpu(virq_to_irq, cpu)[virq_from_irq(info)] = -1;
                        break;
                case IRQT_IPI:
-                       per_cpu(ipi_to_irq, cpu)[ipi_from_irq(irq)] = -1;
+                       per_cpu(ipi_to_irq, cpu)[ipi_from_irq(info)] = -1;
+                       per_cpu(ipi_to_evtchn, cpu)[ipi_from_irq(info)] = 0;
                        break;
                case IRQT_EVTCHN:
                        dev = info->u.interdomain;
@@ -994,7 +997,7 @@ static void __unbind_from_irq(unsigned int irq)
                xen_irq_info_cleanup(info);
        }
 
-       xen_free_irq(irq);
+       xen_free_irq(info);
 }
 
 /*
@@ -1010,24 +1013,24 @@ static void __unbind_from_irq(unsigned int irq)
 int xen_bind_pirq_gsi_to_irq(unsigned gsi,
                             unsigned pirq, int shareable, char *name)
 {
-       int irq;
+       struct irq_info *info;
        struct physdev_irq irq_op;
        int ret;
 
        mutex_lock(&irq_mapping_update_lock);
 
-       irq = xen_irq_from_gsi(gsi);
-       if (irq != -1) {
+       ret = xen_irq_from_gsi(gsi);
+       if (ret != -1) {
                pr_info("%s: returning irq %d for gsi %u\n",
-                       __func__, irq, gsi);
+                       __func__, ret, gsi);
                goto out;
        }
 
-       irq = xen_allocate_irq_gsi(gsi);
-       if (irq < 0)
+       info = xen_allocate_irq_gsi(gsi);
+       if (!info)
                goto out;
 
-       irq_op.irq = irq;
+       irq_op.irq = info->irq;
        irq_op.vector = 0;
 
        /* Only the privileged domain can do this. For non-priv, the pcifront
@@ -1035,20 +1038,19 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
         * this in the priv domain. */
        if (xen_initial_domain() &&
            HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
-               xen_free_irq(irq);
-               irq = -ENOSPC;
+               xen_free_irq(info);
+               ret = -ENOSPC;
                goto out;
        }
 
-       ret = xen_irq_info_pirq_setup(irq, 0, pirq, gsi, DOMID_SELF,
+       ret = xen_irq_info_pirq_setup(info, 0, pirq, gsi, DOMID_SELF,
                               shareable ? PIRQ_SHAREABLE : 0);
        if (ret < 0) {
-               __unbind_from_irq(irq);
-               irq = ret;
+               __unbind_from_irq(info, info->irq);
                goto out;
        }
 
-       pirq_query_unmask(irq);
+       pirq_query_unmask(info);
        /* We try to use the handler with the appropriate semantic for the
         * type of interrupt: if the interrupt is an edge triggered
         * interrupt we use handle_edge_irq.
@@ -1065,16 +1067,18 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
         * is the right choice either way.
         */
        if (shareable)
-               irq_set_chip_and_handler_name(irq, &xen_pirq_chip,
+               irq_set_chip_and_handler_name(info->irq, &xen_pirq_chip,
                                handle_fasteoi_irq, name);
        else
-               irq_set_chip_and_handler_name(irq, &xen_pirq_chip,
+               irq_set_chip_and_handler_name(info->irq, &xen_pirq_chip,
                                handle_edge_irq, name);
 
+       ret = info->irq;
+
 out:
        mutex_unlock(&irq_mapping_update_lock);
 
-       return irq;
+       return ret;
 }
 
 #ifdef CONFIG_PCI_MSI
@@ -1096,17 +1100,22 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
                             int pirq, int nvec, const char *name, domid_t domid)
 {
        int i, irq, ret;
+       struct irq_info *info;
 
        mutex_lock(&irq_mapping_update_lock);
 
-       irq = xen_allocate_irqs_dynamic(nvec);
+       irq = irq_alloc_descs(-1, 0, nvec, -1);
        if (irq < 0)
                goto out;
 
        for (i = 0; i < nvec; i++) {
+               info = xen_irq_init(irq + i);
+               if (!info)
+                       goto error_irq;
+
                irq_set_chip_and_handler_name(irq + i, &xen_pirq_chip, handle_edge_irq, name);
 
-               ret = xen_irq_info_pirq_setup(irq + i, 0, pirq + i, 0, domid,
+               ret = xen_irq_info_pirq_setup(info, 0, pirq + i, 0, domid,
                                              i == 0 ? 0 : PIRQ_MSI_GROUP);
                if (ret < 0)
                        goto error_irq;
@@ -1118,9 +1127,12 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
 out:
        mutex_unlock(&irq_mapping_update_lock);
        return irq;
+
 error_irq:
-       while (nvec--)
-               __unbind_from_irq(irq + nvec);
+       while (nvec--) {
+               info = info_for_irq(irq + nvec);
+               __unbind_from_irq(info, irq + nvec);
+       }
        mutex_unlock(&irq_mapping_update_lock);
        return ret;
 }
@@ -1156,67 +1168,45 @@ int xen_destroy_irq(int irq)
                }
        }
 
-       xen_free_irq(irq);
+       xen_free_irq(info);
 
 out:
        mutex_unlock(&irq_mapping_update_lock);
        return rc;
 }
 
-int xen_irq_from_pirq(unsigned pirq)
-{
-       int irq;
-
-       struct irq_info *info;
-
-       mutex_lock(&irq_mapping_update_lock);
-
-       list_for_each_entry(info, &xen_irq_list_head, list) {
-               if (info->type != IRQT_PIRQ)
-                       continue;
-               irq = info->irq;
-               if (info->u.pirq.pirq == pirq)
-                       goto out;
-       }
-       irq = -1;
-out:
-       mutex_unlock(&irq_mapping_update_lock);
-
-       return irq;
-}
-
-
 int xen_pirq_from_irq(unsigned irq)
 {
-       return pirq_from_irq(irq);
+       struct irq_info *info = info_for_irq(irq);
+
+       return pirq_from_irq(info);
 }
 EXPORT_SYMBOL_GPL(xen_pirq_from_irq);
 
 static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip,
                                   struct xenbus_device *dev)
 {
-       int irq;
-       int ret;
+       int ret = -ENOMEM;
+       struct irq_info *info;
 
        if (evtchn >= xen_evtchn_max_channels())
                return -ENOMEM;
 
        mutex_lock(&irq_mapping_update_lock);
 
-       irq = get_evtchn_to_irq(evtchn);
+       info = evtchn_to_info(evtchn);
 
-       if (irq == -1) {
-               irq = xen_allocate_irq_dynamic();
-               if (irq < 0)
+       if (!info) {
+               info = xen_allocate_irq_dynamic();
+               if (!info)
                        goto out;
 
-               irq_set_chip_and_handler_name(irq, chip,
+               irq_set_chip_and_handler_name(info->irq, chip,
                                              handle_edge_irq, "event");
 
-               ret = xen_irq_info_evtchn_setup(irq, evtchn, dev);
+               ret = xen_irq_info_evtchn_setup(info, evtchn, dev);
                if (ret < 0) {
-                       __unbind_from_irq(irq);
-                       irq = ret;
+                       __unbind_from_irq(info, info->irq);
                        goto out;
                }
                /*
@@ -1226,17 +1216,17 @@ static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip,
                 * affinity setting is not invoked on them so nothing would
                 * bind the channel.
                 */
-               bind_evtchn_to_cpu(evtchn, 0, false);
-       } else {
-               struct irq_info *info = info_for_irq(irq);
-               if (!WARN_ON(!info || info->type != IRQT_EVTCHN))
-                       info->refcnt++;
+               bind_evtchn_to_cpu(info, 0, false);
+       } else if (!WARN_ON(info->type != IRQT_EVTCHN)) {
+               info->refcnt++;
        }
 
+       ret = info->irq;
+
 out:
        mutex_unlock(&irq_mapping_update_lock);
 
-       return irq;
+       return ret;
 }
 
 int bind_evtchn_to_irq(evtchn_port_t evtchn)
@@ -1255,18 +1245,19 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
 {
        struct evtchn_bind_ipi bind_ipi;
        evtchn_port_t evtchn;
-       int ret, irq;
+       struct irq_info *info;
+       int ret;
 
        mutex_lock(&irq_mapping_update_lock);
 
-       irq = per_cpu(ipi_to_irq, cpu)[ipi];
+       ret = per_cpu(ipi_to_irq, cpu)[ipi];
 
-       if (irq == -1) {
-               irq = xen_allocate_irq_dynamic();
-               if (irq < 0)
+       if (ret == -1) {
+               info = xen_allocate_irq_dynamic();
+               if (!info)
                        goto out;
 
-               irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
+               irq_set_chip_and_handler_name(info->irq, &xen_percpu_chip,
                                              handle_percpu_irq, "ipi");
 
                bind_ipi.vcpu = xen_vcpu_nr(cpu);
@@ -1275,25 +1266,25 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
                        BUG();
                evtchn = bind_ipi.port;
 
-               ret = xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
+               ret = xen_irq_info_ipi_setup(info, cpu, evtchn, ipi);
                if (ret < 0) {
-                       __unbind_from_irq(irq);
-                       irq = ret;
+                       __unbind_from_irq(info, info->irq);
                        goto out;
                }
                /*
                 * Force the affinity mask to the target CPU so proc shows
                 * the correct target.
                 */
-               bind_evtchn_to_cpu(evtchn, cpu, true);
+               bind_evtchn_to_cpu(info, cpu, true);
+               ret = info->irq;
        } else {
-               struct irq_info *info = info_for_irq(irq);
+               info = info_for_irq(ret);
                WARN_ON(info == NULL || info->type != IRQT_IPI);
        }
 
  out:
        mutex_unlock(&irq_mapping_update_lock);
-       return irq;
+       return ret;
 }
 
 static int bind_interdomain_evtchn_to_irq_chip(struct xenbus_device *dev,
@@ -1361,22 +1352,23 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu)
 {
        struct evtchn_bind_virq bind_virq;
        evtchn_port_t evtchn = 0;
-       int irq, ret;
+       struct irq_info *info;
+       int ret;
 
        mutex_lock(&irq_mapping_update_lock);
 
-       irq = per_cpu(virq_to_irq, cpu)[virq];
+       ret = per_cpu(virq_to_irq, cpu)[virq];
 
-       if (irq == -1) {
-               irq = xen_allocate_irq_dynamic();
-               if (irq < 0)
+       if (ret == -1) {
+               info = xen_allocate_irq_dynamic();
+               if (!info)
                        goto out;
 
                if (percpu)
-                       irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
+                       irq_set_chip_and_handler_name(info->irq, &xen_percpu_chip,
                                                      handle_percpu_irq, "virq");
                else
-                       irq_set_chip_and_handler_name(irq, &xen_dynamic_chip,
+                       irq_set_chip_and_handler_name(info->irq, &xen_dynamic_chip,
                                                      handle_edge_irq, "virq");
 
                bind_virq.virq = virq;
@@ -1391,10 +1383,9 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu)
                        BUG_ON(ret < 0);
                }
 
-               ret = xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
+               ret = xen_irq_info_virq_setup(info, cpu, evtchn, virq);
                if (ret < 0) {
-                       __unbind_from_irq(irq);
-                       irq = ret;
+                       __unbind_from_irq(info, info->irq);
                        goto out;
                }
 
@@ -1402,22 +1393,26 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu)
                 * Force the affinity mask for percpu interrupts so proc
                 * shows the correct target.
                 */
-               bind_evtchn_to_cpu(evtchn, cpu, percpu);
+               bind_evtchn_to_cpu(info, cpu, percpu);
+               ret = info->irq;
        } else {
-               struct irq_info *info = info_for_irq(irq);
+               info = info_for_irq(ret);
                WARN_ON(info == NULL || info->type != IRQT_VIRQ);
        }
 
 out:
        mutex_unlock(&irq_mapping_update_lock);
 
-       return irq;
+       return ret;
 }
 
 static void unbind_from_irq(unsigned int irq)
 {
+       struct irq_info *info;
+
        mutex_lock(&irq_mapping_update_lock);
-       __unbind_from_irq(irq);
+       info = info_for_irq(irq);
+       __unbind_from_irq(info, irq);
        mutex_unlock(&irq_mapping_update_lock);
 }
 
@@ -1568,13 +1563,7 @@ EXPORT_SYMBOL_GPL(xen_set_irq_priority);
 
 int evtchn_make_refcounted(evtchn_port_t evtchn, bool is_static)
 {
-       int irq = get_evtchn_to_irq(evtchn);
-       struct irq_info *info;
-
-       if (irq == -1)
-               return -ENOENT;
-
-       info = info_for_irq(irq);
+       struct irq_info *info = evtchn_to_info(evtchn);
 
        if (!info)
                return -ENOENT;
@@ -1590,7 +1579,6 @@ EXPORT_SYMBOL_GPL(evtchn_make_refcounted);
 
 int evtchn_get(evtchn_port_t evtchn)
 {
-       int irq;
        struct irq_info *info;
        int err = -ENOENT;
 
@@ -1599,11 +1587,7 @@ int evtchn_get(evtchn_port_t evtchn)
 
        mutex_lock(&irq_mapping_update_lock);
 
-       irq = get_evtchn_to_irq(evtchn);
-       if (irq == -1)
-               goto done;
-
-       info = info_for_irq(irq);
+       info = evtchn_to_info(evtchn);
 
        if (!info)
                goto done;
@@ -1623,16 +1607,17 @@ EXPORT_SYMBOL_GPL(evtchn_get);
 
 void evtchn_put(evtchn_port_t evtchn)
 {
-       int irq = get_evtchn_to_irq(evtchn);
-       if (WARN_ON(irq == -1))
+       struct irq_info *info = evtchn_to_info(evtchn);
+
+       if (WARN_ON(!info))
                return;
-       unbind_from_irq(irq);
+       unbind_from_irq(info->irq);
 }
 EXPORT_SYMBOL_GPL(evtchn_put);
 
 void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
 {
-       int irq;
+       evtchn_port_t evtchn;
 
 #ifdef CONFIG_X86
        if (unlikely(vector == XEN_NMI_VECTOR)) {
@@ -1643,9 +1628,9 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
                return;
        }
 #endif
-       irq = per_cpu(ipi_to_irq, cpu)[vector];
-       BUG_ON(irq < 0);
-       notify_remote_via_irq(irq);
+       evtchn = per_cpu(ipi_to_evtchn, cpu)[vector];
+       BUG_ON(evtchn == 0);
+       notify_remote_via_evtchn(evtchn);
 }
 
 struct evtchn_loop_ctrl {
@@ -1656,12 +1641,10 @@ struct evtchn_loop_ctrl {
 
 void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
 {
-       int irq;
-       struct irq_info *info;
+       struct irq_info *info = evtchn_to_info(port);
        struct xenbus_device *dev;
 
-       irq = get_evtchn_to_irq(port);
-       if (irq == -1)
+       if (!info)
                return;
 
        /*
@@ -1686,7 +1669,6 @@ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
                }
        }
 
-       info = info_for_irq(irq);
        if (xchg_acquire(&info->is_active, 1))
                return;
 
@@ -1700,7 +1682,7 @@ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
                info->eoi_time = get_jiffies_64() + event_eoi_delay;
        }
 
-       generic_handle_irq(irq);
+       generic_handle_irq(info->irq);
 }
 
 int xen_evtchn_do_upcall(void)
@@ -1758,16 +1740,17 @@ void rebind_evtchn_irq(evtchn_port_t evtchn, int irq)
        mutex_lock(&irq_mapping_update_lock);
 
        /* After resume the irq<->evtchn mappings are all cleared out */
-       BUG_ON(get_evtchn_to_irq(evtchn) != -1);
+       BUG_ON(evtchn_to_info(evtchn));
        /* Expect irq to have been bound before,
           so there should be a proper type */
        BUG_ON(info->type == IRQT_UNBOUND);
 
-       (void)xen_irq_info_evtchn_setup(irq, evtchn, NULL);
+       info->irq = irq;
+       (void)xen_irq_info_evtchn_setup(info, evtchn, NULL);
 
        mutex_unlock(&irq_mapping_update_lock);
 
-       bind_evtchn_to_cpu(evtchn, info->cpu, false);
+       bind_evtchn_to_cpu(info, info->cpu, false);
 
        /* Unmask the event channel. */
        enable_irq(irq);
@@ -1801,7 +1784,7 @@ static int xen_rebind_evtchn_to_cpu(struct irq_info *info, unsigned int tcpu)
         * it, but don't do the xenlinux-level rebind in that case.
         */
        if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
-               bind_evtchn_to_cpu(evtchn, tcpu, false);
+               bind_evtchn_to_cpu(info, tcpu, false);
 
        do_unmask(info, EVT_MASK_REASON_TEMPORARY);
 
@@ -1858,28 +1841,30 @@ static void enable_dynirq(struct irq_data *data)
                do_unmask(info, EVT_MASK_REASON_EXPLICIT);
 }
 
-static void disable_dynirq(struct irq_data *data)
+static void do_ack_dynirq(struct irq_info *info)
 {
-       struct irq_info *info = info_for_irq(data->irq);
-       evtchn_port_t evtchn = info ? info->evtchn : 0;
+       evtchn_port_t evtchn = info->evtchn;
 
        if (VALID_EVTCHN(evtchn))
-               do_mask(info, EVT_MASK_REASON_EXPLICIT);
+               event_handler_exit(info);
 }
 
 static void ack_dynirq(struct irq_data *data)
 {
        struct irq_info *info = info_for_irq(data->irq);
-       evtchn_port_t evtchn = info ? info->evtchn : 0;
 
-       if (VALID_EVTCHN(evtchn))
-               event_handler_exit(info);
+       if (info)
+               do_ack_dynirq(info);
 }
 
 static void mask_ack_dynirq(struct irq_data *data)
 {
-       disable_dynirq(data);
-       ack_dynirq(data);
+       struct irq_info *info = info_for_irq(data->irq);
+
+       if (info) {
+               do_disable_dynirq(info);
+               do_ack_dynirq(info);
+       }
 }
 
 static void lateeoi_ack_dynirq(struct irq_data *data)
@@ -1952,13 +1937,13 @@ static void restore_pirqs(void)
                if (rc) {
                        pr_warn("xen map irq failed gsi=%d irq=%d pirq=%d rc=%d\n",
                                gsi, irq, pirq, rc);
-                       xen_free_irq(irq);
+                       xen_free_irq(info);
                        continue;
                }
 
                printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq);
 
-               __startup_pirq(irq);
+               __startup_pirq(info);
        }
 }
 
@@ -1966,13 +1951,15 @@ static void restore_cpu_virqs(unsigned int cpu)
 {
        struct evtchn_bind_virq bind_virq;
        evtchn_port_t evtchn;
+       struct irq_info *info;
        int virq, irq;
 
        for (virq = 0; virq < NR_VIRQS; virq++) {
                if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1)
                        continue;
+               info = info_for_irq(irq);
 
-               BUG_ON(virq_from_irq(irq) != virq);
+               BUG_ON(virq_from_irq(info) != virq);
 
                /* Get a new binding from Xen. */
                bind_virq.virq = virq;
@@ -1983,9 +1970,9 @@ static void restore_cpu_virqs(unsigned int cpu)
                evtchn = bind_virq.port;
 
                /* Record the new mapping. */
-               (void)xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
+               xen_irq_info_virq_setup(info, cpu, evtchn, virq);
                /* The affinity mask is still valid */
-               bind_evtchn_to_cpu(evtchn, cpu, false);
+               bind_evtchn_to_cpu(info, cpu, false);
        }
 }
 
@@ -1993,13 +1980,15 @@ static void restore_cpu_ipis(unsigned int cpu)
 {
        struct evtchn_bind_ipi bind_ipi;
        evtchn_port_t evtchn;
+       struct irq_info *info;
        int ipi, irq;
 
        for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) {
                if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1)
                        continue;
+               info = info_for_irq(irq);
 
-               BUG_ON(ipi_from_irq(irq) != ipi);
+               BUG_ON(ipi_from_irq(info) != ipi);
 
                /* Get a new binding from Xen. */
                bind_ipi.vcpu = xen_vcpu_nr(cpu);
@@ -2009,9 +1998,9 @@ static void restore_cpu_ipis(unsigned int cpu)
                evtchn = bind_ipi.port;
 
                /* Record the new mapping. */
-               (void)xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
+               xen_irq_info_ipi_setup(info, cpu, evtchn, ipi);
                /* The affinity mask is still valid */
-               bind_evtchn_to_cpu(evtchn, cpu, false);
+               bind_evtchn_to_cpu(info, cpu, false);
        }
 }
 
@@ -2025,13 +2014,6 @@ void xen_clear_irq_pending(int irq)
                event_handler_exit(info);
 }
 EXPORT_SYMBOL(xen_clear_irq_pending);
-void xen_set_irq_pending(int irq)
-{
-       evtchn_port_t evtchn = evtchn_from_irq(irq);
-
-       if (VALID_EVTCHN(evtchn))
-               set_evtchn(evtchn);
-}
 
 bool xen_test_irq_pending(int irq)
 {
index 4d3398e..19ae316 100644 (file)
@@ -33,7 +33,6 @@ struct evtchn_ops {
 
 extern const struct evtchn_ops *evtchn_ops;
 
-int get_evtchn_to_irq(evtchn_port_t evtchn);
 void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl);
 
 unsigned int cpu_from_evtchn(evtchn_port_t evtchn);
index b3e3d1b..5086552 100644 (file)
@@ -47,6 +47,9 @@
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
 
+#ifdef CONFIG_ACPI
+#include <acpi/processor.h>
+#endif
 
 /*
  * @cpu_id: Xen physical cpu logic number
@@ -400,4 +403,23 @@ bool __init xen_processor_present(uint32_t acpi_id)
 
        return online;
 }
+
+void xen_sanitize_proc_cap_bits(uint32_t *cap)
+{
+       struct xen_platform_op op = {
+               .cmd                    = XENPF_set_processor_pminfo,
+               .u.set_pminfo.id        = -1,
+               .u.set_pminfo.type      = XEN_PM_PDC,
+       };
+       u32 buf[3] = { ACPI_PDC_REVISION_ID, 1, *cap };
+       int ret;
+
+       set_xen_guest_handle(op.u.set_pminfo.pdc, buf);
+       ret = HYPERVISOR_platform_op(&op);
+       if (ret)
+               pr_err("sanitize of _PDC buffer bits from Xen failed: %d\n",
+                      ret);
+       else
+               *cap = buf[2];
+}
 #endif
index 1ce7f3c..0eb337a 100644 (file)
@@ -1115,7 +1115,7 @@ struct privcmd_kernel_ioreq {
        spinlock_t lock; /* Protects ioeventfds list */
        struct list_head ioeventfds;
        struct list_head list;
-       struct ioreq_port ports[0];
+       struct ioreq_port ports[] __counted_by(vcpus);
 };
 
 static irqreturn_t ioeventfd_interrupt(int irq, void *dev_id)
index 946bd56..0e6c6c2 100644 (file)
@@ -405,4 +405,5 @@ const struct dma_map_ops xen_swiotlb_dma_ops = {
        .get_sgtable = dma_common_get_sgtable,
        .alloc_pages = dma_common_alloc_pages,
        .free_pages = dma_common_free_pages,
+       .max_mapping_size = swiotlb_max_mapping_size,
 };
index b52e0fa..223870a 100644 (file)
@@ -21,7 +21,7 @@
 
 #include <xen/xen-front-pgdir-shbuf.h>
 
-/**
+/*
  * This structure represents the structure of a shared page
  * that contains grant references to the pages of the shared
  * buffer. This structure is common to many Xen para-virtualized
@@ -33,7 +33,7 @@ struct xen_page_directory {
        grant_ref_t gref[]; /* Variable length */
 };
 
-/**
+/*
  * Shared buffer ops which are differently implemented
  * depending on the allocation mode, e.g. if the buffer
  * is allocated by the corresponding backend or frontend.
@@ -61,7 +61,7 @@ struct xen_front_pgdir_shbuf_ops {
        int (*unmap)(struct xen_front_pgdir_shbuf *buf);
 };
 
-/**
+/*
  * Get granted reference to the very first page of the
  * page directory. Usually this is passed to the backend,
  * so it can find/fill the grant references to the buffer's
@@ -81,7 +81,7 @@ xen_front_pgdir_shbuf_get_dir_start(struct xen_front_pgdir_shbuf *buf)
 }
 EXPORT_SYMBOL_GPL(xen_front_pgdir_shbuf_get_dir_start);
 
-/**
+/*
  * Map granted references of the shared buffer.
  *
  * Depending on the shared buffer mode of allocation
@@ -102,7 +102,7 @@ int xen_front_pgdir_shbuf_map(struct xen_front_pgdir_shbuf *buf)
 }
 EXPORT_SYMBOL_GPL(xen_front_pgdir_shbuf_map);
 
-/**
+/*
  * Unmap granted references of the shared buffer.
  *
  * Depending on the shared buffer mode of allocation
@@ -123,7 +123,7 @@ int xen_front_pgdir_shbuf_unmap(struct xen_front_pgdir_shbuf *buf)
 }
 EXPORT_SYMBOL_GPL(xen_front_pgdir_shbuf_unmap);
 
-/**
+/*
  * Free all the resources of the shared buffer.
  *
  * \param buf shared buffer which resources to be freed.
@@ -150,7 +150,7 @@ EXPORT_SYMBOL_GPL(xen_front_pgdir_shbuf_free);
                                 offsetof(struct xen_page_directory, \
                                          gref)) / sizeof(grant_ref_t))
 
-/**
+/*
  * Get the number of pages the page directory consumes itself.
  *
  * \param buf shared buffer.
@@ -160,7 +160,7 @@ static int get_num_pages_dir(struct xen_front_pgdir_shbuf *buf)
        return DIV_ROUND_UP(buf->num_pages, XEN_NUM_GREFS_PER_PAGE);
 }
 
-/**
+/*
  * Calculate the number of grant references needed to share the buffer
  * and its pages when backend allocates the buffer.
  *
@@ -172,7 +172,7 @@ static void backend_calc_num_grefs(struct xen_front_pgdir_shbuf *buf)
        buf->num_grefs = get_num_pages_dir(buf);
 }
 
-/**
+/*
  * Calculate the number of grant references needed to share the buffer
  * and its pages when frontend allocates the buffer.
  *
@@ -190,7 +190,7 @@ static void guest_calc_num_grefs(struct xen_front_pgdir_shbuf *buf)
 #define xen_page_to_vaddr(page) \
        ((uintptr_t)pfn_to_kaddr(page_to_xen_pfn(page)))
 
-/**
+/*
  * Unmap the buffer previously mapped with grant references
  * provided by the backend.
  *
@@ -238,7 +238,7 @@ static int backend_unmap(struct xen_front_pgdir_shbuf *buf)
        return ret;
 }
 
-/**
+/*
  * Map the buffer with grant references provided by the backend.
  *
  * \param buf shared buffer.
@@ -320,7 +320,7 @@ static int backend_map(struct xen_front_pgdir_shbuf *buf)
        return ret;
 }
 
-/**
+/*
  * Fill page directory with grant references to the pages of the
  * page directory itself.
  *
@@ -350,7 +350,7 @@ static void backend_fill_page_dir(struct xen_front_pgdir_shbuf *buf)
        page_dir->gref_dir_next_page = XEN_GREF_LIST_END;
 }
 
-/**
+/*
  * Fill page directory with grant references to the pages of the
  * page directory and the buffer we share with the backend.
  *
@@ -389,7 +389,7 @@ static void guest_fill_page_dir(struct xen_front_pgdir_shbuf *buf)
        }
 }
 
-/**
+/*
  * Grant references to the frontend's buffer pages.
  *
  * These will be shared with the backend, so it can
@@ -418,7 +418,7 @@ static int guest_grant_refs_for_buffer(struct xen_front_pgdir_shbuf *buf,
        return 0;
 }
 
-/**
+/*
  * Grant all the references needed to share the buffer.
  *
  * Grant references to the page directory pages and, if
@@ -466,7 +466,7 @@ static int grant_references(struct xen_front_pgdir_shbuf *buf)
        return 0;
 }
 
-/**
+/*
  * Allocate all required structures to mange shared buffer.
  *
  * \param buf shared buffer.
@@ -506,7 +506,7 @@ static const struct xen_front_pgdir_shbuf_ops local_ops = {
        .grant_refs_for_buffer = guest_grant_refs_for_buffer,
 };
 
-/**
+/*
  * Allocate a new instance of a shared buffer.
  *
  * \param cfg configuration to be used while allocating a new shared buffer.
index 4d04ef2..1fa8cf2 100644 (file)
@@ -132,8 +132,8 @@ static int afs_probe_cell_name(struct dentry *dentry)
 
        ret = dns_query(net->net, "afsdb", name, len, "srv=1",
                        NULL, NULL, false);
-       if (ret == -ENODATA)
-               ret = -EDESTADDRREQ;
+       if (ret == -ENODATA || ret == -ENOKEY)
+               ret = -ENOENT;
        return ret;
 }
 
index c9cef37..a812952 100644 (file)
@@ -553,6 +553,7 @@ struct afs_server_entry {
 };
 
 struct afs_server_list {
+       struct rcu_head         rcu;
        afs_volid_t             vids[AFS_MAXTYPES]; /* Volume IDs */
        refcount_t              usage;
        unsigned char           nr_servers;
index ed90567..b59896b 100644 (file)
@@ -17,7 +17,7 @@ void afs_put_serverlist(struct afs_net *net, struct afs_server_list *slist)
                for (i = 0; i < slist->nr_servers; i++)
                        afs_unuse_server(net, slist->servers[i].server,
                                         afs_server_trace_put_slist);
-               kfree(slist);
+               kfree_rcu(slist, rcu);
        }
 }
 
index 95d7130..a01a0fb 100644 (file)
@@ -407,6 +407,10 @@ static int afs_validate_fc(struct fs_context *fc)
                        return PTR_ERR(volume);
 
                ctx->volume = volume;
+               if (volume->type != AFSVL_RWVOL) {
+                       ctx->flock_mode = afs_flock_mode_local;
+                       fc->sb_flags |= SB_RDONLY;
+               }
        }
 
        return 0;
index 488e584..eb415ce 100644 (file)
@@ -58,6 +58,12 @@ static bool afs_start_vl_iteration(struct afs_vl_cursor *vc)
                }
 
                /* Status load is ordered after lookup counter load */
+               if (cell->dns_status == DNS_LOOKUP_GOT_NOT_FOUND) {
+                       pr_warn("No record of cell %s\n", cell->name);
+                       vc->error = -ENOENT;
+                       return false;
+               }
+
                if (cell->dns_source == DNS_RECORD_UNAVAILABLE) {
                        vc->error = -EDESTADDRREQ;
                        return false;
@@ -285,6 +291,7 @@ failed:
  */
 static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc)
 {
+       struct afs_cell *cell = vc->cell;
        static int count;
        int i;
 
@@ -294,6 +301,9 @@ static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc)
 
        rcu_read_lock();
        pr_notice("EDESTADDR occurred\n");
+       pr_notice("CELL: %s err=%d\n", cell->name, cell->error);
+       pr_notice("DNS: src=%u st=%u lc=%x\n",
+                 cell->dns_source, cell->dns_status, cell->dns_lookup_count);
        pr_notice("VC: ut=%lx ix=%u ni=%hu fl=%hx err=%hd\n",
                  vc->untried, vc->index, vc->nr_iterations, vc->flags, vc->error);
 
index a5083d4..1f5db68 100644 (file)
@@ -309,9 +309,7 @@ static int autofs_fill_super(struct super_block *s, struct fs_context *fc)
        struct autofs_fs_context *ctx = fc->fs_private;
        struct autofs_sb_info *sbi = s->s_fs_info;
        struct inode *root_inode;
-       struct dentry *root;
        struct autofs_info *ino;
-       int ret = -ENOMEM;
 
        pr_debug("starting up, sbi = %p\n", sbi);
 
@@ -328,56 +326,44 @@ static int autofs_fill_super(struct super_block *s, struct fs_context *fc)
         */
        ino = autofs_new_ino(sbi);
        if (!ino)
-               goto fail;
+               return -ENOMEM;
 
        root_inode = autofs_get_inode(s, S_IFDIR | 0755);
+       if (!root_inode)
+               return -ENOMEM;
+
        root_inode->i_uid = ctx->uid;
        root_inode->i_gid = ctx->gid;
+       root_inode->i_fop = &autofs_root_operations;
+       root_inode->i_op = &autofs_dir_inode_operations;
 
-       root = d_make_root(root_inode);
-       if (!root)
-               goto fail_ino;
-
-       root->d_fsdata = ino;
+       s->s_root = d_make_root(root_inode);
+       if (unlikely(!s->s_root)) {
+               autofs_free_ino(ino);
+               return -ENOMEM;
+       }
+       s->s_root->d_fsdata = ino;
 
        if (ctx->pgrp_set) {
                sbi->oz_pgrp = find_get_pid(ctx->pgrp);
-               if (!sbi->oz_pgrp) {
-                       ret = invalf(fc, "Could not find process group %d",
-                                    ctx->pgrp);
-                       goto fail_dput;
-               }
-       } else {
+               if (!sbi->oz_pgrp)
+                       return invalf(fc, "Could not find process group %d",
+                                     ctx->pgrp);
+       } else
                sbi->oz_pgrp = get_task_pid(current, PIDTYPE_PGID);
-       }
 
        if (autofs_type_trigger(sbi->type))
-               __managed_dentry_set_managed(root);
-
-       root_inode->i_fop = &autofs_root_operations;
-       root_inode->i_op = &autofs_dir_inode_operations;
+               /* s->s_root won't be contended so there's little to
+                * be gained by not taking the d_lock when setting
+                * d_flags, even when a lot mounts are being done.
+                */
+               managed_dentry_set_managed(s->s_root);
 
        pr_debug("pipe fd = %d, pgrp = %u\n",
                 sbi->pipefd, pid_nr(sbi->oz_pgrp));
 
        sbi->flags &= ~AUTOFS_SBI_CATATONIC;
-
-       /*
-        * Success! Install the root dentry now to indicate completion.
-        */
-       s->s_root = root;
        return 0;
-
-       /*
-        * Failure ... clean up.
-        */
-fail_dput:
-       dput(root);
-       goto fail;
-fail_ino:
-       autofs_free_ino(ino);
-fail:
-       return ret;
 }
 
 /*
index ef02c9b..23c0834 100644 (file)
@@ -313,17 +313,17 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
                                  bp.level - 1,
                                  0);
        b = bch2_btree_iter_peek_node(iter);
-       if (IS_ERR(b))
+       if (IS_ERR_OR_NULL(b))
                goto err;
 
        BUG_ON(b->c.level != bp.level - 1);
 
-       if (b && extent_matches_bp(c, bp.btree_id, bp.level,
-                                  bkey_i_to_s_c(&b->key),
-                                  bucket, bp))
+       if (extent_matches_bp(c, bp.btree_id, bp.level,
+                             bkey_i_to_s_c(&b->key),
+                             bucket, bp))
                return b;
 
-       if (b && btree_node_will_make_reachable(b)) {
+       if (btree_node_will_make_reachable(b)) {
                b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
        } else {
                backpointer_not_found(trans, bp_pos, bp, bkey_i_to_s_c(&b->key));
index 9cb8684..403aa33 100644 (file)
@@ -617,7 +617,7 @@ struct journal_seq_blacklist_table {
                u64             start;
                u64             end;
                bool            dirty;
-       }                       entries[0];
+       }                       entries[];
 };
 
 struct journal_keys {
index c2adf3f..6fa90bc 100644 (file)
@@ -3087,8 +3087,6 @@ void bch2_trans_put(struct btree_trans *trans)
                srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
        }
 
-       bch2_journal_preres_put(&c->journal, &trans->journal_preres);
-
        kfree(trans->extra_journal_entries.data);
 
        if (trans->fs_usage_deltas) {
index 9b78f78..37fbf22 100644 (file)
@@ -89,10 +89,13 @@ static void bkey_cached_free(struct btree_key_cache *bc,
        ck->btree_trans_barrier_seq =
                start_poll_synchronize_srcu(&c->btree_trans_barrier);
 
-       if (ck->c.lock.readers)
+       if (ck->c.lock.readers) {
                list_move_tail(&ck->list, &bc->freed_pcpu);
-       else
+               bc->nr_freed_pcpu++;
+       } else {
                list_move_tail(&ck->list, &bc->freed_nonpcpu);
+               bc->nr_freed_nonpcpu++;
+       }
        atomic_long_inc(&bc->nr_freed);
 
        kfree(ck->k);
@@ -109,6 +112,8 @@ static void __bkey_cached_move_to_freelist_ordered(struct btree_key_cache *bc,
 {
        struct bkey_cached *pos;
 
+       bc->nr_freed_nonpcpu++;
+
        list_for_each_entry_reverse(pos, &bc->freed_nonpcpu, list) {
                if (ULONG_CMP_GE(ck->btree_trans_barrier_seq,
                                 pos->btree_trans_barrier_seq)) {
@@ -158,6 +163,7 @@ static void bkey_cached_move_to_freelist(struct btree_key_cache *bc,
 #else
                mutex_lock(&bc->lock);
                list_move_tail(&ck->list, &bc->freed_nonpcpu);
+               bc->nr_freed_nonpcpu++;
                mutex_unlock(&bc->lock);
 #endif
        } else {
@@ -217,6 +223,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
                               f->nr < ARRAY_SIZE(f->objs) / 2) {
                                ck = list_last_entry(&bc->freed_nonpcpu, struct bkey_cached, list);
                                list_del_init(&ck->list);
+                               bc->nr_freed_nonpcpu--;
                                f->objs[f->nr++] = ck;
                        }
 
@@ -229,6 +236,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
                if (!list_empty(&bc->freed_nonpcpu)) {
                        ck = list_last_entry(&bc->freed_nonpcpu, struct bkey_cached, list);
                        list_del_init(&ck->list);
+                       bc->nr_freed_nonpcpu--;
                }
                mutex_unlock(&bc->lock);
 #endif
@@ -664,7 +672,6 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
                goto out;
 
        bch2_journal_pin_drop(j, &ck->journal);
-       bch2_journal_preres_put(j, &ck->res);
 
        BUG_ON(!btree_node_locked(c_iter.path, 0));
 
@@ -762,18 +769,6 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,
 
        BUG_ON(insert->k.u64s > ck->u64s);
 
-       if (likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY))) {
-               int difference;
-
-               BUG_ON(jset_u64s(insert->k.u64s) > trans->journal_preres.u64s);
-
-               difference = jset_u64s(insert->k.u64s) - ck->res.u64s;
-               if (difference > 0) {
-                       trans->journal_preres.u64s      -= difference;
-                       ck->res.u64s                    += difference;
-               }
-       }
-
        bkey_copy(ck->k, insert);
        ck->valid = true;
 
@@ -850,6 +845,8 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
         * Newest freed entries are at the end of the list - once we hit one
         * that's too new to be freed, we can bail out:
         */
+       scanned += bc->nr_freed_nonpcpu;
+
        list_for_each_entry_safe(ck, t, &bc->freed_nonpcpu, list) {
                if (!poll_state_synchronize_srcu(&c->btree_trans_barrier,
                                                 ck->btree_trans_barrier_seq))
@@ -859,13 +856,15 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
                six_lock_exit(&ck->c.lock);
                kmem_cache_free(bch2_key_cache, ck);
                atomic_long_dec(&bc->nr_freed);
-               scanned++;
                freed++;
+               bc->nr_freed_nonpcpu--;
        }
 
        if (scanned >= nr)
                goto out;
 
+       scanned += bc->nr_freed_pcpu;
+
        list_for_each_entry_safe(ck, t, &bc->freed_pcpu, list) {
                if (!poll_state_synchronize_srcu(&c->btree_trans_barrier,
                                                 ck->btree_trans_barrier_seq))
@@ -875,8 +874,8 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
                six_lock_exit(&ck->c.lock);
                kmem_cache_free(bch2_key_cache, ck);
                atomic_long_dec(&bc->nr_freed);
-               scanned++;
                freed++;
+               bc->nr_freed_pcpu--;
        }
 
        if (scanned >= nr)
@@ -982,6 +981,9 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
        }
 #endif
 
+       BUG_ON(list_count_nodes(&bc->freed_pcpu) != bc->nr_freed_pcpu);
+       BUG_ON(list_count_nodes(&bc->freed_nonpcpu) != bc->nr_freed_nonpcpu);
+
        list_splice(&bc->freed_pcpu,    &items);
        list_splice(&bc->freed_nonpcpu, &items);
 
@@ -991,7 +993,6 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
                cond_resched();
 
                bch2_journal_pin_drop(&c->journal, &ck->journal);
-               bch2_journal_preres_put(&c->journal, &ck->res);
 
                list_del(&ck->list);
                kfree(ck->k);
diff --git a/fs/bcachefs/btree_key_cache_types.h b/fs/bcachefs/btree_key_cache_types.h
new file mode 100644 (file)
index 0000000..290e4e5
--- /dev/null
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BTREE_KEY_CACHE_TYPES_H
+#define _BCACHEFS_BTREE_KEY_CACHE_TYPES_H
+
+struct btree_key_cache_freelist {
+       struct bkey_cached      *objs[16];
+       unsigned                nr;
+};
+
+struct btree_key_cache {
+       struct mutex            lock;
+       struct rhashtable       table;
+       bool                    table_init_done;
+
+       struct list_head        freed_pcpu;
+       size_t                  nr_freed_pcpu;
+       struct list_head        freed_nonpcpu;
+       size_t                  nr_freed_nonpcpu;
+
+       struct shrinker         *shrink;
+       unsigned                shrink_iter;
+       struct btree_key_cache_freelist __percpu *pcpu_freed;
+
+       atomic_long_t           nr_freed;
+       atomic_long_t           nr_keys;
+       atomic_long_t           nr_dirty;
+};
+
+struct bkey_cached_key {
+       u32                     btree_id;
+       struct bpos             pos;
+} __packed __aligned(4);
+
+#endif /* _BCACHEFS_BTREE_KEY_CACHE_TYPES_H */
index decad7b..12907be 100644 (file)
@@ -78,6 +78,53 @@ inline void bch2_btree_node_prep_for_write(struct btree_trans *trans,
                bch2_btree_init_next(trans, b);
 }
 
+static noinline int trans_lock_write_fail(struct btree_trans *trans, struct btree_insert_entry *i)
+{
+       while (--i >= trans->updates) {
+               if (same_leaf_as_prev(trans, i))
+                       continue;
+
+               bch2_btree_node_unlock_write(trans, i->path, insert_l(i)->b);
+       }
+
+       trace_and_count(trans->c, trans_restart_would_deadlock_write, trans);
+       return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock_write);
+}
+
+static inline int bch2_trans_lock_write(struct btree_trans *trans)
+{
+       struct btree_insert_entry *i;
+
+       EBUG_ON(trans->write_locked);
+
+       trans_for_each_update(trans, i) {
+               if (same_leaf_as_prev(trans, i))
+                       continue;
+
+               if (bch2_btree_node_lock_write(trans, i->path, &insert_l(i)->b->c))
+                       return trans_lock_write_fail(trans, i);
+
+               if (!i->cached)
+                       bch2_btree_node_prep_for_write(trans, i->path, insert_l(i)->b);
+       }
+
+       trans->write_locked = true;
+       return 0;
+}
+
+static inline void bch2_trans_unlock_write(struct btree_trans *trans)
+{
+       if (likely(trans->write_locked)) {
+               struct btree_insert_entry *i;
+
+               trans_for_each_update(trans, i)
+                       if (!same_leaf_as_prev(trans, i))
+                               bch2_btree_node_unlock_write_inlined(trans, i->path,
+                                                                    insert_l(i)->b);
+               trans->write_locked = false;
+       }
+}
+
 /* Inserting into a given leaf node (last stage of insert): */
 
 /* Handle overwrites and do insert, for non extents: */
@@ -276,17 +323,6 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans,
                bch2_snapshot_is_internal_node(trans->c, i->k->k.p.snapshot));
 }
 
-static noinline int
-bch2_trans_journal_preres_get_cold(struct btree_trans *trans, unsigned flags,
-                                  unsigned long trace_ip)
-{
-       return drop_locks_do(trans,
-               bch2_journal_preres_get(&trans->c->journal,
-                       &trans->journal_preres,
-                       trans->journal_preres_u64s,
-                       (flags & BCH_WATERMARK_MASK)));
-}
-
 static __always_inline int bch2_trans_journal_res_get(struct btree_trans *trans,
                                                      unsigned flags)
 {
@@ -321,6 +357,45 @@ static inline int btree_key_can_insert(struct btree_trans *trans,
        return 0;
 }
 
+noinline static int
+btree_key_can_insert_cached_slowpath(struct btree_trans *trans, unsigned flags,
+                                    struct btree_path *path, unsigned new_u64s)
+{
+       struct bch_fs *c = trans->c;
+       struct btree_insert_entry *i;
+       struct bkey_cached *ck = (void *) path->l[0].b;
+       struct bkey_i *new_k;
+       int ret;
+
+       bch2_trans_unlock_write(trans);
+       bch2_trans_unlock(trans);
+
+       new_k = kmalloc(new_u64s * sizeof(u64), GFP_KERNEL);
+       if (!new_k) {
+               bch_err(c, "error allocating memory for key cache key, btree %s u64s %u",
+                       bch2_btree_id_str(path->btree_id), new_u64s);
+               return -BCH_ERR_ENOMEM_btree_key_cache_insert;
+       }
+
+       ret =   bch2_trans_relock(trans) ?:
+               bch2_trans_lock_write(trans);
+       if (unlikely(ret)) {
+               kfree(new_k);
+               return ret;
+       }
+
+       memcpy(new_k, ck->k, ck->u64s * sizeof(u64));
+
+       trans_for_each_update(trans, i)
+               if (i->old_v == &ck->k->v)
+                       i->old_v = &new_k->v;
+
+       kfree(ck->k);
+       ck->u64s        = new_u64s;
+       ck->k           = new_k;
+       return 0;
+}
+
 static int btree_key_can_insert_cached(struct btree_trans *trans, unsigned flags,
                                       struct btree_path *path, unsigned u64s)
 {
@@ -347,12 +422,9 @@ static int btree_key_can_insert_cached(struct btree_trans *trans, unsigned flags
                return 0;
 
        new_u64s        = roundup_pow_of_two(u64s);
-       new_k           = krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOFS);
-       if (!new_k) {
-               bch_err(c, "error allocating memory for key cache key, btree %s u64s %u",
-                       bch2_btree_id_str(path->btree_id), new_u64s);
-               return -BCH_ERR_ENOMEM_btree_key_cache_insert;
-       }
+       new_k           = krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOWAIT);
+       if (unlikely(!new_k))
+               return btree_key_can_insert_cached_slowpath(trans, flags, path, new_u64s);
 
        trans_for_each_update(trans, i)
                if (i->old_v == &ck->k->v)
@@ -732,37 +804,6 @@ revert_fs_usage:
        return ret;
 }
 
-static noinline int trans_lock_write_fail(struct btree_trans *trans, struct btree_insert_entry *i)
-{
-       while (--i >= trans->updates) {
-               if (same_leaf_as_prev(trans, i))
-                       continue;
-
-               bch2_btree_node_unlock_write(trans, i->path, insert_l(i)->b);
-       }
-
-       trace_and_count(trans->c, trans_restart_would_deadlock_write, trans);
-       return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock_write);
-}
-
-static inline int trans_lock_write(struct btree_trans *trans)
-{
-       struct btree_insert_entry *i;
-
-       trans_for_each_update(trans, i) {
-               if (same_leaf_as_prev(trans, i))
-                       continue;
-
-               if (bch2_btree_node_lock_write(trans, i->path, &insert_l(i)->b->c))
-                       return trans_lock_write_fail(trans, i);
-
-               if (!i->cached)
-                       bch2_btree_node_prep_for_write(trans, i->path, insert_l(i)->b);
-       }
-
-       return 0;
-}
-
 static noinline void bch2_drop_overwrites_from_journal(struct btree_trans *trans)
 {
        struct btree_insert_entry *i;
@@ -830,15 +871,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags
                }
        }
 
-       ret = bch2_journal_preres_get(&c->journal,
-                       &trans->journal_preres, trans->journal_preres_u64s,
-                       (flags & BCH_WATERMARK_MASK)|JOURNAL_RES_GET_NONBLOCK);
-       if (unlikely(ret == -BCH_ERR_journal_preres_get_blocked))
-               ret = bch2_trans_journal_preres_get_cold(trans, flags, trace_ip);
-       if (unlikely(ret))
-               return ret;
-
-       ret = trans_lock_write(trans);
+       ret = bch2_trans_lock_write(trans);
        if (unlikely(ret))
                return ret;
 
@@ -847,10 +880,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags
        if (!ret && unlikely(trans->journal_replay_not_finished))
                bch2_drop_overwrites_from_journal(trans);
 
-       trans_for_each_update(trans, i)
-               if (!same_leaf_as_prev(trans, i))
-                       bch2_btree_node_unlock_write_inlined(trans, i->path,
-                                                       insert_l(i)->b);
+       bch2_trans_unlock_write(trans);
 
        if (!ret && trans->journal_pin)
                bch2_journal_pin_add(&c->journal, trans->journal_res.seq,
@@ -1003,7 +1033,6 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
        struct bch_fs *c = trans->c;
        struct btree_insert_entry *i = NULL;
        struct btree_write_buffered_key *wb;
-       unsigned u64s;
        int ret = 0;
 
        if (!trans->nr_updates &&
@@ -1063,13 +1092,8 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
 
        EBUG_ON(test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags));
 
-       memset(&trans->journal_preres, 0, sizeof(trans->journal_preres));
-
        trans->journal_u64s             = trans->extra_journal_entries.nr;
-       trans->journal_preres_u64s      = 0;
-
        trans->journal_transaction_names = READ_ONCE(c->opts.journal_transaction_names);
-
        if (trans->journal_transaction_names)
                trans->journal_u64s += jset_u64s(JSET_ENTRY_LOG_U64s);
 
@@ -1085,16 +1109,11 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
                if (i->key_cache_already_flushed)
                        continue;
 
-               /* we're going to journal the key being updated: */
-               u64s = jset_u64s(i->k->k.u64s);
-               if (i->cached &&
-                   likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY)))
-                       trans->journal_preres_u64s += u64s;
-
                if (i->flags & BTREE_UPDATE_NOJOURNAL)
                        continue;
 
-               trans->journal_u64s += u64s;
+               /* we're going to journal the key being updated: */
+               trans->journal_u64s += jset_u64s(i->k->k.u64s);
 
                /* and we're also going to log the overwrite: */
                if (trans->journal_transaction_names)
@@ -1126,8 +1145,6 @@ retry:
 
        trace_and_count(c, transaction_commit, trans, _RET_IP_);
 out:
-       bch2_journal_preres_put(&c->journal, &trans->journal_preres);
-
        if (likely(!(flags & BTREE_INSERT_NOCHECK_RW)))
                bch2_write_ref_put(c, BCH_WRITE_REF_trans);
 out_reset:
index 941841a..60453ba 100644 (file)
@@ -5,7 +5,7 @@
 #include <linux/list.h>
 #include <linux/rhashtable.h>
 
-//#include "bkey_methods.h"
+#include "btree_key_cache_types.h"
 #include "buckets_types.h"
 #include "darray.h"
 #include "errcode.h"
@@ -312,31 +312,6 @@ struct btree_iter {
 #endif
 };
 
-struct btree_key_cache_freelist {
-       struct bkey_cached      *objs[16];
-       unsigned                nr;
-};
-
-struct btree_key_cache {
-       struct mutex            lock;
-       struct rhashtable       table;
-       bool                    table_init_done;
-       struct list_head        freed_pcpu;
-       struct list_head        freed_nonpcpu;
-       struct shrinker         *shrink;
-       unsigned                shrink_iter;
-       struct btree_key_cache_freelist __percpu *pcpu_freed;
-
-       atomic_long_t           nr_freed;
-       atomic_long_t           nr_keys;
-       atomic_long_t           nr_dirty;
-};
-
-struct bkey_cached_key {
-       u32                     btree_id;
-       struct bpos             pos;
-} __packed __aligned(4);
-
 #define BKEY_CACHED_ACCESSED           0
 #define BKEY_CACHED_DIRTY              1
 
@@ -352,7 +327,6 @@ struct bkey_cached {
        struct rhash_head       hash;
        struct list_head        list;
 
-       struct journal_preres   res;
        struct journal_entry_pin journal;
        u64                     seq;
 
@@ -389,11 +363,7 @@ struct btree_insert_entry {
        unsigned long           ip_allocated;
 };
 
-#ifndef CONFIG_LOCKDEP
 #define BTREE_ITER_MAX         64
-#else
-#define BTREE_ITER_MAX         32
-#endif
 
 struct btree_trans_commit_hook;
 typedef int (btree_trans_commit_hook_fn)(struct btree_trans *, struct btree_trans_commit_hook *);
@@ -434,6 +404,7 @@ struct btree_trans {
        bool                    journal_transaction_names:1;
        bool                    journal_replay_not_finished:1;
        bool                    notrace_relock_fail:1;
+       bool                    write_locked:1;
        enum bch_errcode        restarted:16;
        u32                     restart_count;
        unsigned long           last_begin_ip;
@@ -465,11 +436,9 @@ struct btree_trans {
        struct journal_entry_pin *journal_pin;
 
        struct journal_res      journal_res;
-       struct journal_preres   journal_preres;
        u64                     *journal_seq;
        struct disk_reservation *disk_res;
        unsigned                journal_u64s;
-       unsigned                journal_preres_u64s;
        struct replicas_delta_list *fs_usage_deltas;
 };
 
index 39c2db6..76f27bc 100644 (file)
@@ -513,8 +513,6 @@ static void bch2_btree_update_free(struct btree_update *as, struct btree_trans *
                up_read(&c->gc_lock);
        as->took_gc_lock = false;
 
-       bch2_journal_preres_put(&c->journal, &as->journal_preres);
-
        bch2_journal_pin_drop(&c->journal, &as->journal);
        bch2_journal_pin_flush(&c->journal, &as->journal);
        bch2_disk_reservation_put(c, &as->disk_res);
@@ -734,8 +732,6 @@ err:
 
        bch2_journal_pin_drop(&c->journal, &as->journal);
 
-       bch2_journal_preres_put(&c->journal, &as->journal_preres);
-
        mutex_lock(&c->btree_interior_update_lock);
        for (i = 0; i < as->nr_new_nodes; i++) {
                b = as->new_nodes[i];
@@ -1047,7 +1043,6 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
        unsigned nr_nodes[2] = { 0, 0 };
        unsigned update_level = level;
        enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
-       unsigned journal_flags = 0;
        int ret = 0;
        u32 restart_count = trans->restart_count;
 
@@ -1061,10 +1056,6 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
        flags &= ~BCH_WATERMARK_MASK;
        flags |= watermark;
 
-       if (flags & BTREE_INSERT_JOURNAL_RECLAIM)
-               journal_flags |= JOURNAL_RES_GET_NONBLOCK;
-       journal_flags |= watermark;
-
        while (1) {
                nr_nodes[!!update_level] += 1 + split;
                update_level++;
@@ -1129,27 +1120,6 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
        if (ret)
                goto err;
 
-       ret = bch2_journal_preres_get(&c->journal, &as->journal_preres,
-                                     BTREE_UPDATE_JOURNAL_RES,
-                                     journal_flags|JOURNAL_RES_GET_NONBLOCK);
-       if (ret) {
-               if (flags & BTREE_INSERT_JOURNAL_RECLAIM) {
-                       ret = -BCH_ERR_journal_reclaim_would_deadlock;
-                       goto err;
-               }
-
-               ret = drop_locks_do(trans,
-                       bch2_journal_preres_get(&c->journal, &as->journal_preres,
-                                             BTREE_UPDATE_JOURNAL_RES,
-                                             journal_flags));
-               if (ret == -BCH_ERR_journal_preres_get_blocked) {
-                       trace_and_count(c, trans_restart_journal_preres_get, trans, _RET_IP_, journal_flags);
-                       ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_journal_preres_get);
-               }
-               if (ret)
-                       goto err;
-       }
-
        ret = bch2_disk_reservation_get(c, &as->disk_res,
                        (nr_nodes[0] + nr_nodes[1]) * btree_sectors(c),
                        c->opts.metadata_replicas,
index 4df2151..031076e 100644 (file)
@@ -55,7 +55,6 @@ struct btree_update {
        unsigned                        update_level;
 
        struct disk_reservation         disk_res;
-       struct journal_preres           journal_preres;
 
        /*
         * BTREE_INTERIOR_UPDATING_NODE:
index 0771a6d..5ed6620 100644 (file)
@@ -239,6 +239,34 @@ restart_drop_extra_replicas:
 
                next_pos = insert->k.p;
 
+               /*
+                * Check for nonce offset inconsistency:
+                * This is debug code - we've been seeing this bug rarely, and
+                * it's been hard to reproduce, so this should give us some more
+                * information when it does occur:
+                */
+               struct printbuf err = PRINTBUF;
+               int invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(insert), __btree_node_type(0, m->btree_id), 0, &err);
+               printbuf_exit(&err);
+
+               if (invalid) {
+                       struct printbuf buf = PRINTBUF;
+
+                       prt_str(&buf, "about to insert invalid key in data update path");
+                       prt_str(&buf, "\nold: ");
+                       bch2_bkey_val_to_text(&buf, c, old);
+                       prt_str(&buf, "\nk:   ");
+                       bch2_bkey_val_to_text(&buf, c, k);
+                       prt_str(&buf, "\nnew: ");
+                       bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert));
+
+                       bch2_print_string_as_lines(KERN_ERR, buf.buf);
+                       printbuf_exit(&buf);
+
+                       bch2_fatal_error(c);
+                       goto out;
+               }
+
                ret =   bch2_insert_snapshot_whiteouts(trans, m->btree_id,
                                                k.k->p, bkey_start_pos(&insert->k)) ?:
                        bch2_insert_snapshot_whiteouts(trans, m->btree_id,
index d613695..4d0cb0c 100644 (file)
@@ -555,6 +555,7 @@ void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
        case TARGET_DEV: {
                struct bch_dev *ca;
 
+               out->atomic++;
                rcu_read_lock();
                ca = t.dev < c->sb.nr_devices
                        ? rcu_dereference(c->devs[t.dev])
@@ -570,6 +571,7 @@ void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
                }
 
                rcu_read_unlock();
+               out->atomic--;
                break;
        }
        case TARGET_GROUP:
@@ -580,7 +582,7 @@ void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
        }
 }
 
-void bch2_target_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned v)
+static void bch2_target_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned v)
 {
        struct target t = target_decode(v);
 
index 875f7c5..2a77de1 100644 (file)
@@ -1373,6 +1373,15 @@ ec_new_stripe_head_alloc(struct bch_fs *c, unsigned target,
                        h->nr_active_devs++;
 
        rcu_read_unlock();
+
+       /*
+        * If we only have redundancy + 1 devices, we're better off with just
+        * replication:
+        */
+       if (h->nr_active_devs < h->redundancy + 2)
+               bch_err(c, "insufficient devices available to create stripe (have %u, need %u) - mismatched bucket sizes?",
+                       h->nr_active_devs, h->redundancy + 2);
+
        list_add(&h->list, &c->ec_stripe_head_list);
        return h;
 }
@@ -1424,6 +1433,11 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans,
 
        h = ec_new_stripe_head_alloc(c, target, algo, redundancy, watermark);
 found:
+       if (!IS_ERR_OR_NULL(h) &&
+           h->nr_active_devs < h->redundancy + 2) {
+               mutex_unlock(&h->lock);
+               h = NULL;
+       }
        mutex_unlock(&c->ec_stripe_head_lock);
        return h;
 }
@@ -1681,8 +1695,6 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
        int ret;
 
        h = __bch2_ec_stripe_head_get(trans, target, algo, redundancy, watermark);
-       if (!h)
-               bch_err(c, "no stripe head");
        if (IS_ERR_OR_NULL(h))
                return h;
 
index 8bd9bcd..ff664fd 100644 (file)
@@ -13,7 +13,7 @@
 
 int bch2_filemap_get_contig_folios_d(struct address_space *mapping,
                                     loff_t start, u64 end,
-                                    int fgp_flags, gfp_t gfp,
+                                    fgf_t fgp_flags, gfp_t gfp,
                                     folios *fs)
 {
        struct folio *f;
index a2222ad..27f712a 100644 (file)
@@ -7,7 +7,7 @@
 typedef DARRAY(struct folio *) folios;
 
 int bch2_filemap_get_contig_folios_d(struct address_space *, loff_t,
-                                    u64, int, gfp_t, folios *);
+                                    u64, fgf_t, gfp_t, folios *);
 int bch2_write_invalidate_inode_pages_range(struct address_space *, loff_t, loff_t);
 
 /*
index 166d8d8..8ef8173 100644 (file)
@@ -1922,10 +1922,7 @@ out:
        return dget(sb->s_root);
 
 err_put_super:
-       sb->s_fs_info = NULL;
-       c->vfs_sb = NULL;
        deactivate_locked_super(sb);
-       bch2_fs_stop(c);
        return ERR_PTR(bch2_err_class(ret));
 }
 
@@ -1933,11 +1930,8 @@ static void bch2_kill_sb(struct super_block *sb)
 {
        struct bch_fs *c = sb->s_fs_info;
 
-       if (c)
-               c->vfs_sb = NULL;
        generic_shutdown_super(sb);
-       if (c)
-               bch2_fs_free(c);
+       bch2_fs_free(c);
 }
 
 static struct file_system_type bcache_fs_type = {
index 9f3e9bd..e0c5cd1 100644 (file)
@@ -2220,7 +2220,7 @@ static int nlink_cmp(const void *_l, const void *_r)
        const struct nlink *l = _l;
        const struct nlink *r = _r;
 
-       return cmp_int(l->inum, r->inum) ?: cmp_int(l->snapshot, r->snapshot);
+       return cmp_int(l->inum, r->inum);
 }
 
 static void inc_link(struct bch_fs *c, struct snapshots_seen *s,
index def77f2..c7849b0 100644 (file)
@@ -1134,7 +1134,7 @@ static int may_delete_deleted_inode(struct btree_trans *trans,
                 * unlinked inodes in the snapshot leaves:
                 */
                *need_another_pass = true;
-               return 0;
+               goto out;
        }
 
        ret = 1;
@@ -1169,8 +1169,10 @@ again:
         */
        for_each_btree_key(trans, iter, BTREE_ID_deleted_inodes, POS_MIN,
                           BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
-               ret = lockrestart_do(trans, may_delete_deleted_inode(trans, &iter, k.k->p,
-                                                                    &need_another_pass));
+               ret = commit_do(trans, NULL, NULL,
+                               BTREE_INSERT_NOFAIL|
+                               BTREE_INSERT_LAZY_RW,
+                       may_delete_deleted_inode(trans, &iter, k.k->p, &need_another_pass));
                if (ret < 0)
                        break;
 
index f02b3f7..d704a8f 100644 (file)
@@ -795,7 +795,7 @@ static int bch2_write_decrypt(struct bch_write_op *op)
         * checksum:
         */
        csum = bch2_checksum_bio(c, op->crc.csum_type, nonce, &op->wbio.bio);
-       if (bch2_crc_cmp(op->crc.csum, csum))
+       if (bch2_crc_cmp(op->crc.csum, csum) && !c->opts.no_data_io)
                return -EIO;
 
        ret = bch2_encrypt_bio(c, op->crc.csum_type, nonce, &op->wbio.bio);
index 5b5d69f..23a9b78 100644 (file)
@@ -526,36 +526,6 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res,
        return ret;
 }
 
-/* journal_preres: */
-
-static bool journal_preres_available(struct journal *j,
-                                    struct journal_preres *res,
-                                    unsigned new_u64s,
-                                    unsigned flags)
-{
-       bool ret = bch2_journal_preres_get_fast(j, res, new_u64s, flags, true);
-
-       if (!ret && mutex_trylock(&j->reclaim_lock)) {
-               bch2_journal_reclaim(j);
-               mutex_unlock(&j->reclaim_lock);
-       }
-
-       return ret;
-}
-
-int __bch2_journal_preres_get(struct journal *j,
-                             struct journal_preres *res,
-                             unsigned new_u64s,
-                             unsigned flags)
-{
-       int ret;
-
-       closure_wait_event(&j->preres_wait,
-                  (ret = bch2_journal_error(j)) ||
-                  journal_preres_available(j, res, new_u64s, flags));
-       return ret;
-}
-
 /* journal_entry_res: */
 
 void bch2_journal_entry_res_resize(struct journal *j,
@@ -1306,7 +1276,6 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
        prt_printf(out, "last_seq:\t\t%llu\n",          journal_last_seq(j));
        prt_printf(out, "last_seq_ondisk:\t%llu\n",             j->last_seq_ondisk);
        prt_printf(out, "flushed_seq_ondisk:\t%llu\n",  j->flushed_seq_ondisk);
-       prt_printf(out, "prereserved:\t\t%u/%u\n",              j->prereserved.reserved, j->prereserved.remaining);
        prt_printf(out, "watermark:\t\t%s\n",           bch2_watermarks[j->watermark]);
        prt_printf(out, "each entry reserved:\t%u\n",   j->entry_u64s_reserved);
        prt_printf(out, "nr flush writes:\t%llu\n",             j->nr_flush_writes);
index 011711e..c85d01c 100644 (file)
@@ -395,104 +395,6 @@ out:
        return 0;
 }
 
-/* journal_preres: */
-
-static inline void journal_set_watermark(struct journal *j)
-{
-       union journal_preres_state s = READ_ONCE(j->prereserved);
-       unsigned watermark = BCH_WATERMARK_stripe;
-
-       if (fifo_free(&j->pin) < j->pin.size / 4)
-               watermark = max_t(unsigned, watermark, BCH_WATERMARK_copygc);
-       if (fifo_free(&j->pin) < j->pin.size / 8)
-               watermark = max_t(unsigned, watermark, BCH_WATERMARK_reclaim);
-
-       if (s.reserved > s.remaining)
-               watermark = max_t(unsigned, watermark, BCH_WATERMARK_copygc);
-       if (!s.remaining)
-               watermark = max_t(unsigned, watermark, BCH_WATERMARK_reclaim);
-
-       if (watermark == j->watermark)
-               return;
-
-       swap(watermark, j->watermark);
-       if (watermark > j->watermark)
-               journal_wake(j);
-}
-
-static inline void bch2_journal_preres_put(struct journal *j,
-                                          struct journal_preres *res)
-{
-       union journal_preres_state s = { .reserved = res->u64s };
-
-       if (!res->u64s)
-               return;
-
-       s.v = atomic64_sub_return(s.v, &j->prereserved.counter);
-       res->u64s = 0;
-
-       if (unlikely(s.waiting)) {
-               clear_bit(ilog2((((union journal_preres_state) { .waiting = 1 }).v)),
-                         (unsigned long *) &j->prereserved.v);
-               closure_wake_up(&j->preres_wait);
-       }
-
-       if (s.reserved <= s.remaining && j->watermark)
-               journal_set_watermark(j);
-}
-
-int __bch2_journal_preres_get(struct journal *,
-                       struct journal_preres *, unsigned, unsigned);
-
-static inline int bch2_journal_preres_get_fast(struct journal *j,
-                                              struct journal_preres *res,
-                                              unsigned new_u64s,
-                                              unsigned flags,
-                                              bool set_waiting)
-{
-       int d = new_u64s - res->u64s;
-       union journal_preres_state old, new;
-       u64 v = atomic64_read(&j->prereserved.counter);
-       enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
-       int ret;
-
-       do {
-               old.v = new.v = v;
-               ret = 0;
-
-               if (watermark == BCH_WATERMARK_reclaim ||
-                   new.reserved + d < new.remaining) {
-                       new.reserved += d;
-                       ret = 1;
-               } else if (set_waiting && !new.waiting)
-                       new.waiting = true;
-               else
-                       return 0;
-       } while ((v = atomic64_cmpxchg(&j->prereserved.counter,
-                                      old.v, new.v)) != old.v);
-
-       if (ret)
-               res->u64s += d;
-       return ret;
-}
-
-static inline int bch2_journal_preres_get(struct journal *j,
-                                         struct journal_preres *res,
-                                         unsigned new_u64s,
-                                         unsigned flags)
-{
-       if (new_u64s <= res->u64s)
-               return 0;
-
-       if (bch2_journal_preres_get_fast(j, res, new_u64s, flags, false))
-               return 0;
-
-       if (flags & JOURNAL_RES_GET_NONBLOCK)
-               return -BCH_ERR_journal_preres_get_blocked;
-
-       return __bch2_journal_preres_get(j, res, new_u64s, flags);
-}
-
 /* journal_entry_res: */
 
 void bch2_journal_entry_res_resize(struct journal *,
index f4bc2cd..786a092 100644 (file)
@@ -1079,6 +1079,12 @@ found:
 
        if (ja->bucket_seq[ja->cur_idx] &&
            ja->sectors_free == ca->mi.bucket_size) {
+#if 0
+               /*
+                * Debug code for ZNS support, where we (probably) want to be
+                * correlated where we stopped in the journal to the zone write
+                * points:
+                */
                bch_err(c, "ja->sectors_free == ca->mi.bucket_size");
                bch_err(c, "cur_idx %u/%u", ja->cur_idx, ja->nr);
                for (i = 0; i < 3; i++) {
@@ -1086,6 +1092,7 @@ found:
 
                        bch_err(c, "bucket_seq[%u] = %llu", idx, ja->bucket_seq[idx]);
                }
+#endif
                ja->sectors_free = 0;
        }
 
index 9a584aa..e63c6ed 100644 (file)
@@ -50,16 +50,21 @@ unsigned bch2_journal_dev_buckets_available(struct journal *j,
        return available;
 }
 
-static void journal_set_remaining(struct journal *j, unsigned u64s_remaining)
+static inline void journal_set_watermark(struct journal *j, bool low_on_space)
 {
-       union journal_preres_state old, new;
-       u64 v = atomic64_read(&j->prereserved.counter);
+       unsigned watermark = BCH_WATERMARK_stripe;
 
-       do {
-               old.v = new.v = v;
-               new.remaining = u64s_remaining;
-       } while ((v = atomic64_cmpxchg(&j->prereserved.counter,
-                                      old.v, new.v)) != old.v);
+       if (low_on_space)
+               watermark = max_t(unsigned, watermark, BCH_WATERMARK_reclaim);
+       if (fifo_free(&j->pin) < j->pin.size / 4)
+               watermark = max_t(unsigned, watermark, BCH_WATERMARK_reclaim);
+
+       if (watermark == j->watermark)
+               return;
+
+       swap(watermark, j->watermark);
+       if (watermark > j->watermark)
+               journal_wake(j);
 }
 
 static struct journal_space
@@ -162,7 +167,6 @@ void bch2_journal_space_available(struct journal *j)
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
        struct bch_dev *ca;
        unsigned clean, clean_ondisk, total;
-       s64 u64s_remaining = 0;
        unsigned max_entry_size  = min(j->buf[0].buf_size >> 9,
                                       j->buf[1].buf_size >> 9);
        unsigned i, nr_online = 0, nr_devs_want;
@@ -222,16 +226,10 @@ void bch2_journal_space_available(struct journal *j)
        else
                clear_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags);
 
-       u64s_remaining  = (u64) clean << 6;
-       u64s_remaining -= (u64) total << 3;
-       u64s_remaining = max(0LL, u64s_remaining);
-       u64s_remaining /= 4;
-       u64s_remaining = min_t(u64, u64s_remaining, U32_MAX);
+       journal_set_watermark(j, clean * 4 <= total);
 out:
        j->cur_entry_sectors    = !ret ? j->space[journal_space_discarded].next_entry : 0;
        j->cur_entry_error      = ret;
-       journal_set_remaining(j, u64s_remaining);
-       journal_set_watermark(j);
 
        if (!ret)
                journal_wake(j);
@@ -555,11 +553,6 @@ static u64 journal_seq_to_flush(struct journal *j)
                /* Try to keep the journal at most half full: */
                nr_buckets = ja->nr / 2;
 
-               /* And include pre-reservations: */
-               nr_buckets += DIV_ROUND_UP(j->prereserved.reserved,
-                                          (ca->mi.bucket_size << 6) -
-                                          journal_entry_overhead(j));
-
                nr_buckets = min(nr_buckets, ja->nr);
 
                bucket_to_flush = (ja->cur_idx + nr_buckets) % ja->nr;
@@ -638,10 +631,7 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked)
                               msecs_to_jiffies(c->opts.journal_reclaim_delay)))
                        min_nr = 1;
 
-               if (j->prereserved.reserved * 4 > j->prereserved.remaining)
-                       min_nr = 1;
-
-               if (fifo_free(&j->pin) <= 32)
+               if (j->watermark != BCH_WATERMARK_stripe)
                        min_nr = 1;
 
                if (atomic_read(&c->btree_cache.dirty) * 2 > c->btree_cache.used)
@@ -652,8 +642,6 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked)
                trace_and_count(c, journal_reclaim_start, c,
                                direct, kicked,
                                min_nr, min_key_cache,
-                               j->prereserved.reserved,
-                               j->prereserved.remaining,
                                atomic_read(&c->btree_cache.dirty),
                                c->btree_cache.used,
                                atomic_long_read(&c->btree_key_cache.nr_dirty),
index 42504e1..a756b69 100644 (file)
@@ -76,14 +76,6 @@ struct journal_res {
        u64                     seq;
 };
 
-/*
- * For reserving space in the journal prior to getting a reservation on a
- * particular journal entry:
- */
-struct journal_preres {
-       unsigned                u64s;
-};
-
 union journal_res_state {
        struct {
                atomic64_t      counter;
@@ -104,22 +96,6 @@ union journal_res_state {
        };
 };
 
-union journal_preres_state {
-       struct {
-               atomic64_t      counter;
-       };
-
-       struct {
-               u64             v;
-       };
-
-       struct {
-               u64             waiting:1,
-                               reserved:31,
-                               remaining:32;
-       };
-};
-
 /* bytes: */
 #define JOURNAL_ENTRY_SIZE_MIN         (64U << 10) /* 64k */
 #define JOURNAL_ENTRY_SIZE_MAX         (4U  << 20) /* 4M */
@@ -180,8 +156,6 @@ struct journal {
        union journal_res_state reservations;
        enum bch_watermark      watermark;
 
-       union journal_preres_state prereserved;
-
        } __aligned(SMP_CACHE_BYTES);
 
        unsigned long           flags;
index b775cf0..9779044 100644 (file)
@@ -163,8 +163,11 @@ static int __do_six_trylock(struct six_lock *lock, enum six_lock_type type,
                this_cpu_sub(*lock->readers, !ret);
                preempt_enable();
 
-               if (!ret && (old & SIX_LOCK_WAITING_write))
-                       ret = -1 - SIX_LOCK_write;
+               if (!ret) {
+                       smp_mb();
+                       if (atomic_read(&lock->state) & SIX_LOCK_WAITING_write)
+                               ret = -1 - SIX_LOCK_write;
+               }
        } else if (type == SIX_LOCK_write && lock->readers) {
                if (try) {
                        atomic_add(SIX_LOCK_HELD_write, &lock->state);
index 8683344..2d2e66a 100644 (file)
@@ -20,7 +20,7 @@ struct snapshot_t {
 };
 
 struct snapshot_table {
-       struct snapshot_t       s[0];
+       DECLARE_FLEX_ARRAY(struct snapshot_t, s);
 };
 
 typedef struct {
index 893304a..7857671 100644 (file)
@@ -196,10 +196,9 @@ DEFINE_EVENT(bio, journal_write,
 TRACE_EVENT(journal_reclaim_start,
        TP_PROTO(struct bch_fs *c, bool direct, bool kicked,
                 u64 min_nr, u64 min_key_cache,
-                u64 prereserved, u64 prereserved_total,
                 u64 btree_cache_dirty, u64 btree_cache_total,
                 u64 btree_key_cache_dirty, u64 btree_key_cache_total),
-       TP_ARGS(c, direct, kicked, min_nr, min_key_cache, prereserved, prereserved_total,
+       TP_ARGS(c, direct, kicked, min_nr, min_key_cache,
                btree_cache_dirty, btree_cache_total,
                btree_key_cache_dirty, btree_key_cache_total),
 
@@ -209,8 +208,6 @@ TRACE_EVENT(journal_reclaim_start,
                __field(bool,           kicked                  )
                __field(u64,            min_nr                  )
                __field(u64,            min_key_cache           )
-               __field(u64,            prereserved             )
-               __field(u64,            prereserved_total       )
                __field(u64,            btree_cache_dirty       )
                __field(u64,            btree_cache_total       )
                __field(u64,            btree_key_cache_dirty   )
@@ -223,22 +220,18 @@ TRACE_EVENT(journal_reclaim_start,
                __entry->kicked                 = kicked;
                __entry->min_nr                 = min_nr;
                __entry->min_key_cache          = min_key_cache;
-               __entry->prereserved            = prereserved;
-               __entry->prereserved_total      = prereserved_total;
                __entry->btree_cache_dirty      = btree_cache_dirty;
                __entry->btree_cache_total      = btree_cache_total;
                __entry->btree_key_cache_dirty  = btree_key_cache_dirty;
                __entry->btree_key_cache_total  = btree_key_cache_total;
        ),
 
-       TP_printk("%d,%d direct %u kicked %u min %llu key cache %llu prereserved %llu/%llu btree cache %llu/%llu key cache %llu/%llu",
+       TP_printk("%d,%d direct %u kicked %u min %llu key cache %llu btree cache %llu/%llu key cache %llu/%llu",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->direct,
                  __entry->kicked,
                  __entry->min_nr,
                  __entry->min_key_cache,
-                 __entry->prereserved,
-                 __entry->prereserved_total,
                  __entry->btree_cache_dirty,
                  __entry->btree_cache_total,
                  __entry->btree_key_cache_dirty,
index a39ff0c..79d9826 100644 (file)
@@ -552,6 +552,14 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler,
                s.v = v + 1;
                s.defined = true;
        } else {
+               /*
+                * Check if this option was set on the parent - if so, switched
+                * back to inheriting from the parent:
+                *
+                * rename() also has to deal with keeping inherited options up
+                * to date - see bch2_reinherit_attrs()
+                */
+               spin_lock(&dentry->d_lock);
                if (!IS_ROOT(dentry)) {
                        struct bch_inode_info *dir =
                                to_bch_ei(d_inode(dentry->d_parent));
@@ -560,6 +568,7 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler,
                } else {
                        s.v = 0;
                }
+               spin_unlock(&dentry->d_lock);
 
                s.defined = false;
        }
index 2a9344a..35c1d24 100644 (file)
@@ -432,7 +432,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
        if (btrfs_block_can_be_shared(trans, root, buf)) {
                ret = btrfs_lookup_extent_info(trans, fs_info, buf->start,
                                               btrfs_header_level(buf), 1,
-                                              &refs, &flags);
+                                              &refs, &flags, NULL);
                if (ret)
                        return ret;
                if (unlikely(refs == 0)) {
index 9223934..891ea2f 100644 (file)
@@ -1041,7 +1041,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
                return -ENOMEM;
        }
 
-       if (btrfs_qgroup_enabled(fs_info) && !generic_ref->skip_qgroup) {
+       if (btrfs_qgroup_full_accounting(fs_info) && !generic_ref->skip_qgroup) {
                record = kzalloc(sizeof(*record), GFP_NOFS);
                if (!record) {
                        kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
@@ -1144,7 +1144,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
                return -ENOMEM;
        }
 
-       if (btrfs_qgroup_enabled(fs_info) && !generic_ref->skip_qgroup) {
+       if (btrfs_qgroup_full_accounting(fs_info) && !generic_ref->skip_qgroup) {
                record = kzalloc(sizeof(*record), GFP_NOFS);
                if (!record) {
                        kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
index c8e5b47..0455935 100644 (file)
@@ -102,7 +102,8 @@ int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len)
  */
 int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
                             struct btrfs_fs_info *fs_info, u64 bytenr,
-                            u64 offset, int metadata, u64 *refs, u64 *flags)
+                            u64 offset, int metadata, u64 *refs, u64 *flags,
+                            u64 *owning_root)
 {
        struct btrfs_root *extent_root;
        struct btrfs_delayed_ref_head *head;
@@ -114,6 +115,7 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
        u32 item_size;
        u64 num_refs;
        u64 extent_flags;
+       u64 owner = 0;
        int ret;
 
        /*
@@ -167,6 +169,8 @@ search_again:
                                            struct btrfs_extent_item);
                        num_refs = btrfs_extent_refs(leaf, ei);
                        extent_flags = btrfs_extent_flags(leaf, ei);
+                       owner = btrfs_get_extent_owner_root(fs_info, leaf,
+                                                           path->slots[0]);
                } else {
                        ret = -EUCLEAN;
                        btrfs_err(fs_info,
@@ -226,6 +230,8 @@ out:
                *refs = num_refs;
        if (flags)
                *flags = extent_flags;
+       if (owning_root)
+               *owning_root = owner;
 out_free:
        btrfs_free_path(path);
        return ret;
@@ -5234,7 +5240,7 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
                /* We don't lock the tree block, it's OK to be racy here */
                ret = btrfs_lookup_extent_info(trans, fs_info, bytenr,
                                               wc->level - 1, 1, &refs,
-                                              &flags);
+                                              &flags, NULL);
                /* We don't care about errors in readahead. */
                if (ret < 0)
                        continue;
@@ -5301,7 +5307,8 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
                ret = btrfs_lookup_extent_info(trans, fs_info,
                                               eb->start, level, 1,
                                               &wc->refs[level],
-                                              &wc->flags[level]);
+                                              &wc->flags[level],
+                                              NULL);
                BUG_ON(ret == -ENOMEM);
                if (ret)
                        return ret;
@@ -5391,6 +5398,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
        u64 bytenr;
        u64 generation;
        u64 parent;
+       u64 owner_root = 0;
        struct btrfs_tree_parent_check check = { 0 };
        struct btrfs_key key;
        struct btrfs_ref ref = { 0 };
@@ -5434,7 +5442,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 
        ret = btrfs_lookup_extent_info(trans, fs_info, bytenr, level - 1, 1,
                                       &wc->refs[level - 1],
-                                      &wc->flags[level - 1]);
+                                      &wc->flags[level - 1],
+                                      &owner_root);
        if (ret < 0)
                goto out_unlock;
 
@@ -5567,8 +5576,7 @@ skip:
                find_next_key(path, level, &wc->drop_progress);
 
                btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
-                                      fs_info->nodesize, parent,
-                                      btrfs_header_owner(next));
+                                      fs_info->nodesize, parent, owner_root);
                btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid,
                                    0, false);
                ret = btrfs_free_extent(trans, &ref);
@@ -5635,7 +5643,8 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
                        ret = btrfs_lookup_extent_info(trans, fs_info,
                                                       eb->start, level, 1,
                                                       &wc->refs[level],
-                                                      &wc->flags[level]);
+                                                      &wc->flags[level],
+                                                      NULL);
                        if (ret < 0) {
                                btrfs_tree_unlock_rw(eb, path->locks[level]);
                                path->locks[level] = 0;
@@ -5880,7 +5889,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
                        ret = btrfs_lookup_extent_info(trans, fs_info,
                                                path->nodes[level]->start,
                                                level, 1, &wc->refs[level],
-                                               &wc->flags[level]);
+                                               &wc->flags[level], NULL);
                        if (ret < 0) {
                                err = ret;
                                goto out_end_trans;
index 0716f65..2e06603 100644 (file)
@@ -99,7 +99,8 @@ u64 btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
 int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len);
 int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
                             struct btrfs_fs_info *fs_info, u64 bytenr,
-                            u64 offset, int metadata, u64 *refs, u64 *flags);
+                            u64 offset, int metadata, u64 *refs, u64 *flags,
+                            u64 *owner_root);
 int btrfs_pin_extent(struct btrfs_trans_handle *trans, u64 bytenr, u64 num,
                     int reserved);
 int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
index 5e3fccd..9f5a989 100644 (file)
@@ -6983,8 +6983,15 @@ static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode,
        int ret;
 
        alloc_hint = get_extent_allocation_hint(inode, start, len);
+again:
        ret = btrfs_reserve_extent(root, len, len, fs_info->sectorsize,
                                   0, alloc_hint, &ins, 1, 1);
+       if (ret == -EAGAIN) {
+               ASSERT(btrfs_is_zoned(fs_info));
+               wait_on_bit_io(&inode->root->fs_info->flags, BTRFS_FS_NEED_ZONE_FINISH,
+                              TASK_UNINTERRUPTIBLE);
+               goto again;
+       }
        if (ret)
                return ERR_PTR(ret);
 
index 752acff..dfe257e 100644 (file)
@@ -1528,7 +1528,7 @@ static noinline int key_in_sk(struct btrfs_key *key,
 static noinline int copy_to_sk(struct btrfs_path *path,
                               struct btrfs_key *key,
                               struct btrfs_ioctl_search_key *sk,
-                              size_t *buf_size,
+                              u64 *buf_size,
                               char __user *ubuf,
                               unsigned long *sk_offset,
                               int *num_found)
@@ -1660,7 +1660,7 @@ out:
 
 static noinline int search_ioctl(struct inode *inode,
                                 struct btrfs_ioctl_search_key *sk,
-                                size_t *buf_size,
+                                u64 *buf_size,
                                 char __user *ubuf)
 {
        struct btrfs_fs_info *info = btrfs_sb(inode->i_sb);
@@ -1733,7 +1733,7 @@ static noinline int btrfs_ioctl_tree_search(struct inode *inode,
        struct btrfs_ioctl_search_args __user *uargs = argp;
        struct btrfs_ioctl_search_key sk;
        int ret;
-       size_t buf_size;
+       u64 buf_size;
 
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
@@ -1763,8 +1763,8 @@ static noinline int btrfs_ioctl_tree_search_v2(struct inode *inode,
        struct btrfs_ioctl_search_args_v2 __user *uarg = argp;
        struct btrfs_ioctl_search_args_v2 args;
        int ret;
-       size_t buf_size;
-       const size_t buf_limit = SZ_16M;
+       u64 buf_size;
+       const u64 buf_limit = SZ_16M;
 
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
index edb84cc..ce446d9 100644 (file)
@@ -1888,7 +1888,7 @@ int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
        u64 bytenr = record->bytenr;
 
        if (!btrfs_qgroup_full_accounting(fs_info))
-               return 0;
+               return 1;
 
        lockdep_assert_held(&delayed_refs->lock);
        trace_btrfs_qgroup_trace_extent(fs_info, record);
@@ -2874,13 +2874,19 @@ int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr,
        qgroup_update_counters(fs_info, &qgroups, nr_old_roots, nr_new_roots,
                               num_bytes, seq);
 
+       /*
+        * We're done using the iterator, release all its qgroups while holding
+        * fs_info->qgroup_lock so that we don't race with btrfs_remove_qgroup()
+        * and trigger use-after-free accesses to qgroups.
+        */
+       qgroup_iterator_nested_clean(&qgroups);
+
        /*
         * Bump qgroup_seq to avoid seq overlap
         */
        fs_info->qgroup_seq += max(nr_old_roots, nr_new_roots) + 1;
        spin_unlock(&fs_info->qgroup_lock);
 out_free:
-       qgroup_iterator_nested_clean(&qgroups);
        ulist_free(old_roots);
        ulist_free(new_roots);
        return ret;
index 944e8f1..9589362 100644 (file)
@@ -145,7 +145,7 @@ int btrfs_insert_raid_extent(struct btrfs_trans_handle *trans,
                btrfs_put_bioc(bioc);
        }
 
-       return ret;
+       return 0;
 }
 
 int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info,
index 9ce5be2..f62a408 100644 (file)
@@ -1868,6 +1868,9 @@ static int queue_scrub_stripe(struct scrub_ctx *sctx, struct btrfs_block_group *
         */
        ASSERT(sctx->cur_stripe < SCRUB_TOTAL_STRIPES);
 
+       /* @found_logical_ret must be specified. */
+       ASSERT(found_logical_ret);
+
        stripe = &sctx->stripes[sctx->cur_stripe];
        scrub_reset_stripe(stripe);
        ret = scrub_find_fill_first_stripe(bg, &sctx->extent_path,
@@ -1876,8 +1879,7 @@ static int queue_scrub_stripe(struct scrub_ctx *sctx, struct btrfs_block_group *
        /* Either >0 as no more extents or <0 for error. */
        if (ret)
                return ret;
-       if (found_logical_ret)
-               *found_logical_ret = stripe->logical;
+       *found_logical_ret = stripe->logical;
        sctx->cur_stripe++;
 
        /* We filled one group, submit it. */
@@ -2080,7 +2082,7 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx,
 
        /* Go through each extent items inside the logical range */
        while (cur_logical < logical_end) {
-               u64 found_logical;
+               u64 found_logical = U64_MAX;
                u64 cur_physical = physical + cur_logical - logical_start;
 
                /* Canceled? */
@@ -2115,6 +2117,8 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx,
                if (ret < 0)
                        break;
 
+               /* queue_scrub_stripe() returned 0, @found_logical must be updated. */
+               ASSERT(found_logical != U64_MAX);
                cur_logical = found_logical + BTRFS_STRIPE_LEN;
 
                /* Don't hold CPU for too long time */
index c87e188..c6f1662 100644 (file)
@@ -748,13 +748,13 @@ static noinline struct btrfs_device *device_list_add(const char *path,
 
        if (!fs_devices) {
                fs_devices = alloc_fs_devices(disk_super->fsid);
+               if (IS_ERR(fs_devices))
+                       return ERR_CAST(fs_devices);
+
                if (has_metadata_uuid)
                        memcpy(fs_devices->metadata_uuid,
                               disk_super->metadata_uuid, BTRFS_FSID_SIZE);
 
-               if (IS_ERR(fs_devices))
-                       return ERR_CAST(fs_devices);
-
                if (same_fsid_diff_dev) {
                        generate_random_uuid(fs_devices->fsid);
                        fs_devices->temp_fsid = true;
index 3504ade..188378c 100644 (file)
@@ -1661,13 +1661,6 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
        }
 
 out:
-       if (cache->alloc_offset > fs_info->zone_size) {
-               btrfs_err(fs_info,
-                       "zoned: invalid write pointer %llu in block group %llu",
-                       cache->alloc_offset, cache->start);
-               ret = -EIO;
-       }
-
        if (cache->alloc_offset > cache->zone_capacity) {
                btrfs_err(fs_info,
 "zoned: invalid write pointer %llu (larger than zone capacity %llu) in block group %llu",
index a25dd3d..b0e8774 100644 (file)
@@ -998,6 +998,14 @@ static int ecryptfs_getattr_link(struct mnt_idmap *idmap,
        return rc;
 }
 
+static int ecryptfs_do_getattr(const struct path *path, struct kstat *stat,
+                              u32 request_mask, unsigned int flags)
+{
+       if (flags & AT_GETATTR_NOSEC)
+               return vfs_getattr_nosec(path, stat, request_mask, flags);
+       return vfs_getattr(path, stat, request_mask, flags);
+}
+
 static int ecryptfs_getattr(struct mnt_idmap *idmap,
                            const struct path *path, struct kstat *stat,
                            u32 request_mask, unsigned int flags)
@@ -1006,8 +1014,8 @@ static int ecryptfs_getattr(struct mnt_idmap *idmap,
        struct kstat lower_stat;
        int rc;
 
-       rc = vfs_getattr(ecryptfs_dentry_to_lower_path(dentry), &lower_stat,
-                        request_mask, flags);
+       rc = ecryptfs_do_getattr(ecryptfs_dentry_to_lower_path(dentry),
+                                &lower_stat, request_mask, flags);
        if (!rc) {
                fsstack_copy_attr_all(d_inode(dentry),
                                      ecryptfs_inode_to_lower(d_inode(dentry)));
index e540648..1d318f8 100644 (file)
@@ -21,7 +21,7 @@ config EROFS_FS
          performance under extremely memory pressure without extra cost.
 
          See the documentation at <file:Documentation/filesystems/erofs.rst>
-         for more details.
+         and the web pages at <https://erofs.docs.kernel.org> for more details.
 
          If unsure, say N.
 
index 029c761..c98aeda 100644 (file)
@@ -220,7 +220,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
                        up_read(&devs->rwsem);
                        return 0;
                }
-               map->m_bdev = dif->bdev_handle->bdev;
+               map->m_bdev = dif->bdev_handle ? dif->bdev_handle->bdev : NULL;
                map->m_daxdev = dif->dax_dev;
                map->m_dax_part_off = dif->dax_part_off;
                map->m_fscache = dif->fscache;
@@ -238,7 +238,8 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
                        if (map->m_pa >= startoff &&
                            map->m_pa < startoff + length) {
                                map->m_pa -= startoff;
-                               map->m_bdev = dif->bdev_handle->bdev;
+                               map->m_bdev = dif->bdev_handle ?
+                                             dif->bdev_handle->bdev : NULL;
                                map->m_daxdev = dif->dax_dev;
                                map->m_dax_part_off = dif->dax_part_off;
                                map->m_fscache = dif->fscache;
index b8ad05b..14a79d3 100644 (file)
@@ -15,11 +15,11 @@ static void *erofs_read_inode(struct erofs_buf *buf,
        struct erofs_sb_info *sbi = EROFS_SB(sb);
        struct erofs_inode *vi = EROFS_I(inode);
        const erofs_off_t inode_loc = erofs_iloc(inode);
-
        erofs_blk_t blkaddr, nblks = 0;
        void *kaddr;
        struct erofs_inode_compact *dic;
        struct erofs_inode_extended *die, *copied = NULL;
+       union erofs_inode_i_u iu;
        unsigned int ifmt;
        int err;
 
@@ -35,9 +35,8 @@ static void *erofs_read_inode(struct erofs_buf *buf,
 
        dic = kaddr + *ofs;
        ifmt = le16_to_cpu(dic->i_format);
-
        if (ifmt & ~EROFS_I_ALL) {
-               erofs_err(inode->i_sb, "unsupported i_format %u of nid %llu",
+               erofs_err(sb, "unsupported i_format %u of nid %llu",
                          ifmt, vi->nid);
                err = -EOPNOTSUPP;
                goto err_out;
@@ -45,7 +44,7 @@ static void *erofs_read_inode(struct erofs_buf *buf,
 
        vi->datalayout = erofs_inode_datalayout(ifmt);
        if (vi->datalayout >= EROFS_INODE_DATALAYOUT_MAX) {
-               erofs_err(inode->i_sb, "unsupported datalayout %u of nid %llu",
+               erofs_err(sb, "unsupported datalayout %u of nid %llu",
                          vi->datalayout, vi->nid);
                err = -EOPNOTSUPP;
                goto err_out;
@@ -82,40 +81,15 @@ static void *erofs_read_inode(struct erofs_buf *buf,
                vi->xattr_isize = erofs_xattr_ibody_size(die->i_xattr_icount);
 
                inode->i_mode = le16_to_cpu(die->i_mode);
-               switch (inode->i_mode & S_IFMT) {
-               case S_IFREG:
-               case S_IFDIR:
-               case S_IFLNK:
-                       vi->raw_blkaddr = le32_to_cpu(die->i_u.raw_blkaddr);
-                       break;
-               case S_IFCHR:
-               case S_IFBLK:
-                       inode->i_rdev =
-                               new_decode_dev(le32_to_cpu(die->i_u.rdev));
-                       break;
-               case S_IFIFO:
-               case S_IFSOCK:
-                       inode->i_rdev = 0;
-                       break;
-               default:
-                       goto bogusimode;
-               }
+               iu = die->i_u;
                i_uid_write(inode, le32_to_cpu(die->i_uid));
                i_gid_write(inode, le32_to_cpu(die->i_gid));
                set_nlink(inode, le32_to_cpu(die->i_nlink));
-
-               /* extended inode has its own timestamp */
+               /* each extended inode has its own timestamp */
                inode_set_ctime(inode, le64_to_cpu(die->i_mtime),
                                le32_to_cpu(die->i_mtime_nsec));
 
                inode->i_size = le64_to_cpu(die->i_size);
-
-               /* total blocks for compressed files */
-               if (erofs_inode_is_data_compressed(vi->datalayout))
-                       nblks = le32_to_cpu(die->i_u.compressed_blocks);
-               else if (vi->datalayout == EROFS_INODE_CHUNK_BASED)
-                       /* fill chunked inode summary info */
-                       vi->chunkformat = le16_to_cpu(die->i_u.c.format);
                kfree(copied);
                copied = NULL;
                break;
@@ -125,49 +99,51 @@ static void *erofs_read_inode(struct erofs_buf *buf,
                vi->xattr_isize = erofs_xattr_ibody_size(dic->i_xattr_icount);
 
                inode->i_mode = le16_to_cpu(dic->i_mode);
-               switch (inode->i_mode & S_IFMT) {
-               case S_IFREG:
-               case S_IFDIR:
-               case S_IFLNK:
-                       vi->raw_blkaddr = le32_to_cpu(dic->i_u.raw_blkaddr);
-                       break;
-               case S_IFCHR:
-               case S_IFBLK:
-                       inode->i_rdev =
-                               new_decode_dev(le32_to_cpu(dic->i_u.rdev));
-                       break;
-               case S_IFIFO:
-               case S_IFSOCK:
-                       inode->i_rdev = 0;
-                       break;
-               default:
-                       goto bogusimode;
-               }
+               iu = dic->i_u;
                i_uid_write(inode, le16_to_cpu(dic->i_uid));
                i_gid_write(inode, le16_to_cpu(dic->i_gid));
                set_nlink(inode, le16_to_cpu(dic->i_nlink));
-
                /* use build time for compact inodes */
                inode_set_ctime(inode, sbi->build_time, sbi->build_time_nsec);
 
                inode->i_size = le32_to_cpu(dic->i_size);
-               if (erofs_inode_is_data_compressed(vi->datalayout))
-                       nblks = le32_to_cpu(dic->i_u.compressed_blocks);
-               else if (vi->datalayout == EROFS_INODE_CHUNK_BASED)
-                       vi->chunkformat = le16_to_cpu(dic->i_u.c.format);
                break;
        default:
-               erofs_err(inode->i_sb,
-                         "unsupported on-disk inode version %u of nid %llu",
+               erofs_err(sb, "unsupported on-disk inode version %u of nid %llu",
                          erofs_inode_version(ifmt), vi->nid);
                err = -EOPNOTSUPP;
                goto err_out;
        }
 
-       if (vi->datalayout == EROFS_INODE_CHUNK_BASED) {
+       switch (inode->i_mode & S_IFMT) {
+       case S_IFREG:
+       case S_IFDIR:
+       case S_IFLNK:
+               vi->raw_blkaddr = le32_to_cpu(iu.raw_blkaddr);
+               break;
+       case S_IFCHR:
+       case S_IFBLK:
+               inode->i_rdev = new_decode_dev(le32_to_cpu(iu.rdev));
+               break;
+       case S_IFIFO:
+       case S_IFSOCK:
+               inode->i_rdev = 0;
+               break;
+       default:
+               erofs_err(sb, "bogus i_mode (%o) @ nid %llu", inode->i_mode,
+                         vi->nid);
+               err = -EFSCORRUPTED;
+               goto err_out;
+       }
+
+       /* total blocks for compressed files */
+       if (erofs_inode_is_data_compressed(vi->datalayout)) {
+               nblks = le32_to_cpu(iu.compressed_blocks);
+       } else if (vi->datalayout == EROFS_INODE_CHUNK_BASED) {
+               /* fill chunked inode summary info */
+               vi->chunkformat = le16_to_cpu(iu.c.format);
                if (vi->chunkformat & ~EROFS_CHUNK_FORMAT_ALL) {
-                       erofs_err(inode->i_sb,
-                                 "unsupported chunk format %x of nid %llu",
+                       erofs_err(sb, "unsupported chunk format %x of nid %llu",
                                  vi->chunkformat, vi->nid);
                        err = -EOPNOTSUPP;
                        goto err_out;
@@ -191,10 +167,6 @@ static void *erofs_read_inode(struct erofs_buf *buf,
                inode->i_blocks = nblks << (sb->s_blocksize_bits - 9);
        return kaddr;
 
-bogusimode:
-       erofs_err(inode->i_sb, "bogus i_mode (%o) @ nid %llu",
-                 inode->i_mode, vi->nid);
-       err = -EFSCORRUPTED;
 err_out:
        DBG_BUGON(1);
        kfree(copied);
index edcd8a6..f238d98 100644 (file)
@@ -215,6 +215,8 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
        lockdep_set_class_and_name(&mapping->invalidate_lock,
                                   &sb->s_type->invalidate_lock_key,
                                   "mapping.invalidate_lock");
+       if (sb->s_iflags & SB_I_STABLE_WRITES)
+               mapping_set_stable_writes(mapping);
        inode->i_private = NULL;
        inode->i_mapping = mapping;
        INIT_HLIST_HEAD(&inode->i_dentry);      /* buggered by rcu freeing */
index e9440d5..c2aa6fd 100644 (file)
@@ -399,6 +399,8 @@ static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence)
                return -EINVAL;
        }
 
+       /* In this case, ->private_data is protected by f_pos_lock */
+       file->private_data = NULL;
        return vfs_setpos(file, offset, U32_MAX);
 }
 
@@ -428,7 +430,7 @@ static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry)
                          inode->i_ino, fs_umode_to_dtype(inode->i_mode));
 }
 
-static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
+static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
 {
        struct offset_ctx *so_ctx = inode->i_op->get_offset_ctx(inode);
        XA_STATE(xas, &so_ctx->xa, ctx->pos);
@@ -437,7 +439,7 @@ static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
        while (true) {
                dentry = offset_find_next(&xas);
                if (!dentry)
-                       break;
+                       return ERR_PTR(-ENOENT);
 
                if (!offset_dir_emit(ctx, dentry)) {
                        dput(dentry);
@@ -447,6 +449,7 @@ static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
                dput(dentry);
                ctx->pos = xas.xa_index + 1;
        }
+       return NULL;
 }
 
 /**
@@ -479,7 +482,12 @@ static int offset_readdir(struct file *file, struct dir_context *ctx)
        if (!dir_emit_dots(file, ctx))
                return 0;
 
-       offset_iterate_dir(d_inode(dir), ctx);
+       /* In this case, ->private_data is protected by f_pos_lock */
+       if (ctx->pos == 2)
+               file->private_data = NULL;
+       else if (file->private_data == ERR_PTR(-ENOENT))
+               return 0;
+       file->private_data = offset_iterate_dir(d_inode(dir), ctx);
        return 0;
 }
 
index 929248c..4cbe043 100644 (file)
@@ -84,8 +84,8 @@ int   nfsd_net_reply_cache_init(struct nfsd_net *nn);
 void   nfsd_net_reply_cache_destroy(struct nfsd_net *nn);
 int    nfsd_reply_cache_init(struct nfsd_net *);
 void   nfsd_reply_cache_shutdown(struct nfsd_net *);
-int    nfsd_cache_lookup(struct svc_rqst *rqstp,
-                         struct nfsd_cacherep **cacherep);
+int    nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
+                         unsigned int len, struct nfsd_cacherep **cacherep);
 void   nfsd_cache_update(struct svc_rqst *rqstp, struct nfsd_cacherep *rp,
                          int cachetype, __be32 *statp);
 int    nfsd_reply_cache_stats_show(struct seq_file *m, void *v);
index 4045c85..4041592 100644 (file)
@@ -2804,7 +2804,7 @@ static int client_opens_release(struct inode *inode, struct file *file)
 
        /* XXX: alternatively, we could get/drop in seq start/stop */
        drop_client(clp);
-       return 0;
+       return seq_release(inode, file);
 }
 
 static const struct file_operations client_states_fops = {
index fd56a52..d3273a3 100644 (file)
@@ -369,33 +369,52 @@ nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
        return freed;
 }
 
-/*
- * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes
+/**
+ * nfsd_cache_csum - Checksum incoming NFS Call arguments
+ * @buf: buffer containing a whole RPC Call message
+ * @start: starting byte of the NFS Call header
+ * @remaining: size of the NFS Call header, in bytes
+ *
+ * Compute a weak checksum of the leading bytes of an NFS procedure
+ * call header to help verify that a retransmitted Call matches an
+ * entry in the duplicate reply cache.
+ *
+ * To avoid assumptions about how the RPC message is laid out in
+ * @buf and what else it might contain (eg, a GSS MIC suffix), the
+ * caller passes us the exact location and length of the NFS Call
+ * header.
+ *
+ * Returns a 32-bit checksum value, as defined in RFC 793.
  */
-static __wsum
-nfsd_cache_csum(struct svc_rqst *rqstp)
+static __wsum nfsd_cache_csum(struct xdr_buf *buf, unsigned int start,
+                             unsigned int remaining)
 {
+       unsigned int base, len;
+       struct xdr_buf subbuf;
+       __wsum csum = 0;
+       void *p;
        int idx;
-       unsigned int base;
-       __wsum csum;
-       struct xdr_buf *buf = &rqstp->rq_arg;
-       const unsigned char *p = buf->head[0].iov_base;
-       size_t csum_len = min_t(size_t, buf->head[0].iov_len + buf->page_len,
-                               RC_CSUMLEN);
-       size_t len = min(buf->head[0].iov_len, csum_len);
+
+       if (remaining > RC_CSUMLEN)
+               remaining = RC_CSUMLEN;
+       if (xdr_buf_subsegment(buf, &subbuf, start, remaining))
+               return csum;
 
        /* rq_arg.head first */
-       csum = csum_partial(p, len, 0);
-       csum_len -= len;
+       if (subbuf.head[0].iov_len) {
+               len = min_t(unsigned int, subbuf.head[0].iov_len, remaining);
+               csum = csum_partial(subbuf.head[0].iov_base, len, csum);
+               remaining -= len;
+       }
 
        /* Continue into page array */
-       idx = buf->page_base / PAGE_SIZE;
-       base = buf->page_base & ~PAGE_MASK;
-       while (csum_len) {
-               p = page_address(buf->pages[idx]) + base;
-               len = min_t(size_t, PAGE_SIZE - base, csum_len);
+       idx = subbuf.page_base / PAGE_SIZE;
+       base = subbuf.page_base & ~PAGE_MASK;
+       while (remaining) {
+               p = page_address(subbuf.pages[idx]) + base;
+               len = min_t(unsigned int, PAGE_SIZE - base, remaining);
                csum = csum_partial(p, len, csum);
-               csum_len -= len;
+               remaining -= len;
                base = 0;
                ++idx;
        }
@@ -466,6 +485,8 @@ out:
 /**
  * nfsd_cache_lookup - Find an entry in the duplicate reply cache
  * @rqstp: Incoming Call to find
+ * @start: starting byte in @rqstp->rq_arg of the NFS Call header
+ * @len: size of the NFS Call header, in bytes
  * @cacherep: OUT: DRC entry for this request
  *
  * Try to find an entry matching the current call in the cache. When none
@@ -479,7 +500,8 @@ out:
  *   %RC_REPLY: Reply from cache
  *   %RC_DROPIT: Do not process the request further
  */
-int nfsd_cache_lookup(struct svc_rqst *rqstp, struct nfsd_cacherep **cacherep)
+int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
+                     unsigned int len, struct nfsd_cacherep **cacherep)
 {
        struct nfsd_net         *nn;
        struct nfsd_cacherep    *rp, *found;
@@ -495,7 +517,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, struct nfsd_cacherep **cacherep)
                goto out;
        }
 
-       csum = nfsd_cache_csum(rqstp);
+       csum = nfsd_cache_csum(&rqstp->rq_arg, start, len);
 
        /*
         * Since the common case is a cache miss followed by an insert,
@@ -641,24 +663,17 @@ void nfsd_cache_update(struct svc_rqst *rqstp, struct nfsd_cacherep *rp,
        return;
 }
 
-/*
- * Copy cached reply to current reply buffer. Should always fit.
- * FIXME as reply is in a page, we should just attach the page, and
- * keep a refcount....
- */
 static int
 nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data)
 {
-       struct kvec     *vec = &rqstp->rq_res.head[0];
-
-       if (vec->iov_len + data->iov_len > PAGE_SIZE) {
-               printk(KERN_WARNING "nfsd: cached reply too large (%zd).\n",
-                               data->iov_len);
-               return 0;
-       }
-       memcpy((char*)vec->iov_base + vec->iov_len, data->iov_base, data->iov_len);
-       vec->iov_len += data->iov_len;
-       return 1;
+       __be32 *p;
+
+       p = xdr_reserve_space(&rqstp->rq_res_stream, data->iov_len);
+       if (unlikely(!p))
+               return false;
+       memcpy(p, data->iov_base, data->iov_len);
+       xdr_commit_encode(&rqstp->rq_res_stream);
+       return true;
 }
 
 /*
index d6122bb..fe61d9b 100644 (file)
@@ -981,6 +981,8 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
        const struct svc_procedure *proc = rqstp->rq_procinfo;
        __be32 *statp = rqstp->rq_accept_statp;
        struct nfsd_cacherep *rp;
+       unsigned int start, len;
+       __be32 *nfs_reply;
 
        /*
         * Give the xdr decoder a chance to change this if it wants
@@ -988,6 +990,13 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
         */
        rqstp->rq_cachetype = proc->pc_cachetype;
 
+       /*
+        * ->pc_decode advances the argument stream past the NFS
+        * Call header, so grab the header's starting location and
+        * size now for the call to nfsd_cache_lookup().
+        */
+       start = xdr_stream_pos(&rqstp->rq_arg_stream);
+       len = xdr_stream_remaining(&rqstp->rq_arg_stream);
        if (!proc->pc_decode(rqstp, &rqstp->rq_arg_stream))
                goto out_decode_err;
 
@@ -1001,7 +1010,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
        smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter | 1);
 
        rp = NULL;
-       switch (nfsd_cache_lookup(rqstp, &rp)) {
+       switch (nfsd_cache_lookup(rqstp, start, len, &rp)) {
        case RC_DOIT:
                break;
        case RC_REPLY:
@@ -1010,6 +1019,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
                goto out_dropit;
        }
 
+       nfs_reply = xdr_inline_decode(&rqstp->rq_res_stream, 0);
        *statp = proc->pc_func(rqstp);
        if (test_bit(RQ_DROPME, &rqstp->rq_flags))
                goto out_update_drop;
@@ -1023,7 +1033,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
         */
        smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter + 1);
 
-       nfsd_cache_update(rqstp, rp, rqstp->rq_cachetype, statp + 1);
+       nfsd_cache_update(rqstp, rp, rqstp->rq_cachetype, nfs_reply);
 out_cached_reply:
        return 1;
 
index 345b8f1..c63b31a 100644 (file)
@@ -171,7 +171,7 @@ int ovl_getattr(struct mnt_idmap *idmap, const struct path *path,
 
        type = ovl_path_real(dentry, &realpath);
        old_cred = ovl_override_creds(dentry->d_sb);
-       err = vfs_getattr(&realpath, stat, request_mask, flags);
+       err = ovl_do_getattr(&realpath, stat, request_mask, flags);
        if (err)
                goto out;
 
@@ -196,8 +196,8 @@ int ovl_getattr(struct mnt_idmap *idmap, const struct path *path,
                                        (!is_dir ? STATX_NLINK : 0);
 
                        ovl_path_lower(dentry, &realpath);
-                       err = vfs_getattr(&realpath, &lowerstat,
-                                         lowermask, flags);
+                       err = ovl_do_getattr(&realpath, &lowerstat, lowermask,
+                                            flags);
                        if (err)
                                goto out;
 
@@ -249,8 +249,8 @@ int ovl_getattr(struct mnt_idmap *idmap, const struct path *path,
 
                        ovl_path_lowerdata(dentry, &realpath);
                        if (realpath.dentry) {
-                               err = vfs_getattr(&realpath, &lowerdatastat,
-                                                 lowermask, flags);
+                               err = ovl_do_getattr(&realpath, &lowerdatastat,
+                                                    lowermask, flags);
                                if (err)
                                        goto out;
                        } else {
index ca88b26..05c3dd5 100644 (file)
@@ -408,6 +408,14 @@ static inline bool ovl_open_flags_need_copy_up(int flags)
        return ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC));
 }
 
+static inline int ovl_do_getattr(const struct path *path, struct kstat *stat,
+                                u32 request_mask, unsigned int flags)
+{
+       if (flags & AT_GETATTR_NOSEC)
+               return vfs_getattr_nosec(path, stat, request_mask, flags);
+       return vfs_getattr(path, stat, request_mask, flags);
+}
+
 /* util.c */
 int ovl_get_write_access(struct dentry *dentry);
 void ovl_put_write_access(struct dentry *dentry);
index ddab9ea..3fe2dde 100644 (file)
@@ -430,7 +430,7 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
        struct ovl_fs_context *ctx = fc->fs_private;
        struct ovl_fs_context_layer *l;
        char *dup = NULL, *iter;
-       ssize_t nr_lower = 0, nr = 0, nr_data = 0;
+       ssize_t nr_lower, nr;
        bool data_layer = false;
 
        /*
@@ -482,6 +482,7 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
        iter = dup;
        l = ctx->lower;
        for (nr = 0; nr < nr_lower; nr++, l++) {
+               ctx->nr++;
                memset(l, 0, sizeof(*l));
 
                err = ovl_mount_dir(iter, &l->path);
@@ -498,10 +499,10 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
                        goto out_put;
 
                if (data_layer)
-                       nr_data++;
+                       ctx->nr_data++;
 
                /* Calling strchr() again would overrun. */
-               if ((nr + 1) == nr_lower)
+               if (ctx->nr == nr_lower)
                        break;
 
                err = -EINVAL;
@@ -511,7 +512,7 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
                         * This is a regular layer so we require that
                         * there are no data layers.
                         */
-                       if ((ctx->nr_data + nr_data) > 0) {
+                       if (ctx->nr_data > 0) {
                                pr_err("regular lower layers cannot follow data lower layers");
                                goto out_put;
                        }
@@ -524,8 +525,6 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
                data_layer = true;
                iter++;
        }
-       ctx->nr = nr_lower;
-       ctx->nr_data += nr_data;
        kfree(dup);
        return 0;
 
index 50a201e..c3f020c 100644 (file)
@@ -978,7 +978,7 @@ int ovl_set_protattr(struct inode *inode, struct dentry *upper,
        return 0;
 }
 
-/**
+/*
  * Caller must hold a reference to inode to prevent it from being freed while
  * it is marked inuse.
  */
index 6f3285f..af7849e 100644 (file)
@@ -64,8 +64,8 @@ struct key_type cifs_spnego_key_type = {
  * strlen(";sec=ntlmsspi") */
 #define MAX_MECH_STR_LEN       13
 
-/* strlen of "host=" */
-#define HOST_KEY_LEN           5
+/* strlen of ";host=" */
+#define HOST_KEY_LEN           6
 
 /* strlen of ";ip4=" or ";ip6=" */
 #define IP_KEY_LEN             5
index 6ffbd81..7558167 100644 (file)
@@ -191,7 +191,13 @@ struct cifs_open_info_data {
                bool reparse_point;
                bool symlink;
        };
-       __u32 reparse_tag;
+       struct {
+               __u32 tag;
+               union {
+                       struct reparse_data_buffer *buf;
+                       struct reparse_posix_data *posix;
+               };
+       } reparse;
        char *symlink_target;
        union {
                struct smb2_file_all_info fi;
@@ -395,8 +401,7 @@ struct smb_version_operations {
                             struct cifs_tcon *tcon,
                             struct cifs_sb_info *cifs_sb,
                             const char *full_path,
-                            char **target_path,
-                            struct kvec *rsp_iov);
+                            char **target_path);
        /* open a file for non-posix mounts */
        int (*open)(const unsigned int xid, struct cifs_open_parms *oparms, __u32 *oplock,
                    void *buf);
@@ -551,6 +556,9 @@ struct smb_version_operations {
        bool (*is_status_io_timeout)(char *buf);
        /* Check for STATUS_NETWORK_NAME_DELETED */
        bool (*is_network_name_deleted)(char *buf, struct TCP_Server_Info *srv);
+       int (*parse_reparse_point)(struct cifs_sb_info *cifs_sb,
+                                  struct kvec *rsp_iov,
+                                  struct cifs_open_info_data *data);
 };
 
 struct smb_version_values {
index a75220d..83ccc51 100644 (file)
@@ -1356,7 +1356,7 @@ typedef struct smb_com_transaction_ioctl_rsp {
        __le32 DataDisplacement;
        __u8 SetupCount;        /* 1 */
        __le16 ReturnedDataLen;
-       __u16 ByteCount;
+       __le16 ByteCount;
 } __attribute__((packed)) TRANSACT_IOCTL_RSP;
 
 #define CIFS_ACL_OWNER 1
@@ -1509,7 +1509,7 @@ struct reparse_posix_data {
        __le16  ReparseDataLength;
        __u16   Reserved;
        __le64  InodeType; /* LNK, FIFO, CHR etc. */
-       char    PathBuffer[];
+       __u8    DataBuffer[];
 } __attribute__((packed));
 
 struct cifs_quota_data {
index d87e2c2..46feaa0 100644 (file)
@@ -210,7 +210,7 @@ int cifs_get_inode_info(struct inode **inode, const char *full_path,
                        const struct cifs_fid *fid);
 bool cifs_reparse_point_to_fattr(struct cifs_sb_info *cifs_sb,
                                 struct cifs_fattr *fattr,
-                                u32 tag);
+                                struct cifs_open_info_data *data);
 extern int smb311_posix_get_inode_info(struct inode **pinode, const char *search_path,
                        struct super_block *sb, unsigned int xid);
 extern int cifs_get_inode_info_unix(struct inode **pinode,
@@ -458,6 +458,12 @@ extern int CIFSSMBUnixQuerySymLink(const unsigned int xid,
                        struct cifs_tcon *tcon,
                        const unsigned char *searchName, char **syminfo,
                        const struct nls_table *nls_codepage, int remap);
+extern int cifs_query_reparse_point(const unsigned int xid,
+                                   struct cifs_tcon *tcon,
+                                   struct cifs_sb_info *cifs_sb,
+                                   const char *full_path,
+                                   u32 *tag, struct kvec *rsp,
+                                   int *rsp_buftype);
 extern int CIFSSMBQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon,
                               __u16 fid, char **symlinkinfo,
                               const struct nls_table *nls_codepage);
@@ -659,6 +665,12 @@ void cifs_put_tcp_super(struct super_block *sb);
 int cifs_update_super_prepath(struct cifs_sb_info *cifs_sb, char *prefix);
 char *extract_hostname(const char *unc);
 char *extract_sharename(const char *unc);
+int parse_reparse_point(struct reparse_data_buffer *buf,
+                       u32 plen, struct cifs_sb_info *cifs_sb,
+                       bool unicode, struct cifs_open_info_data *data);
+int cifs_sfu_make_node(unsigned int xid, struct inode *inode,
+                      struct dentry *dentry, struct cifs_tcon *tcon,
+                      const char *full_path, umode_t mode, dev_t dev);
 
 #ifdef CONFIG_CIFS_DFS_UPCALL
 static inline int get_dfs_path(const unsigned int xid, struct cifs_ses *ses,
index 25503f1..bad91ba 100644 (file)
@@ -2690,136 +2690,97 @@ querySymLinkRetry:
        return rc;
 }
 
-/*
- *     Recent Windows versions now create symlinks more frequently
- *     and they use the "reparse point" mechanism below.  We can of course
- *     do symlinks nicely to Samba and other servers which support the
- *     CIFS Unix Extensions and we can also do SFU symlinks and "client only"
- *     "MF" symlinks optionally, but for recent Windows we really need to
- *     reenable the code below and fix the cifs_symlink callers to handle this.
- *     In the interim this code has been moved to its own config option so
- *     it is not compiled in by default until callers fixed up and more tested.
- */
-int
-CIFSSMBQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon,
-                   __u16 fid, char **symlinkinfo,
-                   const struct nls_table *nls_codepage)
+int cifs_query_reparse_point(const unsigned int xid,
+                            struct cifs_tcon *tcon,
+                            struct cifs_sb_info *cifs_sb,
+                            const char *full_path,
+                            u32 *tag, struct kvec *rsp,
+                            int *rsp_buftype)
 {
-       int rc = 0;
-       int bytes_returned;
-       struct smb_com_transaction_ioctl_req *pSMB;
-       struct smb_com_transaction_ioctl_rsp *pSMBr;
-       bool is_unicode;
-       unsigned int sub_len;
-       char *sub_start;
-       struct reparse_symlink_data *reparse_buf;
-       struct reparse_posix_data *posix_buf;
+       struct cifs_open_parms oparms;
+       TRANSACT_IOCTL_REQ *io_req = NULL;
+       TRANSACT_IOCTL_RSP *io_rsp = NULL;
+       struct cifs_fid fid;
        __u32 data_offset, data_count;
-       char *end_of_smb;
+       __u8 *start, *end;
+       int io_rsp_len;
+       int oplock = 0;
+       int rc;
 
-       cifs_dbg(FYI, "In Windows reparse style QueryLink for fid %u\n", fid);
-       rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB,
-                     (void **) &pSMBr);
+       cifs_tcon_dbg(FYI, "%s: path=%s\n", __func__, full_path);
+
+       if (cap_unix(tcon->ses))
+               return -EOPNOTSUPP;
+
+       oparms = (struct cifs_open_parms) {
+               .tcon = tcon,
+               .cifs_sb = cifs_sb,
+               .desired_access = FILE_READ_ATTRIBUTES,
+               .create_options = cifs_create_options(cifs_sb,
+                                                     OPEN_REPARSE_POINT),
+               .disposition = FILE_OPEN,
+               .path = full_path,
+               .fid = &fid,
+       };
+
+       rc = CIFS_open(xid, &oparms, &oplock, NULL);
        if (rc)
                return rc;
 
-       pSMB->TotalParameterCount = 0 ;
-       pSMB->TotalDataCount = 0;
-       pSMB->MaxParameterCount = cpu_to_le32(2);
-       /* BB find exact data count max from sess structure BB */
-       pSMB->MaxDataCount = cpu_to_le32(CIFSMaxBufSize & 0xFFFFFF00);
-       pSMB->MaxSetupCount = 4;
-       pSMB->Reserved = 0;
-       pSMB->ParameterOffset = 0;
-       pSMB->DataCount = 0;
-       pSMB->DataOffset = 0;
-       pSMB->SetupCount = 4;
-       pSMB->SubCommand = cpu_to_le16(NT_TRANSACT_IOCTL);
-       pSMB->ParameterCount = pSMB->TotalParameterCount;
-       pSMB->FunctionCode = cpu_to_le32(FSCTL_GET_REPARSE_POINT);
-       pSMB->IsFsctl = 1; /* FSCTL */
-       pSMB->IsRootFlag = 0;
-       pSMB->Fid = fid; /* file handle always le */
-       pSMB->ByteCount = 0;
+       rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon,
+                     (void **)&io_req, (void **)&io_rsp);
+       if (rc)
+               goto error;
 
-       rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
-                        (struct smb_hdr *) pSMBr, &bytes_returned, 0);
-       if (rc) {
-               cifs_dbg(FYI, "Send error in QueryReparseLinkInfo = %d\n", rc);
-               goto qreparse_out;
-       }
+       io_req->TotalParameterCount = 0;
+       io_req->TotalDataCount = 0;
+       io_req->MaxParameterCount = cpu_to_le32(2);
+       /* BB find exact data count max from sess structure BB */
+       io_req->MaxDataCount = cpu_to_le32(CIFSMaxBufSize & 0xFFFFFF00);
+       io_req->MaxSetupCount = 4;
+       io_req->Reserved = 0;
+       io_req->ParameterOffset = 0;
+       io_req->DataCount = 0;
+       io_req->DataOffset = 0;
+       io_req->SetupCount = 4;
+       io_req->SubCommand = cpu_to_le16(NT_TRANSACT_IOCTL);
+       io_req->ParameterCount = io_req->TotalParameterCount;
+       io_req->FunctionCode = cpu_to_le32(FSCTL_GET_REPARSE_POINT);
+       io_req->IsFsctl = 1;
+       io_req->IsRootFlag = 0;
+       io_req->Fid = fid.netfid;
+       io_req->ByteCount = 0;
+
+       rc = SendReceive(xid, tcon->ses, (struct smb_hdr *)io_req,
+                        (struct smb_hdr *)io_rsp, &io_rsp_len, 0);
+       if (rc)
+               goto error;
 
-       data_offset = le32_to_cpu(pSMBr->DataOffset);
-       data_count = le32_to_cpu(pSMBr->DataCount);
-       if (get_bcc(&pSMBr->hdr) < 2 || data_offset > 512) {
-               /* BB also check enough total bytes returned */
-               rc = -EIO;      /* bad smb */
-               goto qreparse_out;
-       }
-       if (!data_count || (data_count > 2048)) {
+       data_offset = le32_to_cpu(io_rsp->DataOffset);
+       data_count = le32_to_cpu(io_rsp->DataCount);
+       if (get_bcc(&io_rsp->hdr) < 2 || data_offset > 512 ||
+           !data_count || data_count > 2048) {
                rc = -EIO;
-               cifs_dbg(FYI, "Invalid return data count on get reparse info ioctl\n");
-               goto qreparse_out;
-       }
-       end_of_smb = 2 + get_bcc(&pSMBr->hdr) + (char *)&pSMBr->ByteCount;
-       reparse_buf = (struct reparse_symlink_data *)
-                               ((char *)&pSMBr->hdr.Protocol + data_offset);
-       if ((char *)reparse_buf >= end_of_smb) {
-               rc = -EIO;
-               goto qreparse_out;
-       }
-       if (reparse_buf->ReparseTag == cpu_to_le32(IO_REPARSE_TAG_NFS)) {
-               cifs_dbg(FYI, "NFS style reparse tag\n");
-               posix_buf =  (struct reparse_posix_data *)reparse_buf;
-
-               if (posix_buf->InodeType != cpu_to_le64(NFS_SPECFILE_LNK)) {
-                       cifs_dbg(FYI, "unsupported file type 0x%llx\n",
-                                le64_to_cpu(posix_buf->InodeType));
-                       rc = -EOPNOTSUPP;
-                       goto qreparse_out;
-               }
-               is_unicode = true;
-               sub_len = le16_to_cpu(reparse_buf->ReparseDataLength);
-               if (posix_buf->PathBuffer + sub_len > end_of_smb) {
-                       cifs_dbg(FYI, "reparse buf beyond SMB\n");
-                       rc = -EIO;
-                       goto qreparse_out;
-               }
-               *symlinkinfo = cifs_strndup_from_utf16(posix_buf->PathBuffer,
-                               sub_len, is_unicode, nls_codepage);
-               goto qreparse_out;
-       } else if (reparse_buf->ReparseTag !=
-                       cpu_to_le32(IO_REPARSE_TAG_SYMLINK)) {
-               rc = -EOPNOTSUPP;
-               goto qreparse_out;
+               goto error;
        }
 
-       /* Reparse tag is NTFS symlink */
-       sub_start = le16_to_cpu(reparse_buf->SubstituteNameOffset) +
-                               reparse_buf->PathBuffer;
-       sub_len = le16_to_cpu(reparse_buf->SubstituteNameLength);
-       if (sub_start + sub_len > end_of_smb) {
-               cifs_dbg(FYI, "reparse buf beyond SMB\n");
+       end = 2 + get_bcc(&io_rsp->hdr) + (__u8 *)&io_rsp->ByteCount;
+       start = (__u8 *)&io_rsp->hdr.Protocol + data_offset;
+       if (start >= end) {
                rc = -EIO;
-               goto qreparse_out;
+               goto error;
        }
-       if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE)
-               is_unicode = true;
-       else
-               is_unicode = false;
-
-       /* BB FIXME investigate remapping reserved chars here */
-       *symlinkinfo = cifs_strndup_from_utf16(sub_start, sub_len, is_unicode,
-                                              nls_codepage);
-       if (!*symlinkinfo)
-               rc = -ENOMEM;
-qreparse_out:
-       cifs_buf_release(pSMB);
 
-       /*
-        * Note: On -EAGAIN error only caller can retry on handle based calls
-        * since file handle passed in no longer valid.
-        */
+       *tag = le32_to_cpu(((struct reparse_data_buffer *)start)->ReparseTag);
+       rsp->iov_base = io_rsp;
+       rsp->iov_len = io_rsp_len;
+       *rsp_buftype = CIFS_LARGE_BUFFER;
+       CIFSSMBClose(xid, tcon, fid.netfid);
+       return 0;
+
+error:
+       cifs_buf_release(io_req);
+       CIFSSMBClose(xid, tcon, fid.netfid);
        return rc;
 }
 
index 57c2a7d..f896f60 100644 (file)
@@ -2065,6 +2065,12 @@ void __cifs_put_smb_ses(struct cifs_ses *ses)
                ses->chans[i].server = NULL;
        }
 
+       /* we now account for primary channel in iface->refcount */
+       if (ses->chans[0].iface) {
+               kref_put(&ses->chans[0].iface->refcount, release_iface);
+               ses->chans[0].server = NULL;
+       }
+
        sesInfoFree(ses);
        cifs_put_tcp_session(server, 0);
 }
index 86fbd3f..47f49be 100644 (file)
@@ -459,8 +459,7 @@ static int cifs_get_unix_fattr(const unsigned char *full_path,
                        return -EOPNOTSUPP;
                rc = server->ops->query_symlink(xid, tcon,
                                                cifs_sb, full_path,
-                                               &fattr->cf_symlink_target,
-                                               NULL);
+                                               &fattr->cf_symlink_target);
                cifs_dbg(FYI, "%s: query_symlink: %d\n", __func__, rc);
        }
        return rc;
@@ -722,10 +721,51 @@ static void smb311_posix_info_to_fattr(struct cifs_fattr *fattr,
                fattr->cf_mode, fattr->cf_uniqueid, fattr->cf_nlink);
 }
 
+static inline dev_t nfs_mkdev(struct reparse_posix_data *buf)
+{
+       u64 v = le64_to_cpu(*(__le64 *)buf->DataBuffer);
+
+       return MKDEV(v >> 32, v & 0xffffffff);
+}
+
 bool cifs_reparse_point_to_fattr(struct cifs_sb_info *cifs_sb,
                                 struct cifs_fattr *fattr,
-                                u32 tag)
+                                struct cifs_open_info_data *data)
 {
+       struct reparse_posix_data *buf = data->reparse.posix;
+       u32 tag = data->reparse.tag;
+
+       if (tag == IO_REPARSE_TAG_NFS && buf) {
+               switch (le64_to_cpu(buf->InodeType)) {
+               case NFS_SPECFILE_CHR:
+                       fattr->cf_mode |= S_IFCHR | cifs_sb->ctx->file_mode;
+                       fattr->cf_dtype = DT_CHR;
+                       fattr->cf_rdev = nfs_mkdev(buf);
+                       break;
+               case NFS_SPECFILE_BLK:
+                       fattr->cf_mode |= S_IFBLK | cifs_sb->ctx->file_mode;
+                       fattr->cf_dtype = DT_BLK;
+                       fattr->cf_rdev = nfs_mkdev(buf);
+                       break;
+               case NFS_SPECFILE_FIFO:
+                       fattr->cf_mode |= S_IFIFO | cifs_sb->ctx->file_mode;
+                       fattr->cf_dtype = DT_FIFO;
+                       break;
+               case NFS_SPECFILE_SOCK:
+                       fattr->cf_mode |= S_IFSOCK | cifs_sb->ctx->file_mode;
+                       fattr->cf_dtype = DT_SOCK;
+                       break;
+               case NFS_SPECFILE_LNK:
+                       fattr->cf_mode = S_IFLNK | cifs_sb->ctx->file_mode;
+                       fattr->cf_dtype = DT_LNK;
+                       break;
+               default:
+                       WARN_ON_ONCE(1);
+                       return false;
+               }
+               return true;
+       }
+
        switch (tag) {
        case IO_REPARSE_TAG_LX_SYMLINK:
                fattr->cf_mode |= S_IFLNK | cifs_sb->ctx->file_mode;
@@ -791,7 +831,7 @@ static void cifs_open_info_to_fattr(struct cifs_fattr *fattr,
        fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks);
 
        if (cifs_open_data_reparse(data) &&
-           cifs_reparse_point_to_fattr(cifs_sb, fattr, data->reparse_tag))
+           cifs_reparse_point_to_fattr(cifs_sb, fattr, data))
                goto out_reparse;
 
        if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
@@ -856,7 +896,7 @@ cifs_get_file_info(struct file *filp)
                data.adjust_tz = false;
                if (data.symlink_target) {
                        data.symlink = true;
-                       data.reparse_tag = IO_REPARSE_TAG_SYMLINK;
+                       data.reparse.tag = IO_REPARSE_TAG_SYMLINK;
                }
                cifs_open_info_to_fattr(&fattr, &data, inode->i_sb);
                break;
@@ -1025,7 +1065,7 @@ static int reparse_info_to_fattr(struct cifs_open_info_data *data,
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct kvec rsp_iov, *iov = NULL;
        int rsp_buftype = CIFS_NO_BUFFER;
-       u32 tag = data->reparse_tag;
+       u32 tag = data->reparse.tag;
        int rc = 0;
 
        if (!tag && server->ops->query_reparse_point) {
@@ -1035,22 +1075,28 @@ static int reparse_info_to_fattr(struct cifs_open_info_data *data,
                if (!rc)
                        iov = &rsp_iov;
        }
-       switch ((data->reparse_tag = tag)) {
+
+       rc = -EOPNOTSUPP;
+       switch ((data->reparse.tag = tag)) {
        case 0: /* SMB1 symlink */
-               iov = NULL;
-               fallthrough;
-       case IO_REPARSE_TAG_NFS:
-       case IO_REPARSE_TAG_SYMLINK:
-               if (!data->symlink_target && server->ops->query_symlink) {
+               if (server->ops->query_symlink) {
                        rc = server->ops->query_symlink(xid, tcon,
                                                        cifs_sb, full_path,
-                                                       &data->symlink_target,
-                                                       iov);
+                                                       &data->symlink_target);
                }
                break;
        case IO_REPARSE_TAG_MOUNT_POINT:
                cifs_create_junction_fattr(fattr, sb);
+               rc = 0;
                goto out;
+       default:
+               if (data->symlink_target) {
+                       rc = 0;
+               } else if (server->ops->parse_reparse_point) {
+                       rc = server->ops->parse_reparse_point(cifs_sb,
+                                                             iov, data);
+               }
+               break;
        }
 
        cifs_open_info_to_fattr(fattr, data, sb);
index 47fc22d..d30ea20 100644 (file)
@@ -153,6 +153,10 @@ static bool reparse_file_needs_reval(const struct cifs_fattr *fattr)
 static void
 cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
 {
+       struct cifs_open_info_data data = {
+               .reparse = { .tag = fattr->cf_cifstag, },
+       };
+
        fattr->cf_uid = cifs_sb->ctx->linux_uid;
        fattr->cf_gid = cifs_sb->ctx->linux_gid;
 
@@ -165,7 +169,7 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
         * reasonably map some of them to directories vs. files vs. symlinks
         */
        if ((fattr->cf_cifsattrs & ATTR_REPARSE) &&
-           cifs_reparse_point_to_fattr(cifs_sb, fattr, fattr->cf_cifstag))
+           cifs_reparse_point_to_fattr(cifs_sb, fattr, &data))
                goto out_reparse;
 
        if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
index 0bb2ac9..816e01c 100644 (file)
@@ -322,28 +322,32 @@ cifs_disable_secondary_channels(struct cifs_ses *ses)
                iface = ses->chans[i].iface;
                server = ses->chans[i].server;
 
+               /*
+                * remove these references first, since we need to unlock
+                * the chan_lock here, since iface_lock is a higher lock
+                */
+               ses->chans[i].iface = NULL;
+               ses->chans[i].server = NULL;
+               spin_unlock(&ses->chan_lock);
+
                if (iface) {
                        spin_lock(&ses->iface_lock);
-                       kref_put(&iface->refcount, release_iface);
-                       ses->chans[i].iface = NULL;
                        iface->num_channels--;
                        if (iface->weight_fulfilled)
                                iface->weight_fulfilled--;
+                       kref_put(&iface->refcount, release_iface);
                        spin_unlock(&ses->iface_lock);
                }
 
-               spin_unlock(&ses->chan_lock);
-               if (server && !server->terminate) {
-                       server->terminate = true;
-                       cifs_signal_cifsd_for_reconnect(server, false);
-               }
-               spin_lock(&ses->chan_lock);
-
                if (server) {
-                       ses->chans[i].server = NULL;
+                       if (!server->terminate) {
+                               server->terminate = true;
+                               cifs_signal_cifsd_for_reconnect(server, false);
+                       }
                        cifs_put_tcp_session(server, false);
                }
 
+               spin_lock(&ses->chan_lock);
        }
 
 done:
index 9bf8735..a9eaba8 100644 (file)
@@ -976,64 +976,37 @@ static int cifs_query_symlink(const unsigned int xid,
                              struct cifs_tcon *tcon,
                              struct cifs_sb_info *cifs_sb,
                              const char *full_path,
-                             char **target_path,
-                             struct kvec *rsp_iov)
+                             char **target_path)
 {
        int rc;
-       int oplock = 0;
-       bool is_reparse_point = !!rsp_iov;
-       struct cifs_fid fid;
-       struct cifs_open_parms oparms;
 
-       cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path);
+       cifs_tcon_dbg(FYI, "%s: path=%s\n", __func__, full_path);
 
-       if (is_reparse_point) {
-               cifs_dbg(VFS, "reparse points not handled for SMB1 symlinks\n");
+       if (!cap_unix(tcon->ses))
                return -EOPNOTSUPP;
-       }
-
-       /* Check for unix extensions */
-       if (cap_unix(tcon->ses)) {
-               rc = CIFSSMBUnixQuerySymLink(xid, tcon, full_path, target_path,
-                                            cifs_sb->local_nls,
-                                            cifs_remap(cifs_sb));
-               if (rc == -EREMOTE)
-                       rc = cifs_unix_dfs_readlink(xid, tcon, full_path,
-                                                   target_path,
-                                                   cifs_sb->local_nls);
-
-               goto out;
-       }
-
-       oparms = (struct cifs_open_parms) {
-               .tcon = tcon,
-               .cifs_sb = cifs_sb,
-               .desired_access = FILE_READ_ATTRIBUTES,
-               .create_options = cifs_create_options(cifs_sb,
-                                                     OPEN_REPARSE_POINT),
-               .disposition = FILE_OPEN,
-               .path = full_path,
-               .fid = &fid,
-       };
-
-       rc = CIFS_open(xid, &oparms, &oplock, NULL);
-       if (rc)
-               goto out;
-
-       rc = CIFSSMBQuerySymLink(xid, tcon, fid.netfid, target_path,
-                                cifs_sb->local_nls);
-       if (rc)
-               goto out_close;
 
-       convert_delimiter(*target_path, '/');
-out_close:
-       CIFSSMBClose(xid, tcon, fid.netfid);
-out:
-       if (!rc)
-               cifs_dbg(FYI, "%s: target path: %s\n", __func__, *target_path);
+       rc = CIFSSMBUnixQuerySymLink(xid, tcon, full_path, target_path,
+                                    cifs_sb->local_nls, cifs_remap(cifs_sb));
+       if (rc == -EREMOTE)
+               rc = cifs_unix_dfs_readlink(xid, tcon, full_path,
+                                           target_path, cifs_sb->local_nls);
        return rc;
 }
 
+static int cifs_parse_reparse_point(struct cifs_sb_info *cifs_sb,
+                                   struct kvec *rsp_iov,
+                                   struct cifs_open_info_data *data)
+{
+       struct reparse_data_buffer *buf;
+       TRANSACT_IOCTL_RSP *io = rsp_iov->iov_base;
+       bool unicode = !!(io->hdr.Flags2 & SMBFLG2_UNICODE);
+       u32 plen = le16_to_cpu(io->ByteCount);
+
+       buf = (struct reparse_data_buffer *)((__u8 *)&io->hdr.Protocol +
+                                            le32_to_cpu(io->DataOffset));
+       return parse_reparse_point(buf, plen, cifs_sb, unicode, data);
+}
+
 static bool
 cifs_is_read_op(__u32 oplock)
 {
@@ -1068,15 +1041,7 @@ cifs_make_node(unsigned int xid, struct inode *inode,
 {
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
        struct inode *newinode = NULL;
-       int rc = -EPERM;
-       struct cifs_open_info_data buf = {};
-       struct cifs_io_parms io_parms;
-       __u32 oplock = 0;
-       struct cifs_fid fid;
-       struct cifs_open_parms oparms;
-       unsigned int bytes_written;
-       struct win_dev *pdev;
-       struct kvec iov[2];
+       int rc;
 
        if (tcon->unix_ext) {
                /*
@@ -1110,74 +1075,18 @@ cifs_make_node(unsigned int xid, struct inode *inode,
                        d_instantiate(dentry, newinode);
                return rc;
        }
-
        /*
-        * SMB1 SFU emulation: should work with all servers, but only
-        * support block and char device (no socket & fifo)
+        * Check if mounted with mount parm 'sfu' mount parm.
+        * SFU emulation should work with all servers, but only
+        * supports block and char device (no socket & fifo),
+        * and was used by default in earlier versions of Windows
         */
        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL))
-               return rc;
-
-       if (!S_ISCHR(mode) && !S_ISBLK(mode))
-               return rc;
-
-       cifs_dbg(FYI, "sfu compat create special file\n");
-
-       oparms = (struct cifs_open_parms) {
-               .tcon = tcon,
-               .cifs_sb = cifs_sb,
-               .desired_access = GENERIC_WRITE,
-               .create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR |
-                                                     CREATE_OPTION_SPECIAL),
-               .disposition = FILE_CREATE,
-               .path = full_path,
-               .fid = &fid,
-       };
-
-       if (tcon->ses->server->oplocks)
-               oplock = REQ_OPLOCK;
-       else
-               oplock = 0;
-       rc = tcon->ses->server->ops->open(xid, &oparms, &oplock, &buf);
-       if (rc)
-               return rc;
-
-       /*
-        * BB Do not bother to decode buf since no local inode yet to put
-        * timestamps in, but we can reuse it safely.
-        */
-
-       pdev = (struct win_dev *)&buf.fi;
-       io_parms.pid = current->tgid;
-       io_parms.tcon = tcon;
-       io_parms.offset = 0;
-       io_parms.length = sizeof(struct win_dev);
-       iov[1].iov_base = &buf.fi;
-       iov[1].iov_len = sizeof(struct win_dev);
-       if (S_ISCHR(mode)) {
-               memcpy(pdev->type, "IntxCHR", 8);
-               pdev->major = cpu_to_le64(MAJOR(dev));
-               pdev->minor = cpu_to_le64(MINOR(dev));
-               rc = tcon->ses->server->ops->sync_write(xid, &fid, &io_parms,
-                                                       &bytes_written, iov, 1);
-       } else if (S_ISBLK(mode)) {
-               memcpy(pdev->type, "IntxBLK", 8);
-               pdev->major = cpu_to_le64(MAJOR(dev));
-               pdev->minor = cpu_to_le64(MINOR(dev));
-               rc = tcon->ses->server->ops->sync_write(xid, &fid, &io_parms,
-                                                       &bytes_written, iov, 1);
-       }
-       tcon->ses->server->ops->close(xid, tcon, &fid);
-       d_drop(dentry);
-
-       /* FIXME: add code here to set EAs */
-
-       cifs_free_open_info(&buf);
-       return rc;
+               return -EPERM;
+       return cifs_sfu_make_node(xid, inode, dentry, tcon,
+                                 full_path, mode, dev);
 }
 
-
-
 struct smb_version_operations smb1_operations = {
        .send_cancel = send_nt_cancel,
        .compare_fids = cifs_compare_fids,
@@ -1214,6 +1123,7 @@ struct smb_version_operations smb1_operations = {
        .is_path_accessible = cifs_is_path_accessible,
        .can_echo = cifs_can_echo,
        .query_path_info = cifs_query_path_info,
+       .query_reparse_point = cifs_query_reparse_point,
        .query_file_info = cifs_query_file_info,
        .get_srv_inum = cifs_get_srv_inum,
        .set_path_size = CIFSSMBSetEOF,
@@ -1229,6 +1139,7 @@ struct smb_version_operations smb1_operations = {
        .rename = CIFSSMBRename,
        .create_hardlink = CIFSCreateHardLink,
        .query_symlink = cifs_query_symlink,
+       .parse_reparse_point = cifs_parse_reparse_point,
        .open = cifs_open_file,
        .set_fid = cifs_set_fid,
        .close = cifs_close_file,
index 0b89f70..c94940a 100644 (file)
@@ -555,7 +555,7 @@ static int parse_create_response(struct cifs_open_info_data *data,
                break;
        }
        data->reparse_point = reparse_point;
-       data->reparse_tag = tag;
+       data->reparse.tag = tag;
        return rc;
 }
 
index a959ed2..82ab62f 100644 (file)
@@ -2866,115 +2866,119 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses,
        return rc;
 }
 
-static int
-parse_reparse_posix(struct reparse_posix_data *symlink_buf,
-                     u32 plen, char **target_path,
-                     struct cifs_sb_info *cifs_sb)
+/* See MS-FSCC 2.1.2.6 for the 'NFS' style reparse tags */
+static int parse_reparse_posix(struct reparse_posix_data *buf,
+                              struct cifs_sb_info *cifs_sb,
+                              struct cifs_open_info_data *data)
 {
        unsigned int len;
-
-       /* See MS-FSCC 2.1.2.6 for the 'NFS' style reparse tags */
-       len = le16_to_cpu(symlink_buf->ReparseDataLength);
-
-       if (le64_to_cpu(symlink_buf->InodeType) != NFS_SPECFILE_LNK) {
-               cifs_dbg(VFS, "%lld not a supported symlink type\n",
-                       le64_to_cpu(symlink_buf->InodeType));
+       u64 type;
+
+       switch ((type = le64_to_cpu(buf->InodeType))) {
+       case NFS_SPECFILE_LNK:
+               len = le16_to_cpu(buf->ReparseDataLength);
+               data->symlink_target = cifs_strndup_from_utf16(buf->DataBuffer,
+                                                              len, true,
+                                                              cifs_sb->local_nls);
+               if (!data->symlink_target)
+                       return -ENOMEM;
+               convert_delimiter(data->symlink_target, '/');
+               cifs_dbg(FYI, "%s: target path: %s\n",
+                        __func__, data->symlink_target);
+               break;
+       case NFS_SPECFILE_CHR:
+       case NFS_SPECFILE_BLK:
+       case NFS_SPECFILE_FIFO:
+       case NFS_SPECFILE_SOCK:
+               break;
+       default:
+               cifs_dbg(VFS, "%s: unhandled inode type: 0x%llx\n",
+                        __func__, type);
                return -EOPNOTSUPP;
        }
-
-       *target_path = cifs_strndup_from_utf16(
-                               symlink_buf->PathBuffer,
-                               len, true, cifs_sb->local_nls);
-       if (!(*target_path))
-               return -ENOMEM;
-
-       convert_delimiter(*target_path, '/');
-       cifs_dbg(FYI, "%s: target path: %s\n", __func__, *target_path);
-
        return 0;
 }
 
-static int
-parse_reparse_symlink(struct reparse_symlink_data_buffer *symlink_buf,
-                     u32 plen, char **target_path,
-                     struct cifs_sb_info *cifs_sb)
+static int parse_reparse_symlink(struct reparse_symlink_data_buffer *sym,
+                                u32 plen, bool unicode,
+                                struct cifs_sb_info *cifs_sb,
+                                struct cifs_open_info_data *data)
 {
-       unsigned int sub_len;
-       unsigned int sub_offset;
+       unsigned int len;
+       unsigned int offs;
 
        /* We handle Symbolic Link reparse tag here. See: MS-FSCC 2.1.2.4 */
 
-       sub_offset = le16_to_cpu(symlink_buf->SubstituteNameOffset);
-       sub_len = le16_to_cpu(symlink_buf->SubstituteNameLength);
-       if (sub_offset + 20 > plen ||
-           sub_offset + sub_len + 20 > plen) {
+       offs = le16_to_cpu(sym->SubstituteNameOffset);
+       len = le16_to_cpu(sym->SubstituteNameLength);
+       if (offs + 20 > plen || offs + len + 20 > plen) {
                cifs_dbg(VFS, "srv returned malformed symlink buffer\n");
                return -EIO;
        }
 
-       *target_path = cifs_strndup_from_utf16(
-                               symlink_buf->PathBuffer + sub_offset,
-                               sub_len, true, cifs_sb->local_nls);
-       if (!(*target_path))
+       data->symlink_target = cifs_strndup_from_utf16(sym->PathBuffer + offs,
+                                                      len, unicode,
+                                                      cifs_sb->local_nls);
+       if (!data->symlink_target)
                return -ENOMEM;
 
-       convert_delimiter(*target_path, '/');
-       cifs_dbg(FYI, "%s: target path: %s\n", __func__, *target_path);
+       convert_delimiter(data->symlink_target, '/');
+       cifs_dbg(FYI, "%s: target path: %s\n", __func__, data->symlink_target);
 
        return 0;
 }
 
-static int
-parse_reparse_point(struct reparse_data_buffer *buf,
-                   u32 plen, char **target_path,
-                   struct cifs_sb_info *cifs_sb)
+int parse_reparse_point(struct reparse_data_buffer *buf,
+                       u32 plen, struct cifs_sb_info *cifs_sb,
+                       bool unicode, struct cifs_open_info_data *data)
 {
-       if (plen < sizeof(struct reparse_data_buffer)) {
-               cifs_dbg(VFS, "reparse buffer is too small. Must be at least 8 bytes but was %d\n",
-                        plen);
+       if (plen < sizeof(*buf)) {
+               cifs_dbg(VFS, "%s: reparse buffer is too small. Must be at least 8 bytes but was %d\n",
+                        __func__, plen);
                return -EIO;
        }
 
-       if (plen < le16_to_cpu(buf->ReparseDataLength) +
-           sizeof(struct reparse_data_buffer)) {
-               cifs_dbg(VFS, "srv returned invalid reparse buf length: %d\n",
-                        plen);
+       if (plen < le16_to_cpu(buf->ReparseDataLength) + sizeof(*buf)) {
+               cifs_dbg(VFS, "%s: invalid reparse buf length: %d\n",
+                        __func__, plen);
                return -EIO;
        }
 
+       data->reparse.buf = buf;
+
        /* See MS-FSCC 2.1.2 */
        switch (le32_to_cpu(buf->ReparseTag)) {
        case IO_REPARSE_TAG_NFS:
-               return parse_reparse_posix(
-                       (struct reparse_posix_data *)buf,
-                       plen, target_path, cifs_sb);
+               return parse_reparse_posix((struct reparse_posix_data *)buf,
+                                          cifs_sb, data);
        case IO_REPARSE_TAG_SYMLINK:
                return parse_reparse_symlink(
                        (struct reparse_symlink_data_buffer *)buf,
-                       plen, target_path, cifs_sb);
+                       plen, unicode, cifs_sb, data);
+       case IO_REPARSE_TAG_LX_SYMLINK:
+       case IO_REPARSE_TAG_AF_UNIX:
+       case IO_REPARSE_TAG_LX_FIFO:
+       case IO_REPARSE_TAG_LX_CHR:
+       case IO_REPARSE_TAG_LX_BLK:
+               return 0;
        default:
-               cifs_dbg(VFS, "srv returned unknown symlink buffer tag:0x%08x\n",
-                        le32_to_cpu(buf->ReparseTag));
+               cifs_dbg(VFS, "%s: unhandled reparse tag: 0x%08x\n",
+                        __func__, le32_to_cpu(buf->ReparseTag));
                return -EOPNOTSUPP;
        }
 }
 
-static int smb2_query_symlink(const unsigned int xid,
-                             struct cifs_tcon *tcon,
-                             struct cifs_sb_info *cifs_sb,
-                             const char *full_path,
-                             char **target_path,
-                             struct kvec *rsp_iov)
+static int smb2_parse_reparse_point(struct cifs_sb_info *cifs_sb,
+                                   struct kvec *rsp_iov,
+                                   struct cifs_open_info_data *data)
 {
        struct reparse_data_buffer *buf;
        struct smb2_ioctl_rsp *io = rsp_iov->iov_base;
        u32 plen = le32_to_cpu(io->OutputCount);
 
-       cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path);
-
        buf = (struct reparse_data_buffer *)((u8 *)io +
                                             le32_to_cpu(io->OutputOffset));
-       return parse_reparse_point(buf, plen, target_path, cifs_sb);
+       return parse_reparse_point(buf, plen, cifs_sb, true, data);
 }
 
 static int smb2_query_reparse_point(const unsigned int xid,
@@ -5064,41 +5068,24 @@ smb2_next_header(char *buf)
        return le32_to_cpu(hdr->NextCommand);
 }
 
-static int
-smb2_make_node(unsigned int xid, struct inode *inode,
-              struct dentry *dentry, struct cifs_tcon *tcon,
-              const char *full_path, umode_t mode, dev_t dev)
+int cifs_sfu_make_node(unsigned int xid, struct inode *inode,
+                      struct dentry *dentry, struct cifs_tcon *tcon,
+                      const char *full_path, umode_t mode, dev_t dev)
 {
-       struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
-       int rc = -EPERM;
        struct cifs_open_info_data buf = {};
-       struct cifs_io_parms io_parms = {0};
-       __u32 oplock = 0;
-       struct cifs_fid fid;
+       struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_open_parms oparms;
+       struct cifs_io_parms io_parms = {};
+       struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+       struct cifs_fid fid;
        unsigned int bytes_written;
        struct win_dev *pdev;
        struct kvec iov[2];
-
-       /*
-        * Check if mounted with mount parm 'sfu' mount parm.
-        * SFU emulation should work with all servers, but only
-        * supports block and char device (no socket & fifo),
-        * and was used by default in earlier versions of Windows
-        */
-       if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL))
-               return rc;
-
-       /*
-        * TODO: Add ability to create instead via reparse point. Windows (e.g.
-        * their current NFS server) uses this approach to expose special files
-        * over SMB2/SMB3 and Samba will do this with SMB3.1.1 POSIX Extensions
-        */
+       __u32 oplock = server->oplocks ? REQ_OPLOCK : 0;
+       int rc;
 
        if (!S_ISCHR(mode) && !S_ISBLK(mode) && !S_ISFIFO(mode))
-               return rc;
-
-       cifs_dbg(FYI, "sfu compat create special file\n");
+               return -EPERM;
 
        oparms = (struct cifs_open_parms) {
                .tcon = tcon,
@@ -5111,11 +5098,7 @@ smb2_make_node(unsigned int xid, struct inode *inode,
                .fid = &fid,
        };
 
-       if (tcon->ses->server->oplocks)
-               oplock = REQ_OPLOCK;
-       else
-               oplock = 0;
-       rc = tcon->ses->server->ops->open(xid, &oparms, &oplock, &buf);
+       rc = server->ops->open(xid, &oparms, &oplock, &buf);
        if (rc)
                return rc;
 
@@ -5123,42 +5106,56 @@ smb2_make_node(unsigned int xid, struct inode *inode,
         * BB Do not bother to decode buf since no local inode yet to put
         * timestamps in, but we can reuse it safely.
         */
-
        pdev = (struct win_dev *)&buf.fi;
        io_parms.pid = current->tgid;
        io_parms.tcon = tcon;
-       io_parms.offset = 0;
-       io_parms.length = sizeof(struct win_dev);
-       iov[1].iov_base = &buf.fi;
-       iov[1].iov_len = sizeof(struct win_dev);
+       io_parms.length = sizeof(*pdev);
+       iov[1].iov_base = pdev;
+       iov[1].iov_len = sizeof(*pdev);
        if (S_ISCHR(mode)) {
                memcpy(pdev->type, "IntxCHR", 8);
                pdev->major = cpu_to_le64(MAJOR(dev));
                pdev->minor = cpu_to_le64(MINOR(dev));
-               rc = tcon->ses->server->ops->sync_write(xid, &fid, &io_parms,
-                                                       &bytes_written, iov, 1);
        } else if (S_ISBLK(mode)) {
                memcpy(pdev->type, "IntxBLK", 8);
                pdev->major = cpu_to_le64(MAJOR(dev));
                pdev->minor = cpu_to_le64(MINOR(dev));
-               rc = tcon->ses->server->ops->sync_write(xid, &fid, &io_parms,
-                                                       &bytes_written, iov, 1);
        } else if (S_ISFIFO(mode)) {
                memcpy(pdev->type, "LnxFIFO", 8);
-               pdev->major = 0;
-               pdev->minor = 0;
-               rc = tcon->ses->server->ops->sync_write(xid, &fid, &io_parms,
-                                                       &bytes_written, iov, 1);
        }
-       tcon->ses->server->ops->close(xid, tcon, &fid);
-       d_drop(dentry);
 
+       rc = server->ops->sync_write(xid, &fid, &io_parms,
+                                    &bytes_written, iov, 1);
+       server->ops->close(xid, tcon, &fid);
+       d_drop(dentry);
        /* FIXME: add code here to set EAs */
-
        cifs_free_open_info(&buf);
        return rc;
 }
 
+static int smb2_make_node(unsigned int xid, struct inode *inode,
+                         struct dentry *dentry, struct cifs_tcon *tcon,
+                         const char *full_path, umode_t mode, dev_t dev)
+{
+       struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+
+       /*
+        * Check if mounted with mount parm 'sfu' mount parm.
+        * SFU emulation should work with all servers, but only
+        * supports block and char device (no socket & fifo),
+        * and was used by default in earlier versions of Windows
+        */
+       if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL))
+               return -EPERM;
+       /*
+        * TODO: Add ability to create instead via reparse point. Windows (e.g.
+        * their current NFS server) uses this approach to expose special files
+        * over SMB2/SMB3 and Samba will do this with SMB3.1.1 POSIX Extensions
+        */
+       return cifs_sfu_make_node(xid, inode, dentry, tcon,
+                                 full_path, mode, dev);
+}
+
 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
 struct smb_version_operations smb20_operations = {
        .compare_fids = smb2_compare_fids,
@@ -5209,7 +5206,7 @@ struct smb_version_operations smb20_operations = {
        .unlink = smb2_unlink,
        .rename = smb2_rename_path,
        .create_hardlink = smb2_create_hardlink,
-       .query_symlink = smb2_query_symlink,
+       .parse_reparse_point = smb2_parse_reparse_point,
        .query_mf_symlink = smb3_query_mf_symlink,
        .create_mf_symlink = smb3_create_mf_symlink,
        .open = smb2_open_file,
@@ -5311,7 +5308,7 @@ struct smb_version_operations smb21_operations = {
        .unlink = smb2_unlink,
        .rename = smb2_rename_path,
        .create_hardlink = smb2_create_hardlink,
-       .query_symlink = smb2_query_symlink,
+       .parse_reparse_point = smb2_parse_reparse_point,
        .query_mf_symlink = smb3_query_mf_symlink,
        .create_mf_symlink = smb3_create_mf_symlink,
        .open = smb2_open_file,
@@ -5416,7 +5413,7 @@ struct smb_version_operations smb30_operations = {
        .unlink = smb2_unlink,
        .rename = smb2_rename_path,
        .create_hardlink = smb2_create_hardlink,
-       .query_symlink = smb2_query_symlink,
+       .parse_reparse_point = smb2_parse_reparse_point,
        .query_mf_symlink = smb3_query_mf_symlink,
        .create_mf_symlink = smb3_create_mf_symlink,
        .open = smb2_open_file,
@@ -5530,7 +5527,7 @@ struct smb_version_operations smb311_operations = {
        .unlink = smb2_unlink,
        .rename = smb2_rename_path,
        .create_hardlink = smb2_create_hardlink,
-       .query_symlink = smb2_query_symlink,
+       .parse_reparse_point = smb2_parse_reparse_point,
        .query_mf_symlink = smb3_query_mf_symlink,
        .create_mf_symlink = smb3_create_mf_symlink,
        .open = smb2_open_file,
index 84ea673..5a3ca62 100644 (file)
@@ -458,6 +458,8 @@ generate_smb3signingkey(struct cifs_ses *ses,
                                  ptriplet->encryption.context,
                                  ses->smb3encryptionkey,
                                  SMB3_ENC_DEC_KEY_SIZE);
+               if (rc)
+                       return rc;
                rc = generate_key(ses, ptriplet->decryption.label,
                                  ptriplet->decryption.context,
                                  ses->smb3decryptionkey,
@@ -466,9 +468,6 @@ generate_smb3signingkey(struct cifs_ses *ses,
                        return rc;
        }
 
-       if (rc)
-               return rc;
-
 #ifdef CONFIG_CIFS_DEBUG_DUMP_KEYS
        cifs_dbg(VFS, "%s: dumping generated AES session keys\n", __func__);
        /*
index 24bb020..f721d26 100644 (file)
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -133,7 +133,8 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat,
        idmap = mnt_idmap(path->mnt);
        if (inode->i_op->getattr)
                return inode->i_op->getattr(idmap, path, stat,
-                                           request_mask, query_flags);
+                                           request_mask,
+                                           query_flags | AT_GETATTR_NOSEC);
 
        generic_fillattr(idmap, request_mask, inode, stat);
        return 0;
@@ -166,6 +167,9 @@ int vfs_getattr(const struct path *path, struct kstat *stat,
 {
        int retval;
 
+       if (WARN_ON_ONCE(query_flags & AT_GETATTR_NOSEC))
+               return -EPERM;
+
        retval = security_inode_getattr(path);
        if (retval)
                return retval;
index f8a594a..0b90869 100644 (file)
 /*
  * eventfs_mutex protects the eventfs_inode (ei) dentry. Any access
  * to the ei->dentry must be done under this mutex and after checking
- * if ei->is_freed is not set. The ei->dentry is released under the
- * mutex at the same time ei->is_freed is set. If ei->is_freed is set
- * then the ei->dentry is invalid.
+ * if ei->is_freed is not set. When ei->is_freed is set, the dentry
+ * is on its way to being freed after the last dput() is made on it.
  */
 static DEFINE_MUTEX(eventfs_mutex);
 
 /*
  * The eventfs_inode (ei) itself is protected by SRCU. It is released from
  * its parent's list and will have is_freed set (under eventfs_mutex).
- * After the SRCU grace period is over, the ei may be freed.
+ * After the SRCU grace period is over and the last dput() is called
+ * the ei is freed.
  */
 DEFINE_STATIC_SRCU(eventfs_srcu);
 
@@ -95,7 +95,7 @@ static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry,
        if (!(dentry->d_inode->i_mode & S_IFDIR)) {
                if (!ei->entry_attrs) {
                        ei->entry_attrs = kzalloc(sizeof(*ei->entry_attrs) * ei->nr_entries,
-                                                 GFP_KERNEL);
+                                                 GFP_NOFS);
                        if (!ei->entry_attrs) {
                                ret = -ENOMEM;
                                goto out;
@@ -326,7 +326,8 @@ create_file_dentry(struct eventfs_inode *ei, int idx,
        struct eventfs_attr *attr = NULL;
        struct dentry **e_dentry = &ei->d_children[idx];
        struct dentry *dentry;
-       bool invalidate = false;
+
+       WARN_ON_ONCE(!inode_is_locked(parent->d_inode));
 
        mutex_lock(&eventfs_mutex);
        if (ei->is_freed) {
@@ -348,15 +349,8 @@ create_file_dentry(struct eventfs_inode *ei, int idx,
 
        mutex_unlock(&eventfs_mutex);
 
-       /* The lookup already has the parent->d_inode locked */
-       if (!lookup)
-               inode_lock(parent->d_inode);
-
        dentry = create_file(name, mode, attr, parent, data, fops);
 
-       if (!lookup)
-               inode_unlock(parent->d_inode);
-
        mutex_lock(&eventfs_mutex);
 
        if (IS_ERR_OR_NULL(dentry)) {
@@ -365,12 +359,14 @@ create_file_dentry(struct eventfs_inode *ei, int idx,
                 * created the dentry for this e_dentry. In which case
                 * use that one.
                 *
-                * Note, with the mutex held, the e_dentry cannot have content
-                * and the ei->is_freed be true at the same time.
+                * If ei->is_freed is set, the e_dentry is currently on its
+                * way to being freed, don't return it. If e_dentry is NULL
+                * it means it was already freed.
                 */
-               dentry = *e_dentry;
-               if (WARN_ON_ONCE(dentry && ei->is_freed))
+               if (ei->is_freed)
                        dentry = NULL;
+               else
+                       dentry = *e_dentry;
                /* The lookup does not need to up the dentry refcount */
                if (dentry && !lookup)
                        dget(dentry);
@@ -387,17 +383,14 @@ create_file_dentry(struct eventfs_inode *ei, int idx,
                 * Otherwise it means two dentries exist with the same name.
                 */
                WARN_ON_ONCE(!ei->is_freed);
-               invalidate = true;
+               dentry = NULL;
        }
        mutex_unlock(&eventfs_mutex);
 
-       if (invalidate)
-               d_invalidate(dentry);
-
-       if (lookup || invalidate)
+       if (lookup)
                dput(dentry);
 
-       return invalidate ? NULL : dentry;
+       return dentry;
 }
 
 /**
@@ -437,9 +430,10 @@ static struct dentry *
 create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei,
                  struct dentry *parent, bool lookup)
 {
-       bool invalidate = false;
        struct dentry *dentry = NULL;
 
+       WARN_ON_ONCE(!inode_is_locked(parent->d_inode));
+
        mutex_lock(&eventfs_mutex);
        if (pei->is_freed || ei->is_freed) {
                mutex_unlock(&eventfs_mutex);
@@ -456,15 +450,8 @@ create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei,
        }
        mutex_unlock(&eventfs_mutex);
 
-       /* The lookup already has the parent->d_inode locked */
-       if (!lookup)
-               inode_lock(parent->d_inode);
-
        dentry = create_dir(ei, parent);
 
-       if (!lookup)
-               inode_unlock(parent->d_inode);
-
        mutex_lock(&eventfs_mutex);
 
        if (IS_ERR_OR_NULL(dentry) && !ei->is_freed) {
@@ -473,8 +460,8 @@ create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei,
                 * created the dentry for this e_dentry. In which case
                 * use that one.
                 *
-                * Note, with the mutex held, the e_dentry cannot have content
-                * and the ei->is_freed be true at the same time.
+                * If ei->is_freed is set, the e_dentry is currently on its
+                * way to being freed.
                 */
                dentry = ei->dentry;
                if (dentry && !lookup)
@@ -493,16 +480,14 @@ create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei,
                 * Otherwise it means two dentries exist with the same name.
                 */
                WARN_ON_ONCE(!ei->is_freed);
-               invalidate = true;
+               dentry = NULL;
        }
        mutex_unlock(&eventfs_mutex);
-       if (invalidate)
-               d_invalidate(dentry);
 
-       if (lookup || invalidate)
+       if (lookup)
                dput(dentry);
 
-       return invalidate ? NULL : dentry;
+       return dentry;
 }
 
 /**
@@ -632,7 +617,7 @@ static int add_dentries(struct dentry ***dentries, struct dentry *d, int cnt)
 {
        struct dentry **tmp;
 
-       tmp = krealloc(*dentries, sizeof(d) * (cnt + 2), GFP_KERNEL);
+       tmp = krealloc(*dentries, sizeof(d) * (cnt + 2), GFP_NOFS);
        if (!tmp)
                return -1;
        tmp[cnt] = d;
@@ -698,6 +683,7 @@ static int dcache_dir_open_wrapper(struct inode *inode, struct file *file)
                return -ENOMEM;
        }
 
+       inode_lock(parent->d_inode);
        list_for_each_entry_srcu(ei_child, &ei->children, list,
                                 srcu_read_lock_held(&eventfs_srcu)) {
                d = create_dir_dentry(ei, ei_child, parent, false);
@@ -730,6 +716,7 @@ static int dcache_dir_open_wrapper(struct inode *inode, struct file *file)
                        cnt++;
                }
        }
+       inode_unlock(parent->d_inode);
        srcu_read_unlock(&eventfs_srcu, idx);
        ret = dcache_dir_open(inode, file);
 
index 5b54948..ae648de 100644 (file)
@@ -509,20 +509,15 @@ struct dentry *eventfs_start_creating(const char *name, struct dentry *parent)
        struct dentry *dentry;
        int error;
 
+       /* Must always have a parent. */
+       if (WARN_ON_ONCE(!parent))
+               return ERR_PTR(-EINVAL);
+
        error = simple_pin_fs(&trace_fs_type, &tracefs_mount,
                              &tracefs_mount_count);
        if (error)
                return ERR_PTR(error);
 
-       /*
-        * If the parent is not specified, we create it in the root.
-        * We need the root dentry to do this, which is in the super
-        * block. A pointer to that is in the struct vfsmount that we
-        * have around.
-        */
-       if (!parent)
-               parent = tracefs_mount->mnt_root;
-
        if (unlikely(IS_DEADDIR(parent->d_inode)))
                dentry = ERR_PTR(-ENOENT);
        else
index ed0bc8c..567fb37 100644 (file)
@@ -147,7 +147,7 @@ config XFS_ONLINE_SCRUB_STATS
        bool "XFS online metadata check usage data collection"
        default y
        depends on XFS_ONLINE_SCRUB
-       select XFS_DEBUG
+       select DEBUG_FS
        help
          If you say Y here, the kernel will gather usage data about
          the online metadata check subsystem.  This includes the number
index 3069194..100ab59 100644 (file)
@@ -2275,16 +2275,37 @@ xfs_alloc_min_freelist(
 
        ASSERT(mp->m_alloc_maxlevels > 0);
 
+       /*
+        * For a btree shorter than the maximum height, the worst case is that
+        * every level gets split and a new level is added, then while inserting
+        * another entry to refill the AGFL, every level under the old root gets
+        * split again. This is:
+        *
+        *   (full height split reservation) + (AGFL refill split height)
+        * = (current height + 1) + (current height - 1)
+        * = (new height) + (new height - 2)
+        * = 2 * new height - 2
+        *
+        * For a btree of maximum height, the worst case is that every level
+        * under the root gets split, then while inserting another entry to
+        * refill the AGFL, every level under the root gets split again. This is
+        * also:
+        *
+        *   2 * (current height - 1)
+        * = 2 * (new height - 1)
+        * = 2 * new height - 2
+        */
+
        /* space needed by-bno freespace btree */
        min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1,
-                                      mp->m_alloc_maxlevels);
+                                      mp->m_alloc_maxlevels) * 2 - 2;
        /* space needed by-size freespace btree */
        min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1,
-                                      mp->m_alloc_maxlevels);
+                                      mp->m_alloc_maxlevels) * 2 - 2;
        /* space needed reverse mapping used space btree */
        if (xfs_has_rmapbt(mp))
                min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1,
-                                               mp->m_rmap_maxlevels);
+                                               mp->m_rmap_maxlevels) * 2 - 2;
 
        return min_free;
 }
index bcfb6a4..f71679c 100644 (file)
@@ -245,21 +245,18 @@ xfs_defer_create_intents(
        return ret;
 }
 
-/* Abort all the intents that were committed. */
 STATIC void
-xfs_defer_trans_abort(
-       struct xfs_trans                *tp,
-       struct list_head                *dop_pending)
+xfs_defer_pending_abort(
+       struct xfs_mount                *mp,
+       struct list_head                *dop_list)
 {
        struct xfs_defer_pending        *dfp;
        const struct xfs_defer_op_type  *ops;
 
-       trace_xfs_defer_trans_abort(tp, _RET_IP_);
-
        /* Abort intent items that don't have a done item. */
-       list_for_each_entry(dfp, dop_pending, dfp_list) {
+       list_for_each_entry(dfp, dop_list, dfp_list) {
                ops = defer_op_types[dfp->dfp_type];
-               trace_xfs_defer_pending_abort(tp->t_mountp, dfp);
+               trace_xfs_defer_pending_abort(mp, dfp);
                if (dfp->dfp_intent && !dfp->dfp_done) {
                        ops->abort_intent(dfp->dfp_intent);
                        dfp->dfp_intent = NULL;
@@ -267,6 +264,16 @@ xfs_defer_trans_abort(
        }
 }
 
+/* Abort all the intents that were committed. */
+STATIC void
+xfs_defer_trans_abort(
+       struct xfs_trans                *tp,
+       struct list_head                *dop_pending)
+{
+       trace_xfs_defer_trans_abort(tp, _RET_IP_);
+       xfs_defer_pending_abort(tp->t_mountp, dop_pending);
+}
+
 /*
  * Capture resources that the caller said not to release ("held") when the
  * transaction commits.  Caller is responsible for zero-initializing @dres.
@@ -756,12 +763,13 @@ xfs_defer_ops_capture(
 
 /* Release all resources that we used to capture deferred ops. */
 void
-xfs_defer_ops_capture_free(
+xfs_defer_ops_capture_abort(
        struct xfs_mount                *mp,
        struct xfs_defer_capture        *dfc)
 {
        unsigned short                  i;
 
+       xfs_defer_pending_abort(mp, &dfc->dfc_dfops);
        xfs_defer_cancel_list(mp, &dfc->dfc_dfops);
 
        for (i = 0; i < dfc->dfc_held.dr_bufs; i++)
@@ -802,7 +810,7 @@ xfs_defer_ops_capture_and_commit(
        /* Commit the transaction and add the capture structure to the list. */
        error = xfs_trans_commit(tp);
        if (error) {
-               xfs_defer_ops_capture_free(mp, dfc);
+               xfs_defer_ops_capture_abort(mp, dfc);
                return error;
        }
 
index 114a3a4..8788ad5 100644 (file)
@@ -121,7 +121,7 @@ int xfs_defer_ops_capture_and_commit(struct xfs_trans *tp,
                struct list_head *capture_list);
 void xfs_defer_ops_continue(struct xfs_defer_capture *d, struct xfs_trans *tp,
                struct xfs_defer_resources *dres);
-void xfs_defer_ops_capture_free(struct xfs_mount *mp,
+void xfs_defer_ops_capture_abort(struct xfs_mount *mp,
                struct xfs_defer_capture *d);
 void xfs_defer_resources_rele(struct xfs_defer_resources *dres);
 
index 543f374..137a65b 100644 (file)
@@ -510,6 +510,9 @@ xfs_dinode_verify(
        if (mode && nextents + naextents > nblocks)
                return __this_address;
 
+       if (nextents + naextents == 0 && nblocks != 0)
+               return __this_address;
+
        if (S_ISDIR(mode) && nextents > mp->m_dir_geo->max_extents)
                return __this_address;
 
index ac6ba64..a013b87 100644 (file)
@@ -562,7 +562,8 @@ xfs_dquot_from_disk(
        struct xfs_dquot        *dqp,
        struct xfs_buf          *bp)
 {
-       struct xfs_disk_dquot   *ddqp = bp->b_addr + dqp->q_bufoffset;
+       struct xfs_dqblk        *dqb = xfs_buf_offset(bp, dqp->q_bufoffset);
+       struct xfs_disk_dquot   *ddqp = &dqb->dd_diskdq;
 
        /*
         * Ensure that we got the type and ID we were looking for.
@@ -1250,7 +1251,7 @@ xfs_qm_dqflush(
        }
 
        /* Flush the incore dquot to the ondisk buffer. */
-       dqblk = bp->b_addr + dqp->q_bufoffset;
+       dqblk = xfs_buf_offset(bp, dqp->q_bufoffset);
        xfs_dquot_to_disk(&dqblk->dd_diskdq, dqp);
 
        /*
index 8966ba8..2c2720c 100644 (file)
@@ -19,6 +19,7 @@
 #include "xfs_log.h"
 #include "xfs_log_priv.h"
 #include "xfs_log_recover.h"
+#include "xfs_error.h"
 
 STATIC void
 xlog_recover_dquot_ra_pass2(
@@ -65,6 +66,7 @@ xlog_recover_dquot_commit_pass2(
 {
        struct xfs_mount                *mp = log->l_mp;
        struct xfs_buf                  *bp;
+       struct xfs_dqblk                *dqb;
        struct xfs_disk_dquot           *ddq, *recddq;
        struct xfs_dq_logformat         *dq_f;
        xfs_failaddr_t                  fa;
@@ -130,14 +132,14 @@ xlog_recover_dquot_commit_pass2(
                return error;
 
        ASSERT(bp);
-       ddq = xfs_buf_offset(bp, dq_f->qlf_boffset);
+       dqb = xfs_buf_offset(bp, dq_f->qlf_boffset);
+       ddq = &dqb->dd_diskdq;
 
        /*
         * If the dquot has an LSN in it, recover the dquot only if it's less
         * than the lsn of the transaction we are replaying.
         */
        if (xfs_has_crc(mp)) {
-               struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddq;
                xfs_lsn_t       lsn = be64_to_cpu(dqb->dd_lsn);
 
                if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
@@ -147,10 +149,23 @@ xlog_recover_dquot_commit_pass2(
 
        memcpy(ddq, recddq, item->ri_buf[1].i_len);
        if (xfs_has_crc(mp)) {
-               xfs_update_cksum((char *)ddq, sizeof(struct xfs_dqblk),
+               xfs_update_cksum((char *)dqb, sizeof(struct xfs_dqblk),
                                 XFS_DQUOT_CRC_OFF);
        }
 
+       /* Validate the recovered dquot. */
+       fa = xfs_dqblk_verify(log->l_mp, dqb, dq_f->qlf_id);
+       if (fa) {
+               XFS_CORRUPTION_ERROR("Bad dquot after recovery",
+                               XFS_ERRLEVEL_LOW, mp, dqb,
+                               sizeof(struct xfs_dqblk));
+               xfs_alert(mp,
+ "Metadata corruption detected at %pS, dquot 0x%x",
+                               fa, dq_f->qlf_id);
+               error = -EFSCORRUPTED;
+               goto out_release;
+       }
+
        ASSERT(dq_f->qlf_size == 2);
        ASSERT(bp->b_mount == mp);
        bp->b_flags |= _XBF_LOGRECOVERY;
index 3dc4793..3beb470 100644 (file)
@@ -569,6 +569,14 @@ extern void xfs_setup_inode(struct xfs_inode *ip);
 extern void xfs_setup_iops(struct xfs_inode *ip);
 extern void xfs_diflags_to_iflags(struct xfs_inode *ip, bool init);
 
+static inline void xfs_update_stable_writes(struct xfs_inode *ip)
+{
+       if (bdev_stable_writes(xfs_inode_buftarg(ip)->bt_bdev))
+               mapping_set_stable_writes(VFS_I(ip)->i_mapping);
+       else
+               mapping_clear_stable_writes(VFS_I(ip)->i_mapping);
+}
+
 /*
  * When setting up a newly allocated inode, we need to call
  * xfs_finish_inode_setup() once the inode is fully instantiated at
index 0e5dba2..144198a 100644 (file)
@@ -286,6 +286,7 @@ xlog_recover_inode_commit_pass2(
        struct xfs_log_dinode           *ldip;
        uint                            isize;
        int                             need_free = 0;
+       xfs_failaddr_t                  fa;
 
        if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
                in_f = item->ri_buf[0].i_addr;
@@ -369,24 +370,26 @@ xlog_recover_inode_commit_pass2(
         * superblock flag to determine whether we need to look at di_flushiter
         * to skip replay when the on disk inode is newer than the log one
         */
-       if (!xfs_has_v3inodes(mp) &&
-           ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
-               /*
-                * Deal with the wrap case, DI_MAX_FLUSH is less
-                * than smaller numbers
-                */
-               if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH &&
-                   ldip->di_flushiter < (DI_MAX_FLUSH >> 1)) {
-                       /* do nothing */
-               } else {
-                       trace_xfs_log_recover_inode_skip(log, in_f);
-                       error = 0;
-                       goto out_release;
+       if (!xfs_has_v3inodes(mp)) {
+               if (ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
+                       /*
+                        * Deal with the wrap case, DI_MAX_FLUSH is less
+                        * than smaller numbers
+                        */
+                       if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH &&
+                           ldip->di_flushiter < (DI_MAX_FLUSH >> 1)) {
+                               /* do nothing */
+                       } else {
+                               trace_xfs_log_recover_inode_skip(log, in_f);
+                               error = 0;
+                               goto out_release;
+                       }
                }
+
+               /* Take the opportunity to reset the flush iteration count */
+               ldip->di_flushiter = 0;
        }
 
-       /* Take the opportunity to reset the flush iteration count */
-       ldip->di_flushiter = 0;
 
        if (unlikely(S_ISREG(ldip->di_mode))) {
                if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) &&
@@ -528,8 +531,19 @@ out_owner_change:
            (dip->di_mode != 0))
                error = xfs_recover_inode_owner_change(mp, dip, in_f,
                                                       buffer_list);
-       /* re-generate the checksum. */
+       /* re-generate the checksum and validate the recovered inode. */
        xfs_dinode_calc_crc(log->l_mp, dip);
+       fa = xfs_dinode_verify(log->l_mp, in_f->ilf_ino, dip);
+       if (fa) {
+               XFS_CORRUPTION_ERROR(
+                       "Bad dinode after recovery",
+                               XFS_ERRLEVEL_LOW, mp, dip, sizeof(*dip));
+               xfs_alert(mp,
+                       "Metadata corruption detected at %pS, inode 0x%llx",
+                       fa, in_f->ilf_ino);
+               error = -EFSCORRUPTED;
+               goto out_release;
+       }
 
        ASSERT(bp->b_mount == mp);
        bp->b_flags |= _XBF_LOGRECOVERY;
index a82470e..6c39196 100644 (file)
@@ -1121,23 +1121,25 @@ xfs_ioctl_setattr_xflags(
        struct fileattr         *fa)
 {
        struct xfs_mount        *mp = ip->i_mount;
+       bool                    rtflag = (fa->fsx_xflags & FS_XFLAG_REALTIME);
        uint64_t                i_flags2;
 
-       /* Can't change realtime flag if any extents are allocated. */
-       if ((ip->i_df.if_nextents || ip->i_delayed_blks) &&
-           XFS_IS_REALTIME_INODE(ip) != (fa->fsx_xflags & FS_XFLAG_REALTIME))
-               return -EINVAL;
+       if (rtflag != XFS_IS_REALTIME_INODE(ip)) {
+               /* Can't change realtime flag if any extents are allocated. */
+               if (ip->i_df.if_nextents || ip->i_delayed_blks)
+                       return -EINVAL;
+       }
 
-       /* If realtime flag is set then must have realtime device */
-       if (fa->fsx_xflags & FS_XFLAG_REALTIME) {
+       if (rtflag) {
+               /* If realtime flag is set then must have realtime device */
                if (mp->m_sb.sb_rblocks == 0 || mp->m_sb.sb_rextsize == 0 ||
                    xfs_extlen_to_rtxmod(mp, ip->i_extsize))
                        return -EINVAL;
-       }
 
-       /* Clear reflink if we are actually able to set the rt flag. */
-       if ((fa->fsx_xflags & FS_XFLAG_REALTIME) && xfs_is_reflink_inode(ip))
-               ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
+               /* Clear reflink if we are actually able to set the rt flag. */
+               if (xfs_is_reflink_inode(ip))
+                       ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
+       }
 
        /* diflags2 only valid for v3 inodes. */
        i_flags2 = xfs_flags2diflags2(ip, fa->fsx_xflags);
@@ -1148,6 +1150,14 @@ xfs_ioctl_setattr_xflags(
        ip->i_diflags2 = i_flags2;
 
        xfs_diflags_to_iflags(ip, false);
+
+       /*
+        * Make the stable writes flag match that of the device the inode
+        * resides on when flipping the RT flag.
+        */
+       if (rtflag != XFS_IS_REALTIME_INODE(ip) && S_ISREG(VFS_I(ip)->i_mode))
+               xfs_update_stable_writes(ip);
+
        xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
        XFS_STATS_INC(mp, xs_ig_attrchg);
index fdfda4f..a0d77f5 100644 (file)
@@ -1298,6 +1298,13 @@ xfs_setup_inode(
        gfp_mask = mapping_gfp_mask(inode->i_mapping);
        mapping_set_gfp_mask(inode->i_mapping, (gfp_mask & ~(__GFP_FS)));
 
+       /*
+        * For real-time inodes update the stable write flags to that of the RT
+        * device instead of the data device.
+        */
+       if (S_ISREG(inode->i_mode) && XFS_IS_REALTIME_INODE(ip))
+               xfs_update_stable_writes(ip);
+
        /*
         * If there is no attribute fork no ACL can exist on this inode,
         * and it can't have any file capabilities attached to it either.
index 51c100c..ee206fa 100644 (file)
@@ -1893,9 +1893,7 @@ xlog_write_iclog(
                 * the buffer manually, the code needs to be kept in sync
                 * with the I/O completion path.
                 */
-               xlog_state_done_syncing(iclog);
-               up(&iclog->ic_sema);
-               return;
+               goto sync;
        }
 
        /*
@@ -1925,20 +1923,17 @@ xlog_write_iclog(
                 * avoid shutdown re-entering this path and erroring out again.
                 */
                if (log->l_targ != log->l_mp->m_ddev_targp &&
-                   blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev)) {
-                       xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
-                       return;
-               }
+                   blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev))
+                       goto shutdown;
        }
        if (iclog->ic_flags & XLOG_ICL_NEED_FUA)
                iclog->ic_bio.bi_opf |= REQ_FUA;
 
        iclog->ic_flags &= ~(XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA);
 
-       if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count)) {
-               xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
-               return;
-       }
+       if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count))
+               goto shutdown;
+
        if (is_vmalloc_addr(iclog->ic_data))
                flush_kernel_vmap_range(iclog->ic_data, count);
 
@@ -1959,6 +1954,12 @@ xlog_write_iclog(
        }
 
        submit_bio(&iclog->ic_bio);
+       return;
+shutdown:
+       xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
+sync:
+       xlog_state_done_syncing(iclog);
+       up(&iclog->ic_sema);
 }
 
 /*
index 13b94d2..a1e18b2 100644 (file)
@@ -2511,7 +2511,7 @@ xlog_abort_defer_ops(
 
        list_for_each_entry_safe(dfc, next, capture_list, dfc_list) {
                list_del_init(&dfc->dfc_list);
-               xfs_defer_ops_capture_free(mp, dfc);
+               xfs_defer_ops_capture_abort(mp, dfc);
        }
 }
 
index 658edee..e5b62dc 100644 (file)
@@ -784,6 +784,7 @@ xfs_reflink_end_cow_extent(
                }
        }
        del = got;
+       xfs_trim_extent(&del, *offset_fsb, end_fsb - *offset_fsb);
 
        /* Grab the corresponding mapping in the data fork. */
        nmaps = 1;
index afeed6e..1216d72 100644 (file)
@@ -542,6 +542,7 @@ int acpi_device_set_power(struct acpi_device *device, int state);
 int acpi_bus_init_power(struct acpi_device *device);
 int acpi_device_fix_up_power(struct acpi_device *device);
 void acpi_device_fix_up_power_extended(struct acpi_device *adev);
+void acpi_device_fix_up_power_children(struct acpi_device *adev);
 int acpi_bus_update_power(acpi_handle handle, int *state_p);
 int acpi_device_update_power(struct acpi_device *device, int *state_p);
 bool acpi_bus_power_manageable(acpi_handle handle);
index 995513f..0655aa5 100644 (file)
@@ -70,7 +70,7 @@ static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
  */
 static __always_inline int queued_spin_value_unlocked(struct qspinlock lock)
 {
-       return !atomic_read(&lock.val);
+       return !lock.val.counter;
 }
 
 /**
index 2580e05..004b38a 100644 (file)
@@ -15,7 +15,6 @@ extern int blk_pre_runtime_suspend(struct request_queue *q);
 extern void blk_post_runtime_suspend(struct request_queue *q, int err);
 extern void blk_pre_runtime_resume(struct request_queue *q);
 extern void blk_post_runtime_resume(struct request_queue *q);
-extern void blk_set_runtime_active(struct request_queue *q);
 #else
 static inline void blk_pm_runtime_init(struct request_queue *q,
                                       struct device *dev) {}
index b4825d3..6762dac 100644 (file)
@@ -56,7 +56,7 @@ extern struct idr btf_idr;
 extern spinlock_t btf_idr_lock;
 extern struct kobject *btf_kobj;
 extern struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma;
-extern bool bpf_global_ma_set, bpf_global_percpu_ma_set;
+extern bool bpf_global_ma_set;
 
 typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64);
 typedef int (*bpf_iter_init_seq_priv_t)(void *private_data,
@@ -909,10 +909,14 @@ bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size)
        aux->ctx_field_size = size;
 }
 
+static bool bpf_is_ldimm64(const struct bpf_insn *insn)
+{
+       return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
+}
+
 static inline bool bpf_pseudo_func(const struct bpf_insn *insn)
 {
-       return insn->code == (BPF_LD | BPF_IMM | BPF_DW) &&
-              insn->src_reg == BPF_PSEUDO_FUNC;
+       return bpf_is_ldimm64(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
 }
 
 struct bpf_prog_ops {
index 24213a9..aa4d19d 100644 (file)
@@ -301,6 +301,17 @@ struct bpf_func_state {
        struct tnum callback_ret_range;
        bool in_async_callback_fn;
        bool in_exception_callback_fn;
+       /* For callback calling functions that limit number of possible
+        * callback executions (e.g. bpf_loop) keeps track of current
+        * simulated iteration number.
+        * Value in frame N refers to number of times callback with frame
+        * N+1 was simulated, e.g. for the following call:
+        *
+        *   bpf_loop(..., fn, ...); | suppose current frame is N
+        *                           | fn would be simulated in frame N+1
+        *                           | number of simulations is tracked in frame N
+        */
+       u32 callback_depth;
 
        /* The following fields should be last. See copy_func_state() */
        int acquired_refs;
@@ -400,6 +411,7 @@ struct bpf_verifier_state {
        struct bpf_idx_pair *jmp_history;
        u32 jmp_history_cnt;
        u32 dfs_depth;
+       u32 callback_unroll_depth;
 };
 
 #define bpf_get_spilled_reg(slot, frame, mask)                         \
@@ -511,6 +523,10 @@ struct bpf_insn_aux_data {
         * this instruction, regardless of any heuristics
         */
        bool force_checkpoint;
+       /* true if instruction is a call to a helper function that
+        * accepts callback function as a parameter.
+        */
+       bool calls_callback;
 };
 
 #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
index d305db7..efc0c0b 100644 (file)
@@ -195,6 +195,7 @@ enum cpuhp_state {
        CPUHP_AP_ARM_CORESIGHT_CTI_STARTING,
        CPUHP_AP_ARM64_ISNDEP_STARTING,
        CPUHP_AP_SMPCFD_DYING,
+       CPUHP_AP_HRTIMERS_DYING,
        CPUHP_AP_X86_TBOOT_DYING,
        CPUHP_AP_ARM_CACHE_B15_RAC_DYING,
        CPUHP_AP_ONLINE,
index 45fca09..69501e0 100644 (file)
@@ -50,9 +50,7 @@
            "   .previous"                                              "\n"    \
        )
 
-#ifdef CONFIG_IA64
-#define KSYM_FUNC(name)                @fptr(name)
-#elif defined(CONFIG_PARISC) && defined(CONFIG_64BIT)
+#if defined(CONFIG_PARISC) && defined(CONFIG_64BIT)
 #define KSYM_FUNC(name)                P%name
 #else
 #define KSYM_FUNC(name)                name
index 5a8387a..bf43f3f 100644 (file)
@@ -679,6 +679,7 @@ struct hid_device {                                                 /* device report descriptor */
        struct list_head debug_list;
        spinlock_t  debug_list_lock;
        wait_queue_head_t debug_wait;
+       struct kref                     ref;
 
        unsigned int id;                                                /* system unique id */
 
@@ -687,6 +688,8 @@ struct hid_device {                                                 /* device report descriptor */
 #endif /* CONFIG_BPF */
 };
 
+void hiddev_free(struct kref *ref);
+
 #define to_hid_device(pdev) \
        container_of(pdev, struct hid_device, dev)
 
index 0ee1401..f2044d5 100644 (file)
@@ -531,9 +531,9 @@ extern void sysrq_timer_list_show(void);
 
 int hrtimers_prepare_cpu(unsigned int cpu);
 #ifdef CONFIG_HOTPLUG_CPU
-int hrtimers_dead_cpu(unsigned int cpu);
+int hrtimers_cpu_dying(unsigned int cpu);
 #else
-#define hrtimers_dead_cpu      NULL
+#define hrtimers_cpu_dying     NULL
 #endif
 
 #endif
index 8fa23bd..007fd9c 100644 (file)
@@ -420,7 +420,7 @@ static inline u32 linkmode_adv_to_mii_t1_adv_m_t(unsigned long *advertising)
  * A function that translates value of following registers to the linkmode:
  * IEEE 802.3-2018 45.2.3.10 "EEE control and capability 1" register (3.20)
  * IEEE 802.3-2018 45.2.7.13 "EEE advertisement 1" register (7.60)
- * IEEE 802.3-2018 45.2.7.14 "EEE "link partner ability 1 register (7.61)
+ * IEEE 802.3-2018 45.2.7.14 "EEE link partner ability 1" register (7.61)
  */
 static inline void mii_eee_cap1_mod_linkmode_t(unsigned long *adv, u32 val)
 {
index a16c9cc..2564e20 100644 (file)
@@ -1797,6 +1797,13 @@ enum netdev_ml_priv_type {
        ML_PRIV_CAN,
 };
 
+enum netdev_stat_type {
+       NETDEV_PCPU_STAT_NONE,
+       NETDEV_PCPU_STAT_LSTATS, /* struct pcpu_lstats */
+       NETDEV_PCPU_STAT_TSTATS, /* struct pcpu_sw_netstats */
+       NETDEV_PCPU_STAT_DSTATS, /* struct pcpu_dstats */
+};
+
 /**
  *     struct net_device - The DEVICE structure.
  *
@@ -1991,10 +1998,14 @@ enum netdev_ml_priv_type {
  *
  *     @ml_priv:       Mid-layer private
  *     @ml_priv_type:  Mid-layer private type
- *     @lstats:        Loopback statistics
- *     @tstats:        Tunnel statistics
- *     @dstats:        Dummy statistics
- *     @vstats:        Virtual ethernet statistics
+ *
+ *     @pcpu_stat_type:        Type of device statistics which the core should
+ *                             allocate/free: none, lstats, tstats, dstats. none
+ *                             means the driver is handling statistics allocation/
+ *                             freeing internally.
+ *     @lstats:                Loopback statistics: packets, bytes
+ *     @tstats:                Tunnel statistics: RX/TX packets, RX/TX bytes
+ *     @dstats:                Dummy statistics: RX/TX/drop packets, RX/TX bytes
  *
  *     @garp_port:     GARP
  *     @mrp_port:      MRP
@@ -2354,6 +2365,7 @@ struct net_device {
        void                            *ml_priv;
        enum netdev_ml_priv_type        ml_priv_type;
 
+       enum netdev_stat_type           pcpu_stat_type:8;
        union {
                struct pcpu_lstats __percpu             *lstats;
                struct pcpu_sw_netstats __percpu        *tstats;
@@ -2755,6 +2767,16 @@ struct pcpu_sw_netstats {
        struct u64_stats_sync   syncp;
 } __aligned(4 * sizeof(u64));
 
+struct pcpu_dstats {
+       u64                     rx_packets;
+       u64                     rx_bytes;
+       u64                     rx_drops;
+       u64                     tx_packets;
+       u64                     tx_bytes;
+       u64                     tx_drops;
+       struct u64_stats_sync   syncp;
+} __aligned(8 * sizeof(u64));
+
 struct pcpu_lstats {
        u64_stats_t packets;
        u64_stats_t bytes;
index bcc1ea4..06142ff 100644 (file)
@@ -204,6 +204,8 @@ enum mapping_flags {
        AS_NO_WRITEBACK_TAGS = 5,
        AS_LARGE_FOLIO_SUPPORT = 6,
        AS_RELEASE_ALWAYS,      /* Call ->release_folio(), even if no private data */
+       AS_STABLE_WRITES,       /* must wait for writeback before modifying
+                                  folio contents */
 };
 
 /**
@@ -289,6 +291,21 @@ static inline void mapping_clear_release_always(struct address_space *mapping)
        clear_bit(AS_RELEASE_ALWAYS, &mapping->flags);
 }
 
+static inline bool mapping_stable_writes(const struct address_space *mapping)
+{
+       return test_bit(AS_STABLE_WRITES, &mapping->flags);
+}
+
+static inline void mapping_set_stable_writes(struct address_space *mapping)
+{
+       set_bit(AS_STABLE_WRITES, &mapping->flags);
+}
+
+static inline void mapping_clear_stable_writes(struct address_space *mapping)
+{
+       clear_bit(AS_STABLE_WRITES, &mapping->flags);
+}
+
 static inline gfp_t mapping_gfp_mask(struct address_space * mapping)
 {
        return mapping->gfp_mask;
index afb028c..5547ba6 100644 (file)
@@ -843,11 +843,11 @@ struct perf_event {
 };
 
 /*
- *           ,-----------------------[1:n]----------------------.
- *           V                                                  V
- * perf_event_context <-[1:n]-> perf_event_pmu_context <--- perf_event
- *           ^                      ^     |                     |
- *           `--------[1:n]---------'     `-[n:1]-> pmu <-[1:n]-'
+ *           ,-----------------------[1:n]------------------------.
+ *           V                                                    V
+ * perf_event_context <-[1:n]-> perf_event_pmu_context <-[1:n]- perf_event
+ *                                        |                       |
+ *                                        `--[n:1]-> pmu <-[1:n]--'
  *
  *
  * struct perf_event_pmu_context  lifetime is refcount based and RCU freed
@@ -865,6 +865,9 @@ struct perf_event {
  * ctx->mutex pinning the configuration. Since we hold a reference on
  * group_leader (through the filedesc) it can't go away, therefore it's
  * associated pmu_ctx must exist and cannot change due to ctx->mutex.
+ *
+ * perf_event holds a refcount on perf_event_context
+ * perf_event holds a refcount on perf_event_pmu_context
  */
 struct perf_event_pmu_context {
        struct pmu                      *pmu;
index c36e7a3..3be2cb5 100644 (file)
@@ -14,6 +14,7 @@
 
 #ifdef CONFIG_GCC_PLUGIN_STACKLEAK
 #include <asm/stacktrace.h>
+#include <linux/linkage.h>
 
 /*
  * The lowest address on tsk's stack which we can plausibly erase.
@@ -76,6 +77,11 @@ static inline void stackleak_task_init(struct task_struct *t)
 # endif
 }
 
+asmlinkage void noinstr stackleak_erase(void);
+asmlinkage void noinstr stackleak_erase_on_task_stack(void);
+asmlinkage void noinstr stackleak_erase_off_task_stack(void);
+void __no_caller_saved_registers noinstr stackleak_track_stack(void);
+
 #else /* !CONFIG_GCC_PLUGIN_STACKLEAK */
 static inline void stackleak_task_init(struct task_struct *t) { }
 #endif
index b513749..e4de6bc 100644 (file)
@@ -144,10 +144,6 @@ struct usb_phy {
         */
        int     (*set_wakeup)(struct usb_phy *x, bool enabled);
 
-       /* notify phy port status change */
-       int     (*notify_port_status)(struct usb_phy *x, int port,
-                                     u16 portstatus, u16 portchange);
-
        /* notify phy connect status change */
        int     (*notify_connect)(struct usb_phy *x,
                        enum usb_device_speed speed);
@@ -320,15 +316,6 @@ usb_phy_set_wakeup(struct usb_phy *x, bool enabled)
                return 0;
 }
 
-static inline int
-usb_phy_notify_port_status(struct usb_phy *x, int port, u16 portstatus, u16 portchange)
-{
-       if (x && x->notify_port_status)
-               return x->notify_port_status(x, port, portstatus, portchange);
-       else
-               return 0;
-}
-
 static inline int
 usb_phy_notify_connect(struct usb_phy *x, enum usb_device_speed speed)
 {
index d0f2797..a09e13a 100644 (file)
@@ -5,13 +5,6 @@
 #include <linux/pci.h>
 #include <linux/virtio_pci.h>
 
-struct virtio_pci_modern_common_cfg {
-       struct virtio_pci_common_cfg cfg;
-
-       __le16 queue_notify_data;       /* read-write */
-       __le16 queue_reset;             /* read-write */
-};
-
 /**
  * struct virtio_pci_modern_device - info for modern PCI virtio
  * @pci_dev:       Ptr to the PCI device struct
index 3bbd13a..b157c5c 100644 (file)
@@ -178,9 +178,9 @@ static inline __be32 nft_reg_load_be32(const u32 *sreg)
        return *(__force __be32 *)sreg;
 }
 
-static inline void nft_reg_store64(u32 *dreg, u64 val)
+static inline void nft_reg_store64(u64 *dreg, u64 val)
 {
-       put_unaligned(val, (u64 *)dreg);
+       put_unaligned(val, dreg);
 }
 
 static inline u64 nft_reg_load64(const u32 *sreg)
index 0ba2e6b..9ec0163 100644 (file)
@@ -10,6 +10,7 @@ int netkit_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog);
 int netkit_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
 int netkit_prog_detach(const union bpf_attr *attr, struct bpf_prog *prog);
 int netkit_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr);
+INDIRECT_CALLABLE_DECLARE(struct net_device *netkit_peer_dev(struct net_device *dev));
 #else
 static inline int netkit_prog_attach(const union bpf_attr *attr,
                                     struct bpf_prog *prog)
@@ -34,5 +35,10 @@ static inline int netkit_prog_query(const union bpf_attr *attr,
 {
        return -EINVAL;
 }
+
+static inline struct net_device *netkit_peer_dev(struct net_device *dev)
+{
+       return NULL;
+}
 #endif /* CONFIG_NETKIT */
 #endif /* __NET_NETKIT_H */
index 8a6dbfb..77f87c6 100644 (file)
@@ -58,6 +58,11 @@ static inline struct nf_flowtable *tcf_ct_ft(const struct tc_action *a)
        return to_ct_params(a)->nf_ft;
 }
 
+static inline struct nf_conntrack_helper *tcf_ct_helper(const struct tc_action *a)
+{
+       return to_ct_params(a)->helper;
+}
+
 #else
 static inline uint16_t tcf_ct_zone(const struct tc_action *a) { return 0; }
 static inline int tcf_ct_action(const struct tc_action *a) { return 0; }
@@ -65,6 +70,10 @@ static inline struct nf_flowtable *tcf_ct_ft(const struct tc_action *a)
 {
        return NULL;
 }
+static inline struct nf_conntrack_helper *tcf_ct_helper(const struct tc_action *a)
+{
+       return NULL;
+}
 #endif /* CONFIG_NF_CONNTRACK */
 
 #if IS_ENABLED(CONFIG_NET_ACT_CT)
index 4c53a5e..f7e537f 100644 (file)
        E_(rxrpc_rtt_tx_ping,                   "PING")
 
 #define rxrpc_rtt_rx_traces \
-       EM(rxrpc_rtt_rx_cancel,                 "CNCL") \
+       EM(rxrpc_rtt_rx_other_ack,              "OACK") \
        EM(rxrpc_rtt_rx_obsolete,               "OBSL") \
        EM(rxrpc_rtt_rx_lost,                   "LOST") \
        EM(rxrpc_rtt_rx_ping_response,          "PONG") \
index c25fc96..d24e8e1 100644 (file)
  */
 #define BTRFS_METADATA_ITEM_KEY        169
 
+/*
+ * Special inline ref key which stores the id of the subvolume which originally
+ * created the extent. This subvolume owns the extent permanently from the
+ * perspective of simple quotas. Needed to know which subvolume to free quota
+ * usage from when the extent is deleted.
+ *
+ * Stored as an inline ref to avoid wasting space on a separate item on top
+ * of the existing extent item. However, unlike the other inline refs, there
+ * is only one owner ref per extent rather than one per extent modification.
+ *
+ * Because of this, it goes at the front of the list of inline refs, and thus
+ * must have a lower type value than any other inline ref type (to satisfy the
+ * disk format rule that inline refs have non-decreasing type).
+ */
+#define BTRFS_EXTENT_OWNER_REF_KEY     172
+
 #define BTRFS_TREE_BLOCK_REF_KEY       176
 
 #define BTRFS_EXTENT_DATA_REF_KEY      178
 
 #define BTRFS_SHARED_DATA_REF_KEY      184
 
-/*
- * Special inline ref key which stores the id of the subvolume which originally
- * created the extent. This subvolume owns the extent permanently from the
- * perspective of simple quotas. Needed to know which subvolume to free quota
- * usage from when the extent is deleted.
- */
-#define BTRFS_EXTENT_OWNER_REF_KEY     188
-
 /*
  * block groups give us hints into the extent allocation trees.  Which
  * blocks are free etc etc
index 6c80f96..282e90a 100644 (file)
 #define AT_HANDLE_FID          AT_REMOVEDIR    /* file handle is needed to
                                        compare object identity and may not
                                        be usable to open_by_handle_at(2) */
+#if defined(__KERNEL__)
+#define AT_GETATTR_NOSEC       0x80000000
+#endif
 
 #endif /* _UAPI_LINUX_FCNTL_H */
index f703afc..44f4dd2 100644 (file)
@@ -166,6 +166,17 @@ struct virtio_pci_common_cfg {
        __le32 queue_used_hi;           /* read-write */
 };
 
+/*
+ * Warning: do not use sizeof on this: use offsetofend for
+ * specific fields you need.
+ */
+struct virtio_pci_modern_common_cfg {
+       struct virtio_pci_common_cfg cfg;
+
+       __le16 queue_notify_data;       /* read-write */
+       __le16 queue_reset;             /* read-write */
+};
+
 /* Fields in VIRTIO_PCI_CAP_PCI_CFG: */
 struct virtio_pci_cfg_cap {
        struct virtio_pci_cap cap;
index 23932b0..3b07409 100644 (file)
@@ -88,7 +88,6 @@ void xen_irq_resume(void);
 
 /* Clear an irq's pending state, in preparation for polling on it */
 void xen_clear_irq_pending(int irq);
-void xen_set_irq_pending(int irq);
 bool xen_test_irq_pending(int irq);
 
 /* Poll waiting for an irq to become pending.  In the usual case, the
@@ -101,8 +100,8 @@ void xen_poll_irq_timeout(int irq, u64 timeout);
 
 /* Determine the IRQ which is bound to an event channel */
 unsigned int irq_from_evtchn(evtchn_port_t evtchn);
-int irq_from_virq(unsigned int cpu, unsigned int virq);
-evtchn_port_t evtchn_from_irq(unsigned irq);
+int irq_evtchn_from_virq(unsigned int cpu, unsigned int virq,
+                        evtchn_port_t *evtchn);
 
 int xen_set_callback_via(uint64_t via);
 int xen_evtchn_do_upcall(void);
@@ -122,9 +121,6 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
 /* De-allocates the above mentioned physical interrupt. */
 int xen_destroy_irq(int irq);
 
-/* Return irq from pirq */
-int xen_irq_from_pirq(unsigned pirq);
-
 /* Return the pirq allocated to the irq. */
 int xen_pirq_from_irq(unsigned irq);
 
index f04a430..976e950 100644 (file)
@@ -145,13 +145,8 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
        if (has_lock && (ctx->flags & IORING_SETUP_SQPOLL)) {
                struct io_sq_data *sq = ctx->sq_data;
 
-               if (mutex_trylock(&sq->lock)) {
-                       if (sq->thread) {
-                               sq_pid = task_pid_nr(sq->thread);
-                               sq_cpu = task_cpu(sq->thread);
-                       }
-                       mutex_unlock(&sq->lock);
-               }
+               sq_pid = sq->task_pid;
+               sq_cpu = sq->sq_cpu;
        }
 
        seq_printf(m, "SqThread:\t%d\n", sq_pid);
index 08e3b17..eccea85 100644 (file)
@@ -254,7 +254,7 @@ int io_linkat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
        newf = u64_to_user_ptr(READ_ONCE(sqe->addr2));
        lnk->flags = READ_ONCE(sqe->hardlink_flags);
 
-       lnk->oldpath = getname(oldf);
+       lnk->oldpath = getname_uflags(oldf, lnk->flags);
        if (IS_ERR(lnk->oldpath))
                return PTR_ERR(lnk->oldpath);
 
index 7034be5..f521c59 100644 (file)
@@ -1258,7 +1258,7 @@ int io_import_fixed(int ddir, struct iov_iter *iter,
                 */
                const struct bio_vec *bvec = imu->bvec;
 
-               if (offset <= bvec->bv_len) {
+               if (offset < bvec->bv_len) {
                        /*
                         * Note, huge pages buffers consists of one large
                         * bvec entry and should always go this way. The other
index bd6c2c7..65b5dbe 100644 (file)
@@ -214,6 +214,7 @@ static bool io_sqd_handle_event(struct io_sq_data *sqd)
                        did_sig = get_signal(&ksig);
                cond_resched();
                mutex_lock(&sqd->lock);
+               sqd->sq_cpu = raw_smp_processor_id();
        }
        return did_sig || test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
 }
@@ -229,10 +230,15 @@ static int io_sq_thread(void *data)
        snprintf(buf, sizeof(buf), "iou-sqp-%d", sqd->task_pid);
        set_task_comm(current, buf);
 
-       if (sqd->sq_cpu != -1)
+       /* reset to our pid after we've set task_comm, for fdinfo */
+       sqd->task_pid = current->pid;
+
+       if (sqd->sq_cpu != -1) {
                set_cpus_allowed_ptr(current, cpumask_of(sqd->sq_cpu));
-       else
+       } else {
                set_cpus_allowed_ptr(current, cpu_online_mask);
+               sqd->sq_cpu = raw_smp_processor_id();
+       }
 
        mutex_lock(&sqd->lock);
        while (1) {
@@ -261,6 +267,7 @@ static int io_sq_thread(void *data)
                                mutex_unlock(&sqd->lock);
                                cond_resched();
                                mutex_lock(&sqd->lock);
+                               sqd->sq_cpu = raw_smp_processor_id();
                        }
                        continue;
                }
@@ -294,6 +301,7 @@ static int io_sq_thread(void *data)
                                mutex_unlock(&sqd->lock);
                                schedule();
                                mutex_lock(&sqd->lock);
+                               sqd->sq_cpu = raw_smp_processor_id();
                        }
                        list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
                                atomic_andnot(IORING_SQ_NEED_WAKEUP,
index 91e82e3..7a98cd1 100644 (file)
@@ -531,7 +531,7 @@ int audit_exe_compare(struct task_struct *tsk, struct audit_fsnotify_mark *mark)
        if (tsk != current)
                return 0;
 
-       if (WARN_ON_ONCE(!current->mm))
+       if (!current->mm)
                return 0;
        exe_file = get_mm_exe_file(current->mm);
        if (!exe_file)
index 08626b5..cd3afe5 100644 (file)
@@ -64,8 +64,8 @@
 #define OFF    insn->off
 #define IMM    insn->imm
 
-struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma;
-bool bpf_global_ma_set, bpf_global_percpu_ma_set;
+struct bpf_mem_alloc bpf_global_ma;
+bool bpf_global_ma_set;
 
 /* No hurry in this branch
  *
@@ -2934,9 +2934,7 @@ static int __init bpf_global_ma_init(void)
 
        ret = bpf_mem_alloc_init(&bpf_global_ma, 0, false);
        bpf_global_ma_set = !ret;
-       ret = bpf_mem_alloc_init(&bpf_global_percpu_ma, 0, true);
-       bpf_global_percpu_ma_set = !ret;
-       return !bpf_global_ma_set || !bpf_global_percpu_ma_set;
+       return ret;
 }
 late_initcall(bpf_global_ma_init);
 #endif
index bd1c42e..af2819d 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/poison.h>
 #include <linux/module.h>
 #include <linux/cpumask.h>
+#include <linux/bpf_mem_alloc.h>
 #include <net/xdp.h>
 
 #include "disasm.h"
@@ -41,6 +42,9 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
 #undef BPF_LINK_TYPE
 };
 
+struct bpf_mem_alloc bpf_global_percpu_ma;
+static bool bpf_global_percpu_ma_set;
+
 /* bpf_check() is a static code analyzer that walks eBPF program
  * instruction by instruction and updates register/stack state.
  * All paths of conditional branches are analyzed until 'bpf_exit' insn.
@@ -336,6 +340,7 @@ struct bpf_kfunc_call_arg_meta {
 struct btf *btf_vmlinux;
 
 static DEFINE_MUTEX(bpf_verifier_lock);
+static DEFINE_MUTEX(bpf_percpu_ma_lock);
 
 static const struct bpf_line_info *
 find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
@@ -542,13 +547,12 @@ static bool is_dynptr_ref_function(enum bpf_func_id func_id)
        return func_id == BPF_FUNC_dynptr_data;
 }
 
-static bool is_callback_calling_kfunc(u32 btf_id);
+static bool is_sync_callback_calling_kfunc(u32 btf_id);
 static bool is_bpf_throw_kfunc(struct bpf_insn *insn);
 
-static bool is_callback_calling_function(enum bpf_func_id func_id)
+static bool is_sync_callback_calling_function(enum bpf_func_id func_id)
 {
        return func_id == BPF_FUNC_for_each_map_elem ||
-              func_id == BPF_FUNC_timer_set_callback ||
               func_id == BPF_FUNC_find_vma ||
               func_id == BPF_FUNC_loop ||
               func_id == BPF_FUNC_user_ringbuf_drain;
@@ -559,6 +563,18 @@ static bool is_async_callback_calling_function(enum bpf_func_id func_id)
        return func_id == BPF_FUNC_timer_set_callback;
 }
 
+static bool is_callback_calling_function(enum bpf_func_id func_id)
+{
+       return is_sync_callback_calling_function(func_id) ||
+              is_async_callback_calling_function(func_id);
+}
+
+static bool is_sync_callback_calling_insn(struct bpf_insn *insn)
+{
+       return (bpf_helper_call(insn) && is_sync_callback_calling_function(insn->imm)) ||
+              (bpf_pseudo_kfunc_call(insn) && is_sync_callback_calling_kfunc(insn->imm));
+}
+
 static bool is_storage_get_function(enum bpf_func_id func_id)
 {
        return func_id == BPF_FUNC_sk_storage_get ||
@@ -1803,6 +1819,7 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
        dst_state->first_insn_idx = src->first_insn_idx;
        dst_state->last_insn_idx = src->last_insn_idx;
        dst_state->dfs_depth = src->dfs_depth;
+       dst_state->callback_unroll_depth = src->callback_unroll_depth;
        dst_state->used_as_loop_entry = src->used_as_loop_entry;
        for (i = 0; i <= src->curframe; i++) {
                dst = dst_state->frame[i];
@@ -3434,13 +3451,11 @@ static void mark_insn_zext(struct bpf_verifier_env *env,
        reg->subreg_def = DEF_NOT_SUBREG;
 }
 
-static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
-                        enum reg_arg_type t)
+static int __check_reg_arg(struct bpf_verifier_env *env, struct bpf_reg_state *regs, u32 regno,
+                          enum reg_arg_type t)
 {
-       struct bpf_verifier_state *vstate = env->cur_state;
-       struct bpf_func_state *state = vstate->frame[vstate->curframe];
        struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
-       struct bpf_reg_state *reg, *regs = state->regs;
+       struct bpf_reg_state *reg;
        bool rw64;
 
        if (regno >= MAX_BPF_REG) {
@@ -3481,6 +3496,15 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
        return 0;
 }
 
+static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
+                        enum reg_arg_type t)
+{
+       struct bpf_verifier_state *vstate = env->cur_state;
+       struct bpf_func_state *state = vstate->frame[vstate->curframe];
+
+       return __check_reg_arg(env, state->regs, regno, t);
+}
+
 static void mark_jmp_point(struct bpf_verifier_env *env, int idx)
 {
        env->insn_aux_data[idx].jmp_point = true;
@@ -3516,12 +3540,29 @@ static int push_jmp_history(struct bpf_verifier_env *env,
 
 /* Backtrack one insn at a time. If idx is not at the top of recorded
  * history then previous instruction came from straight line execution.
+ * Return -ENOENT if we exhausted all instructions within given state.
+ *
+ * It's legal to have a bit of a looping with the same starting and ending
+ * insn index within the same state, e.g.: 3->4->5->3, so just because current
+ * instruction index is the same as state's first_idx doesn't mean we are
+ * done. If there is still some jump history left, we should keep going. We
+ * need to take into account that we might have a jump history between given
+ * state's parent and itself, due to checkpointing. In this case, we'll have
+ * history entry recording a jump from last instruction of parent state and
+ * first instruction of given state.
  */
 static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
                             u32 *history)
 {
        u32 cnt = *history;
 
+       if (i == st->first_insn_idx) {
+               if (cnt == 0)
+                       return -ENOENT;
+               if (cnt == 1 && st->jmp_history[0].idx == i)
+                       return -ENOENT;
+       }
+
        if (cnt && st->jmp_history[cnt - 1].idx == i) {
                i = st->jmp_history[cnt - 1].prev_idx;
                (*history)--;
@@ -3702,6 +3743,8 @@ static void fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask)
        }
 }
 
+static bool calls_callback(struct bpf_verifier_env *env, int insn_idx);
+
 /* For given verifier state backtrack_insn() is called from the last insn to
  * the first insn. Its purpose is to compute a bitmask of registers and
  * stack slots that needs precision in the parent verifier state.
@@ -3877,16 +3920,13 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
                                        return -EFAULT;
                                return 0;
                        }
-               } else if ((bpf_helper_call(insn) &&
-                           is_callback_calling_function(insn->imm) &&
-                           !is_async_callback_calling_function(insn->imm)) ||
-                          (bpf_pseudo_kfunc_call(insn) && is_callback_calling_kfunc(insn->imm))) {
-                       /* callback-calling helper or kfunc call, which means
-                        * we are exiting from subprog, but unlike the subprog
-                        * call handling above, we shouldn't propagate
-                        * precision of r1-r5 (if any requested), as they are
-                        * not actually arguments passed directly to callback
-                        * subprogs
+               } else if (is_sync_callback_calling_insn(insn) && idx != subseq_idx - 1) {
+                       /* exit from callback subprog to callback-calling helper or
+                        * kfunc call. Use idx/subseq_idx check to discern it from
+                        * straight line code backtracking.
+                        * Unlike the subprog call handling above, we shouldn't
+                        * propagate precision of r1-r5 (if any requested), as they are
+                        * not actually arguments passed directly to callback subprogs
                         */
                        if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) {
                                verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
@@ -3921,10 +3961,18 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
                } else if (opcode == BPF_EXIT) {
                        bool r0_precise;
 
+                       /* Backtracking to a nested function call, 'idx' is a part of
+                        * the inner frame 'subseq_idx' is a part of the outer frame.
+                        * In case of a regular function call, instructions giving
+                        * precision to registers R1-R5 should have been found already.
+                        * In case of a callback, it is ok to have R1-R5 marked for
+                        * backtracking, as these registers are set by the function
+                        * invoking callback.
+                        */
+                       if (subseq_idx >= 0 && calls_callback(env, subseq_idx))
+                               for (i = BPF_REG_1; i <= BPF_REG_5; i++)
+                                       bt_clear_reg(bt, i);
                        if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
-                               /* if backtracing was looking for registers R1-R5
-                                * they should have been found already.
-                                */
                                verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
                                WARN_ONCE(1, "verifier backtracking bug");
                                return -EFAULT;
@@ -4401,10 +4449,10 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
                                 * Nothing to be tracked further in the parent state.
                                 */
                                return 0;
-                       if (i == first_idx)
-                               break;
                        subseq_idx = i;
                        i = get_prev_insn_idx(st, i, &history);
+                       if (i == -ENOENT)
+                               break;
                        if (i >= env->prog->len) {
                                /* This can happen if backtracking reached insn 0
                                 * and there are still reg_mask or stack_mask
@@ -9328,7 +9376,7 @@ static void clear_caller_saved_regs(struct bpf_verifier_env *env,
        /* after the call registers r0 - r5 were scratched */
        for (i = 0; i < CALLER_SAVED_REGS; i++) {
                mark_reg_not_init(env, regs, caller_saved[i]);
-               check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
+               __check_reg_arg(env, regs, caller_saved[i], DST_OP_NO_MARK);
        }
 }
 
@@ -9341,11 +9389,10 @@ static int set_callee_state(struct bpf_verifier_env *env,
                            struct bpf_func_state *caller,
                            struct bpf_func_state *callee, int insn_idx);
 
-static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
-                            int *insn_idx, int subprog,
-                            set_callee_state_fn set_callee_state_cb)
+static int setup_func_entry(struct bpf_verifier_env *env, int subprog, int callsite,
+                           set_callee_state_fn set_callee_state_cb,
+                           struct bpf_verifier_state *state)
 {
-       struct bpf_verifier_state *state = env->cur_state;
        struct bpf_func_state *caller, *callee;
        int err;
 
@@ -9355,54 +9402,72 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn
                return -E2BIG;
        }
 
-       caller = state->frame[state->curframe];
        if (state->frame[state->curframe + 1]) {
                verbose(env, "verifier bug. Frame %d already allocated\n",
                        state->curframe + 1);
                return -EFAULT;
        }
 
+       caller = state->frame[state->curframe];
+       callee = kzalloc(sizeof(*callee), GFP_KERNEL);
+       if (!callee)
+               return -ENOMEM;
+       state->frame[state->curframe + 1] = callee;
+
+       /* callee cannot access r0, r6 - r9 for reading and has to write
+        * into its own stack before reading from it.
+        * callee can read/write into caller's stack
+        */
+       init_func_state(env, callee,
+                       /* remember the callsite, it will be used by bpf_exit */
+                       callsite,
+                       state->curframe + 1 /* frameno within this callchain */,
+                       subprog /* subprog number within this prog */);
+       /* Transfer references to the callee */
+       err = copy_reference_state(callee, caller);
+       err = err ?: set_callee_state_cb(env, caller, callee, callsite);
+       if (err)
+               goto err_out;
+
+       /* only increment it after check_reg_arg() finished */
+       state->curframe++;
+
+       return 0;
+
+err_out:
+       free_func_state(callee);
+       state->frame[state->curframe + 1] = NULL;
+       return err;
+}
+
+static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
+                             int insn_idx, int subprog,
+                             set_callee_state_fn set_callee_state_cb)
+{
+       struct bpf_verifier_state *state = env->cur_state, *callback_state;
+       struct bpf_func_state *caller, *callee;
+       int err;
+
+       caller = state->frame[state->curframe];
        err = btf_check_subprog_call(env, subprog, caller->regs);
        if (err == -EFAULT)
                return err;
-       if (subprog_is_global(env, subprog)) {
-               if (err) {
-                       verbose(env, "Caller passes invalid args into func#%d\n",
-                               subprog);
-                       return err;
-               } else {
-                       if (env->log.level & BPF_LOG_LEVEL)
-                               verbose(env,
-                                       "Func#%d is global and valid. Skipping.\n",
-                                       subprog);
-                       clear_caller_saved_regs(env, caller->regs);
-
-                       /* All global functions return a 64-bit SCALAR_VALUE */
-                       mark_reg_unknown(env, caller->regs, BPF_REG_0);
-                       caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
-
-                       /* continue with next insn after call */
-                       return 0;
-               }
-       }
 
        /* set_callee_state is used for direct subprog calls, but we are
         * interested in validating only BPF helpers that can call subprogs as
         * callbacks
         */
-       if (set_callee_state_cb != set_callee_state) {
-               env->subprog_info[subprog].is_cb = true;
-               if (bpf_pseudo_kfunc_call(insn) &&
-                   !is_callback_calling_kfunc(insn->imm)) {
-                       verbose(env, "verifier bug: kfunc %s#%d not marked as callback-calling\n",
-                               func_id_name(insn->imm), insn->imm);
-                       return -EFAULT;
-               } else if (!bpf_pseudo_kfunc_call(insn) &&
-                          !is_callback_calling_function(insn->imm)) { /* helper */
-                       verbose(env, "verifier bug: helper %s#%d not marked as callback-calling\n",
-                               func_id_name(insn->imm), insn->imm);
-                       return -EFAULT;
-               }
+       env->subprog_info[subprog].is_cb = true;
+       if (bpf_pseudo_kfunc_call(insn) &&
+           !is_sync_callback_calling_kfunc(insn->imm)) {
+               verbose(env, "verifier bug: kfunc %s#%d not marked as callback-calling\n",
+                       func_id_name(insn->imm), insn->imm);
+               return -EFAULT;
+       } else if (!bpf_pseudo_kfunc_call(insn) &&
+                  !is_callback_calling_function(insn->imm)) { /* helper */
+               verbose(env, "verifier bug: helper %s#%d not marked as callback-calling\n",
+                       func_id_name(insn->imm), insn->imm);
+               return -EFAULT;
        }
 
        if (insn->code == (BPF_JMP | BPF_CALL) &&
@@ -9413,53 +9478,83 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn
                /* there is no real recursion here. timer callbacks are async */
                env->subprog_info[subprog].is_async_cb = true;
                async_cb = push_async_cb(env, env->subprog_info[subprog].start,
-                                        *insn_idx, subprog);
+                                        insn_idx, subprog);
                if (!async_cb)
                        return -EFAULT;
                callee = async_cb->frame[0];
                callee->async_entry_cnt = caller->async_entry_cnt + 1;
 
                /* Convert bpf_timer_set_callback() args into timer callback args */
-               err = set_callee_state_cb(env, caller, callee, *insn_idx);
+               err = set_callee_state_cb(env, caller, callee, insn_idx);
                if (err)
                        return err;
 
+               return 0;
+       }
+
+       /* for callback functions enqueue entry to callback and
+        * proceed with next instruction within current frame.
+        */
+       callback_state = push_stack(env, env->subprog_info[subprog].start, insn_idx, false);
+       if (!callback_state)
+               return -ENOMEM;
+
+       err = setup_func_entry(env, subprog, insn_idx, set_callee_state_cb,
+                              callback_state);
+       if (err)
+               return err;
+
+       callback_state->callback_unroll_depth++;
+       callback_state->frame[callback_state->curframe - 1]->callback_depth++;
+       caller->callback_depth = 0;
+       return 0;
+}
+
+static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
+                          int *insn_idx)
+{
+       struct bpf_verifier_state *state = env->cur_state;
+       struct bpf_func_state *caller;
+       int err, subprog, target_insn;
+
+       target_insn = *insn_idx + insn->imm + 1;
+       subprog = find_subprog(env, target_insn);
+       if (subprog < 0) {
+               verbose(env, "verifier bug. No program starts at insn %d\n", target_insn);
+               return -EFAULT;
+       }
+
+       caller = state->frame[state->curframe];
+       err = btf_check_subprog_call(env, subprog, caller->regs);
+       if (err == -EFAULT)
+               return err;
+       if (subprog_is_global(env, subprog)) {
+               if (err) {
+                       verbose(env, "Caller passes invalid args into func#%d\n", subprog);
+                       return err;
+               }
+
+               if (env->log.level & BPF_LOG_LEVEL)
+                       verbose(env, "Func#%d is global and valid. Skipping.\n", subprog);
                clear_caller_saved_regs(env, caller->regs);
+
+               /* All global functions return a 64-bit SCALAR_VALUE */
                mark_reg_unknown(env, caller->regs, BPF_REG_0);
                caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
+
                /* continue with next insn after call */
                return 0;
        }
 
-       callee = kzalloc(sizeof(*callee), GFP_KERNEL);
-       if (!callee)
-               return -ENOMEM;
-       state->frame[state->curframe + 1] = callee;
-
-       /* callee cannot access r0, r6 - r9 for reading and has to write
-        * into its own stack before reading from it.
-        * callee can read/write into caller's stack
+       /* for regular function entry setup new frame and continue
+        * from that frame.
         */
-       init_func_state(env, callee,
-                       /* remember the callsite, it will be used by bpf_exit */
-                       *insn_idx /* callsite */,
-                       state->curframe + 1 /* frameno within this callchain */,
-                       subprog /* subprog number within this prog */);
-
-       /* Transfer references to the callee */
-       err = copy_reference_state(callee, caller);
-       if (err)
-               goto err_out;
-
-       err = set_callee_state_cb(env, caller, callee, *insn_idx);
+       err = setup_func_entry(env, subprog, *insn_idx, set_callee_state, state);
        if (err)
-               goto err_out;
+               return err;
 
        clear_caller_saved_regs(env, caller->regs);
 
-       /* only increment it after check_reg_arg() finished */
-       state->curframe++;
-
        /* and go analyze first insn of the callee */
        *insn_idx = env->subprog_info[subprog].start - 1;
 
@@ -9467,14 +9562,10 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn
                verbose(env, "caller:\n");
                print_verifier_state(env, caller, true);
                verbose(env, "callee:\n");
-               print_verifier_state(env, callee, true);
+               print_verifier_state(env, state->frame[state->curframe], true);
        }
-       return 0;
 
-err_out:
-       free_func_state(callee);
-       state->frame[state->curframe + 1] = NULL;
-       return err;
+       return 0;
 }
 
 int map_set_for_each_callback_args(struct bpf_verifier_env *env,
@@ -9518,22 +9609,6 @@ static int set_callee_state(struct bpf_verifier_env *env,
        return 0;
 }
 
-static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
-                          int *insn_idx)
-{
-       int subprog, target_insn;
-
-       target_insn = *insn_idx + insn->imm + 1;
-       subprog = find_subprog(env, target_insn);
-       if (subprog < 0) {
-               verbose(env, "verifier bug. No program starts at insn %d\n",
-                       target_insn);
-               return -EFAULT;
-       }
-
-       return __check_func_call(env, insn, insn_idx, subprog, set_callee_state);
-}
-
 static int set_map_elem_callback_state(struct bpf_verifier_env *env,
                                       struct bpf_func_state *caller,
                                       struct bpf_func_state *callee,
@@ -9726,9 +9801,10 @@ static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env)
 
 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
 {
-       struct bpf_verifier_state *state = env->cur_state;
+       struct bpf_verifier_state *state = env->cur_state, *prev_st;
        struct bpf_func_state *caller, *callee;
        struct bpf_reg_state *r0;
+       bool in_callback_fn;
        int err;
 
        callee = state->frame[state->curframe];
@@ -9757,6 +9833,11 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
                        verbose_invalid_scalar(env, r0, &range, "callback return", "R0");
                        return -EINVAL;
                }
+               if (!calls_callback(env, callee->callsite)) {
+                       verbose(env, "BUG: in callback at %d, callsite %d !calls_callback\n",
+                               *insn_idx, callee->callsite);
+                       return -EFAULT;
+               }
        } else {
                /* return to the caller whatever r0 had in the callee */
                caller->regs[BPF_REG_0] = *r0;
@@ -9774,7 +9855,16 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
                        return err;
        }
 
-       *insn_idx = callee->callsite + 1;
+       /* for callbacks like bpf_loop or bpf_for_each_map_elem go back to callsite,
+        * there function call logic would reschedule callback visit. If iteration
+        * converges is_state_visited() would prune that visit eventually.
+        */
+       in_callback_fn = callee->in_callback_fn;
+       if (in_callback_fn)
+               *insn_idx = callee->callsite;
+       else
+               *insn_idx = callee->callsite + 1;
+
        if (env->log.level & BPF_LOG_LEVEL) {
                verbose(env, "returning from callee:\n");
                print_verifier_state(env, callee, true);
@@ -9785,6 +9875,24 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
         * bpf_throw, this will be done by copy_verifier_state for extra frames. */
        free_func_state(callee);
        state->frame[state->curframe--] = NULL;
+
+       /* for callbacks widen imprecise scalars to make programs like below verify:
+        *
+        *   struct ctx { int i; }
+        *   void cb(int idx, struct ctx *ctx) { ctx->i++; ... }
+        *   ...
+        *   struct ctx = { .i = 0; }
+        *   bpf_loop(100, cb, &ctx, 0);
+        *
+        * This is similar to what is done in process_iter_next_call() for open
+        * coded iterators.
+        */
+       prev_st = in_callback_fn ? find_prev_entry(env, state, *insn_idx) : NULL;
+       if (prev_st) {
+               err = widen_imprecise_scalars(env, prev_st, state);
+               if (err)
+                       return err;
+       }
        return 0;
 }
 
@@ -10187,24 +10295,37 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
                }
                break;
        case BPF_FUNC_for_each_map_elem:
-               err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
-                                       set_map_elem_callback_state);
+               err = push_callback_call(env, insn, insn_idx, meta.subprogno,
+                                        set_map_elem_callback_state);
                break;
        case BPF_FUNC_timer_set_callback:
-               err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
-                                       set_timer_callback_state);
+               err = push_callback_call(env, insn, insn_idx, meta.subprogno,
+                                        set_timer_callback_state);
                break;
        case BPF_FUNC_find_vma:
-               err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
-                                       set_find_vma_callback_state);
+               err = push_callback_call(env, insn, insn_idx, meta.subprogno,
+                                        set_find_vma_callback_state);
                break;
        case BPF_FUNC_snprintf:
                err = check_bpf_snprintf_call(env, regs);
                break;
        case BPF_FUNC_loop:
                update_loop_inline_state(env, meta.subprogno);
-               err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
-                                       set_loop_callback_state);
+               /* Verifier relies on R1 value to determine if bpf_loop() iteration
+                * is finished, thus mark it precise.
+                */
+               err = mark_chain_precision(env, BPF_REG_1);
+               if (err)
+                       return err;
+               if (cur_func(env)->callback_depth < regs[BPF_REG_1].umax_value) {
+                       err = push_callback_call(env, insn, insn_idx, meta.subprogno,
+                                                set_loop_callback_state);
+               } else {
+                       cur_func(env)->callback_depth = 0;
+                       if (env->log.level & BPF_LOG_LEVEL2)
+                               verbose(env, "frame%d bpf_loop iteration limit reached\n",
+                                       env->cur_state->curframe);
+               }
                break;
        case BPF_FUNC_dynptr_from_mem:
                if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) {
@@ -10300,8 +10421,8 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
                break;
        }
        case BPF_FUNC_user_ringbuf_drain:
-               err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
-                                       set_user_ringbuf_callback_state);
+               err = push_callback_call(env, insn, insn_idx, meta.subprogno,
+                                        set_user_ringbuf_callback_state);
                break;
        }
 
@@ -11189,7 +11310,7 @@ static bool is_bpf_graph_api_kfunc(u32 btf_id)
               btf_id == special_kfunc_list[KF_bpf_refcount_acquire_impl];
 }
 
-static bool is_callback_calling_kfunc(u32 btf_id)
+static bool is_sync_callback_calling_kfunc(u32 btf_id)
 {
        return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl];
 }
@@ -11941,6 +12062,21 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
                return -EACCES;
        }
 
+       /* Check the arguments */
+       err = check_kfunc_args(env, &meta, insn_idx);
+       if (err < 0)
+               return err;
+
+       if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
+               err = push_callback_call(env, insn, insn_idx, meta.subprogno,
+                                        set_rbtree_add_callback_state);
+               if (err) {
+                       verbose(env, "kfunc %s#%d failed callback verification\n",
+                               func_name, meta.func_id);
+                       return err;
+               }
+       }
+
        rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta);
        rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta);
 
@@ -11976,10 +12112,6 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
                return -EINVAL;
        }
 
-       /* Check the arguments */
-       err = check_kfunc_args(env, &meta, insn_idx);
-       if (err < 0)
-               return err;
        /* In case of release function, we get register number of refcounted
         * PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now.
         */
@@ -12013,16 +12145,6 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
                }
        }
 
-       if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
-               err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
-                                       set_rbtree_add_callback_state);
-               if (err) {
-                       verbose(env, "kfunc %s#%d failed callback verification\n",
-                               func_name, meta.func_id);
-                       return err;
-               }
-       }
-
        if (meta.func_id == special_kfunc_list[KF_bpf_throw]) {
                if (!bpf_jit_supports_exceptions()) {
                        verbose(env, "JIT does not support calling kfunc %s#%d\n",
@@ -12074,8 +12196,19 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
                                if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl] && !bpf_global_ma_set)
                                        return -ENOMEM;
 
-                               if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl] && !bpf_global_percpu_ma_set)
-                                       return -ENOMEM;
+                               if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
+                                       if (!bpf_global_percpu_ma_set) {
+                                               mutex_lock(&bpf_percpu_ma_lock);
+                                               if (!bpf_global_percpu_ma_set) {
+                                                       err = bpf_mem_alloc_init(&bpf_global_percpu_ma, 0, true);
+                                                       if (!err)
+                                                               bpf_global_percpu_ma_set = true;
+                                               }
+                                               mutex_unlock(&bpf_percpu_ma_lock);
+                                               if (err)
+                                                       return err;
+                                       }
+                               }
 
                                if (((u64)(u32)meta.arg_constant.value) != meta.arg_constant.value) {
                                        verbose(env, "local type ID argument must be in range [0, U32_MAX]\n");
@@ -15375,6 +15508,15 @@ static bool is_force_checkpoint(struct bpf_verifier_env *env, int insn_idx)
        return env->insn_aux_data[insn_idx].force_checkpoint;
 }
 
+static void mark_calls_callback(struct bpf_verifier_env *env, int idx)
+{
+       env->insn_aux_data[idx].calls_callback = true;
+}
+
+static bool calls_callback(struct bpf_verifier_env *env, int insn_idx)
+{
+       return env->insn_aux_data[insn_idx].calls_callback;
+}
 
 enum {
        DONE_EXPLORING = 0,
@@ -15386,8 +15528,7 @@ enum {
  * w - next instruction
  * e - edge
  */
-static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
-                    bool loop_ok)
+static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
 {
        int *insn_stack = env->cfg.insn_stack;
        int *insn_state = env->cfg.insn_state;
@@ -15419,7 +15560,7 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
                insn_stack[env->cfg.cur_stack++] = w;
                return KEEP_EXPLORING;
        } else if ((insn_state[w] & 0xF0) == DISCOVERED) {
-               if (loop_ok && env->bpf_capable)
+               if (env->bpf_capable)
                        return DONE_EXPLORING;
                verbose_linfo(env, t, "%d: ", t);
                verbose_linfo(env, w, "%d: ", w);
@@ -15439,24 +15580,20 @@ static int visit_func_call_insn(int t, struct bpf_insn *insns,
                                struct bpf_verifier_env *env,
                                bool visit_callee)
 {
-       int ret;
+       int ret, insn_sz;
 
-       ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
+       insn_sz = bpf_is_ldimm64(&insns[t]) ? 2 : 1;
+       ret = push_insn(t, t + insn_sz, FALLTHROUGH, env);
        if (ret)
                return ret;
 
-       mark_prune_point(env, t + 1);
+       mark_prune_point(env, t + insn_sz);
        /* when we exit from subprog, we need to record non-linear history */
-       mark_jmp_point(env, t + 1);
+       mark_jmp_point(env, t + insn_sz);
 
        if (visit_callee) {
                mark_prune_point(env, t);
-               ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env,
-                               /* It's ok to allow recursion from CFG point of
-                                * view. __check_func_call() will do the actual
-                                * check.
-                                */
-                               bpf_pseudo_func(insns + t));
+               ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env);
        }
        return ret;
 }
@@ -15469,15 +15606,17 @@ static int visit_func_call_insn(int t, struct bpf_insn *insns,
 static int visit_insn(int t, struct bpf_verifier_env *env)
 {
        struct bpf_insn *insns = env->prog->insnsi, *insn = &insns[t];
-       int ret, off;
+       int ret, off, insn_sz;
 
        if (bpf_pseudo_func(insn))
                return visit_func_call_insn(t, insns, env, true);
 
        /* All non-branch instructions have a single fall-through edge. */
        if (BPF_CLASS(insn->code) != BPF_JMP &&
-           BPF_CLASS(insn->code) != BPF_JMP32)
-               return push_insn(t, t + 1, FALLTHROUGH, env, false);
+           BPF_CLASS(insn->code) != BPF_JMP32) {
+               insn_sz = bpf_is_ldimm64(insn) ? 2 : 1;
+               return push_insn(t, t + insn_sz, FALLTHROUGH, env);
+       }
 
        switch (BPF_OP(insn->code)) {
        case BPF_EXIT:
@@ -15491,6 +15630,21 @@ static int visit_insn(int t, struct bpf_verifier_env *env)
                         * async state will be pushed for further exploration.
                         */
                        mark_prune_point(env, t);
+               /* For functions that invoke callbacks it is not known how many times
+                * callback would be called. Verifier models callback calling functions
+                * by repeatedly visiting callback bodies and returning to origin call
+                * instruction.
+                * In order to stop such iteration verifier needs to identify when a
+                * state identical some state from a previous iteration is reached.
+                * Check below forces creation of checkpoint before callback calling
+                * instruction to allow search for such identical states.
+                */
+               if (is_sync_callback_calling_insn(insn)) {
+                       mark_calls_callback(env, t);
+                       mark_force_checkpoint(env, t);
+                       mark_prune_point(env, t);
+                       mark_jmp_point(env, t);
+               }
                if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
                        struct bpf_kfunc_call_arg_meta meta;
 
@@ -15523,8 +15677,7 @@ static int visit_insn(int t, struct bpf_verifier_env *env)
                        off = insn->imm;
 
                /* unconditional jump with single edge */
-               ret = push_insn(t, t + off + 1, FALLTHROUGH, env,
-                               true);
+               ret = push_insn(t, t + off + 1, FALLTHROUGH, env);
                if (ret)
                        return ret;
 
@@ -15537,11 +15690,11 @@ static int visit_insn(int t, struct bpf_verifier_env *env)
                /* conditional jump with two edges */
                mark_prune_point(env, t);
 
-               ret = push_insn(t, t + 1, FALLTHROUGH, env, true);
+               ret = push_insn(t, t + 1, FALLTHROUGH, env);
                if (ret)
                        return ret;
 
-               return push_insn(t, t + insn->off + 1, BRANCH, env, true);
+               return push_insn(t, t + insn->off + 1, BRANCH, env);
        }
 }
 
@@ -15607,11 +15760,21 @@ walk_cfg:
        }
 
        for (i = 0; i < insn_cnt; i++) {
+               struct bpf_insn *insn = &env->prog->insnsi[i];
+
                if (insn_state[i] != EXPLORED) {
                        verbose(env, "unreachable insn %d\n", i);
                        ret = -EINVAL;
                        goto err_free;
                }
+               if (bpf_is_ldimm64(insn)) {
+                       if (insn_state[i + 1] != 0) {
+                               verbose(env, "jump into the middle of ldimm64 insn %d\n", i);
+                               ret = -EINVAL;
+                               goto err_free;
+                       }
+                       i++; /* skip second half of ldimm64 */
+               }
        }
        ret = 0; /* cfg looks good */
 
@@ -16951,10 +17114,16 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
                                }
                                goto skip_inf_loop_check;
                        }
+                       if (calls_callback(env, insn_idx)) {
+                               if (states_equal(env, &sl->state, cur, true))
+                                       goto hit;
+                               goto skip_inf_loop_check;
+                       }
                        /* attempt to detect infinite loop to avoid unnecessary doomed work */
                        if (states_maybe_looping(&sl->state, cur) &&
                            states_equal(env, &sl->state, cur, false) &&
-                           !iter_active_depths_differ(&sl->state, cur)) {
+                           !iter_active_depths_differ(&sl->state, cur) &&
+                           sl->state.callback_unroll_depth == cur->callback_unroll_depth) {
                                verbose_linfo(env, insn_idx, "; ");
                                verbose(env, "infinite loop detected at insn %d\n", insn_idx);
                                verbose(env, "cur state:");
index 1d5b9de..4b9ff41 100644 (file)
@@ -3885,14 +3885,6 @@ static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of,
        return psi_trigger_poll(&ctx->psi.trigger, of->file, pt);
 }
 
-static int cgroup_pressure_open(struct kernfs_open_file *of)
-{
-       if (of->file->f_mode & FMODE_WRITE && !capable(CAP_SYS_RESOURCE))
-               return -EPERM;
-
-       return 0;
-}
-
 static void cgroup_pressure_release(struct kernfs_open_file *of)
 {
        struct cgroup_file_ctx *ctx = of->priv;
@@ -5299,7 +5291,6 @@ static struct cftype cgroup_psi_files[] = {
        {
                .name = "io.pressure",
                .file_offset = offsetof(struct cgroup, psi_files[PSI_IO]),
-               .open = cgroup_pressure_open,
                .seq_show = cgroup_io_pressure_show,
                .write = cgroup_io_pressure_write,
                .poll = cgroup_pressure_poll,
@@ -5308,7 +5299,6 @@ static struct cftype cgroup_psi_files[] = {
        {
                .name = "memory.pressure",
                .file_offset = offsetof(struct cgroup, psi_files[PSI_MEM]),
-               .open = cgroup_pressure_open,
                .seq_show = cgroup_memory_pressure_show,
                .write = cgroup_memory_pressure_write,
                .poll = cgroup_pressure_poll,
@@ -5317,7 +5307,6 @@ static struct cftype cgroup_psi_files[] = {
        {
                .name = "cpu.pressure",
                .file_offset = offsetof(struct cgroup, psi_files[PSI_CPU]),
-               .open = cgroup_pressure_open,
                .seq_show = cgroup_cpu_pressure_show,
                .write = cgroup_cpu_pressure_write,
                .poll = cgroup_pressure_poll,
@@ -5327,7 +5316,6 @@ static struct cftype cgroup_psi_files[] = {
        {
                .name = "irq.pressure",
                .file_offset = offsetof(struct cgroup, psi_files[PSI_IRQ]),
-               .open = cgroup_pressure_open,
                .seq_show = cgroup_irq_pressure_show,
                .write = cgroup_irq_pressure_write,
                .poll = cgroup_pressure_poll,
index 9e4c678..a86972a 100644 (file)
@@ -2113,7 +2113,7 @@ static struct cpuhp_step cpuhp_hp_states[] = {
        [CPUHP_HRTIMERS_PREPARE] = {
                .name                   = "hrtimers:prepare",
                .startup.single         = hrtimers_prepare_cpu,
-               .teardown.single        = hrtimers_dead_cpu,
+               .teardown.single        = NULL,
        },
        [CPUHP_SMPCFD_PREPARE] = {
                .name                   = "smpcfd:prepare",
@@ -2205,6 +2205,12 @@ static struct cpuhp_step cpuhp_hp_states[] = {
                .startup.single         = NULL,
                .teardown.single        = smpcfd_dying_cpu,
        },
+       [CPUHP_AP_HRTIMERS_DYING] = {
+               .name                   = "hrtimers:dying",
+               .startup.single         = NULL,
+               .teardown.single        = hrtimers_cpu_dying,
+       },
+
        /* Entry state on starting. Interrupts enabled from here on. Transient
         * state for synchronization */
        [CPUHP_AP_ONLINE] = {
index 683dc08..b704d83 100644 (file)
@@ -4828,6 +4828,11 @@ find_get_pmu_context(struct pmu *pmu, struct perf_event_context *ctx,
        void *task_ctx_data = NULL;
 
        if (!ctx->task) {
+               /*
+                * perf_pmu_migrate_context() / __perf_pmu_install_event()
+                * relies on the fact that find_get_pmu_context() cannot fail
+                * for CPU contexts.
+                */
                struct perf_cpu_pmu_context *cpc;
 
                cpc = per_cpu_ptr(pmu->cpu_pmu_context, event->cpu);
@@ -12889,6 +12894,9 @@ static void __perf_pmu_install_event(struct pmu *pmu,
                                     int cpu, struct perf_event *event)
 {
        struct perf_event_pmu_context *epc;
+       struct perf_event_context *old_ctx = event->ctx;
+
+       get_ctx(ctx); /* normally find_get_context() */
 
        event->cpu = cpu;
        epc = find_get_pmu_context(pmu, ctx, event);
@@ -12897,6 +12905,11 @@ static void __perf_pmu_install_event(struct pmu *pmu,
        if (event->state >= PERF_EVENT_STATE_OFF)
                event->state = PERF_EVENT_STATE_INACTIVE;
        perf_install_in_context(ctx, event, cpu);
+
+       /*
+        * Now that event->ctx is updated and visible, put the old ctx.
+        */
+       put_ctx(old_ctx);
 }
 
 static void __perf_pmu_install(struct perf_event_context *ctx,
@@ -12935,6 +12948,10 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
        struct perf_event_context *src_ctx, *dst_ctx;
        LIST_HEAD(events);
 
+       /*
+        * Since per-cpu context is persistent, no need to grab an extra
+        * reference.
+        */
        src_ctx = &per_cpu_ptr(&perf_cpu_context, src_cpu)->ctx;
        dst_ctx = &per_cpu_ptr(&perf_cpu_context, dst_cpu)->ctx;
 
index 52695c5..dad981a 100644 (file)
@@ -700,7 +700,8 @@ retry:
        owner = uval & FUTEX_TID_MASK;
 
        if (pending_op && !pi && !owner) {
-               futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
+               futex_wake(uaddr, FLAGS_SIZE_32 | FLAGS_SHARED, 1,
+                          FUTEX_BITSET_MATCH_ANY);
                return 0;
        }
 
@@ -752,8 +753,10 @@ retry:
         * Wake robust non-PI futexes here. The wakeup of
         * PI futexes happens in exit_pi_state():
         */
-       if (!pi && (uval & FUTEX_WAITERS))
-               futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
+       if (!pi && (uval & FUTEX_WAITERS)) {
+               futex_wake(uaddr, FLAGS_SIZE_32 | FLAGS_SHARED, 1,
+                          FUTEX_BITSET_MATCH_ANY);
+       }
 
        return 0;
 }
index e85b5ad..151bd3d 100644 (file)
@@ -3497,7 +3497,8 @@ static int alloc_chain_hlocks(int req)
                size = chain_block_size(curr);
                if (likely(size >= req)) {
                        del_chain_block(0, size, chain_block_next(curr));
-                       add_chain_block(curr + req, size - req);
+                       if (size > req)
+                               add_chain_block(curr + req, size - req);
                        return curr;
                }
        }
index 2048138..d7a3c63 100644 (file)
@@ -3666,41 +3666,140 @@ static inline void
 dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { }
 #endif
 
+static void reweight_eevdf(struct cfs_rq *cfs_rq, struct sched_entity *se,
+                          unsigned long weight)
+{
+       unsigned long old_weight = se->load.weight;
+       u64 avruntime = avg_vruntime(cfs_rq);
+       s64 vlag, vslice;
+
+       /*
+        * VRUNTIME
+        * ========
+        *
+        * COROLLARY #1: The virtual runtime of the entity needs to be
+        * adjusted if re-weight at !0-lag point.
+        *
+        * Proof: For contradiction assume this is not true, so we can
+        * re-weight without changing vruntime at !0-lag point.
+        *
+        *             Weight   VRuntime   Avg-VRuntime
+        *     before    w          v            V
+        *      after    w'         v'           V'
+        *
+        * Since lag needs to be preserved through re-weight:
+        *
+        *      lag = (V - v)*w = (V'- v')*w', where v = v'
+        *      ==>     V' = (V - v)*w/w' + v           (1)
+        *
+        * Let W be the total weight of the entities before reweight,
+        * since V' is the new weighted average of entities:
+        *
+        *      V' = (WV + w'v - wv) / (W + w' - w)     (2)
+        *
+        * by using (1) & (2) we obtain:
+        *
+        *      (WV + w'v - wv) / (W + w' - w) = (V - v)*w/w' + v
+        *      ==> (WV-Wv+Wv+w'v-wv)/(W+w'-w) = (V - v)*w/w' + v
+        *      ==> (WV - Wv)/(W + w' - w) + v = (V - v)*w/w' + v
+        *      ==>     (V - v)*W/(W + w' - w) = (V - v)*w/w' (3)
+        *
+        * Since we are doing this at a !0-lag point, which means V != v,
+        * we can simplify (3):
+        *
+        *      ==>     W / (W + w' - w) = w / w'
+        *      ==>     Ww' = Ww + ww' - ww
+        *      ==>     W * (w' - w) = w * (w' - w)
+        *      ==>     W = w   (re-weight indicates w' != w)
+        *
+        * So the cfs_rq contains only one entity, hence vruntime of
+        * the entity @v should always equal to the cfs_rq's weighted
+        * average vruntime @V, which means we will always re-weight
+        * at 0-lag point, thus breaching the assumption. Proof completed.
+        *
+        *
+        * COROLLARY #2: Re-weight does NOT affect weighted average
+        * vruntime of all the entities.
+        *
+        * Proof: According to corollary #1, Eq. (1) should be:
+        *
+        *      (V - v)*w = (V' - v')*w'
+        *      ==>    v' = V' - (V - v)*w/w'           (4)
+        *
+        * According to the weighted average formula, we have:
+        *
+        *      V' = (WV - wv + w'v') / (W - w + w')
+        *         = (WV - wv + w'(V' - (V - v)w/w')) / (W - w + w')
+        *         = (WV - wv + w'V' - Vw + wv) / (W - w + w')
+        *         = (WV + w'V' - Vw) / (W - w + w')
+        *
+        *      ==>  V'*(W - w + w') = WV + w'V' - Vw
+        *      ==>     V' * (W - w) = (W - w) * V      (5)
+        *
+        * If the entity is the only one in the cfs_rq, then reweight
+        * always occurs at 0-lag point, so V won't change. Or else
+        * there are other entities, hence W != w, then Eq. (5) turns
+        * into V' = V. So V won't change in either case, proof done.
+        *
+        *
+        * So according to corollary #1 & #2, the effect of re-weight
+        * on vruntime should be:
+        *
+        *      v' = V' - (V - v) * w / w'              (4)
+        *         = V  - (V - v) * w / w'
+        *         = V  - vl * w / w'
+        *         = V  - vl'
+        */
+       if (avruntime != se->vruntime) {
+               vlag = (s64)(avruntime - se->vruntime);
+               vlag = div_s64(vlag * old_weight, weight);
+               se->vruntime = avruntime - vlag;
+       }
+
+       /*
+        * DEADLINE
+        * ========
+        *
+        * When the weight changes, the virtual time slope changes and
+        * we should adjust the relative virtual deadline accordingly.
+        *
+        *      d' = v' + (d - v)*w/w'
+        *         = V' - (V - v)*w/w' + (d - v)*w/w'
+        *         = V  - (V - v)*w/w' + (d - v)*w/w'
+        *         = V  + (d - V)*w/w'
+        */
+       vslice = (s64)(se->deadline - avruntime);
+       vslice = div_s64(vslice * old_weight, weight);
+       se->deadline = avruntime + vslice;
+}
+
 static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
                            unsigned long weight)
 {
-       unsigned long old_weight = se->load.weight;
+       bool curr = cfs_rq->curr == se;
 
        if (se->on_rq) {
                /* commit outstanding execution time */
-               if (cfs_rq->curr == se)
+               if (curr)
                        update_curr(cfs_rq);
                else
-                       avg_vruntime_sub(cfs_rq, se);
+                       __dequeue_entity(cfs_rq, se);
                update_load_sub(&cfs_rq->load, se->load.weight);
        }
        dequeue_load_avg(cfs_rq, se);
 
-       update_load_set(&se->load, weight);
-
        if (!se->on_rq) {
                /*
                 * Because we keep se->vlag = V - v_i, while: lag_i = w_i*(V - v_i),
                 * we need to scale se->vlag when w_i changes.
                 */
-               se->vlag = div_s64(se->vlag * old_weight, weight);
+               se->vlag = div_s64(se->vlag * se->load.weight, weight);
        } else {
-               s64 deadline = se->deadline - se->vruntime;
-               /*
-                * When the weight changes, the virtual time slope changes and
-                * we should adjust the relative virtual deadline accordingly.
-                */
-               deadline = div_s64(deadline * old_weight, weight);
-               se->deadline = se->vruntime + deadline;
-               if (se != cfs_rq->curr)
-                       min_deadline_cb_propagate(&se->run_node, NULL);
+               reweight_eevdf(cfs_rq, se, weight);
        }
 
+       update_load_set(&se->load, weight);
+
 #ifdef CONFIG_SMP
        do {
                u32 divider = get_pelt_divider(&se->avg);
@@ -3712,8 +3811,17 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
        enqueue_load_avg(cfs_rq, se);
        if (se->on_rq) {
                update_load_add(&cfs_rq->load, se->load.weight);
-               if (cfs_rq->curr != se)
-                       avg_vruntime_add(cfs_rq, se);
+               if (!curr) {
+                       /*
+                        * The entity's vruntime has been adjusted, so let's check
+                        * whether the rq-wide min_vruntime needs updated too. Since
+                        * the calculations above require stable min_vruntime rather
+                        * than up-to-date one, we do the update at the end of the
+                        * reweight process.
+                        */
+                       __enqueue_entity(cfs_rq, se);
+                       update_min_vruntime(cfs_rq);
+               }
        }
 }
 
@@ -3857,14 +3965,11 @@ static void update_cfs_group(struct sched_entity *se)
 
 #ifndef CONFIG_SMP
        shares = READ_ONCE(gcfs_rq->tg->shares);
-
-       if (likely(se->load.weight == shares))
-               return;
 #else
-       shares   = calc_group_shares(gcfs_rq);
+       shares = calc_group_shares(gcfs_rq);
 #endif
-
-       reweight_entity(cfs_rq_of(se), se, shares);
+       if (unlikely(se->load.weight != shares))
+               reweight_entity(cfs_rq_of(se), se, shares);
 }
 
 #else /* CONFIG_FAIR_GROUP_SCHED */
@@ -11079,12 +11184,16 @@ static int should_we_balance(struct lb_env *env)
                        continue;
                }
 
-               /* Are we the first idle CPU? */
+               /*
+                * Are we the first idle core in a non-SMT domain or higher,
+                * or the first idle CPU in a SMT domain?
+                */
                return cpu == env->dst_cpu;
        }
 
-       if (idle_smt == env->dst_cpu)
-               return true;
+       /* Are we the first idle CPU with busy siblings? */
+       if (idle_smt != -1)
+               return idle_smt == env->dst_cpu;
 
        /* Are we the first CPU of this group ? */
        return group_balance_cpu(sg) == env->dst_cpu;
index 420d9cb..e219fcf 100644 (file)
@@ -2394,6 +2394,10 @@ static inline int prctl_set_mdwe(unsigned long bits, unsigned long arg3,
        if (bits & PR_MDWE_NO_INHERIT && !(bits & PR_MDWE_REFUSE_EXEC_GAIN))
                return -EINVAL;
 
+       /* PARISC cannot allow mdwe as it needs writable stacks */
+       if (IS_ENABLED(CONFIG_PARISC))
+               return -EINVAL;
+
        current_bits = get_current_mdwe();
        if (current_bits && current_bits != bits)
                return -EPERM; /* Cannot unset the flags */
index 238262e..7607939 100644 (file)
@@ -2219,29 +2219,22 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
        }
 }
 
-int hrtimers_dead_cpu(unsigned int scpu)
+int hrtimers_cpu_dying(unsigned int dying_cpu)
 {
        struct hrtimer_cpu_base *old_base, *new_base;
-       int i;
+       int i, ncpu = cpumask_first(cpu_active_mask);
 
-       BUG_ON(cpu_online(scpu));
-       tick_cancel_sched_timer(scpu);
+       tick_cancel_sched_timer(dying_cpu);
+
+       old_base = this_cpu_ptr(&hrtimer_bases);
+       new_base = &per_cpu(hrtimer_bases, ncpu);
 
-       /*
-        * this BH disable ensures that raise_softirq_irqoff() does
-        * not wakeup ksoftirqd (and acquire the pi-lock) while
-        * holding the cpu_base lock
-        */
-       local_bh_disable();
-       local_irq_disable();
-       old_base = &per_cpu(hrtimer_bases, scpu);
-       new_base = this_cpu_ptr(&hrtimer_bases);
        /*
         * The caller is globally serialized and nobody else
         * takes two locks at once, deadlock is not possible.
         */
-       raw_spin_lock(&new_base->lock);
-       raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
+       raw_spin_lock(&old_base->lock);
+       raw_spin_lock_nested(&new_base->lock, SINGLE_DEPTH_NESTING);
 
        for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
                migrate_hrtimer_list(&old_base->clock_base[i],
@@ -2252,15 +2245,13 @@ int hrtimers_dead_cpu(unsigned int scpu)
         * The migration might have changed the first expiring softirq
         * timer on this CPU. Update it.
         */
-       hrtimer_update_softirq_timer(new_base, false);
+       __hrtimer_get_next_event(new_base, HRTIMER_ACTIVE_SOFT);
+       /* Tell the other CPU to retrigger the next event */
+       smp_call_function_single(ncpu, retrigger_next_event, NULL, 0);
 
-       raw_spin_unlock(&old_base->lock);
        raw_spin_unlock(&new_base->lock);
+       raw_spin_unlock(&old_base->lock);
 
-       /* Check, if we got expired work to do */
-       __hrtimer_peek_ahead_timers();
-       local_irq_enable();
-       local_bh_enable();
        return 0;
 }
 
index dd1b998..4f9112b 100644 (file)
@@ -111,9 +111,6 @@ static const char *names_0[] = {
        E(ENOSPC),
        E(ENOSR),
        E(ENOSTR),
-#ifdef ENOSYM
-       E(ENOSYM),
-#endif
        E(ENOSYS),
        E(ENOTBLK),
        E(ENOTCONN),
@@ -144,9 +141,6 @@ static const char *names_0[] = {
 #endif
        E(EREMOTE),
        E(EREMOTEIO),
-#ifdef EREMOTERELEASE
-       E(EREMOTERELEASE),
-#endif
        E(ERESTART),
        E(ERFKILL),
        E(EROFS),
index de7d11c..8ff6824 100644 (file)
@@ -409,7 +409,7 @@ size_t copy_page_to_iter_nofault(struct page *page, unsigned offset, size_t byte
                void *kaddr = kmap_local_page(page);
                size_t n = min(bytes, (size_t)PAGE_SIZE - offset);
 
-               n = iterate_and_advance(i, bytes, kaddr,
+               n = iterate_and_advance(i, n, kaddr + offset,
                                        copy_to_user_iter_nofault,
                                        memcpy_to_iter);
                kunmap_local(kaddr);
index a0d0609..8dcb8ca 100644 (file)
@@ -312,7 +312,7 @@ size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size
 
 typedef struct {
     short ncount[FSE_MAX_SYMBOL_VALUE + 1];
-    FSE_DTable dtable[1]; /* Dynamically sized */
+    FSE_DTable dtable[]; /* Dynamically sized */
 } FSE_DecompressWksp;
 
 
index 630077d..6262d55 100644 (file)
@@ -924,7 +924,7 @@ static bool __damos_filter_out(struct damon_ctx *ctx, struct damon_target *t,
                matched = true;
                break;
        default:
-               break;
+               return false;
        }
 
        return matched == filter->matching;
index 45bd0fd..be66723 100644 (file)
@@ -162,6 +162,9 @@ damon_sysfs_scheme_regions_alloc(void)
        struct damon_sysfs_scheme_regions *regions = kmalloc(sizeof(*regions),
                        GFP_KERNEL);
 
+       if (!regions)
+               return NULL;
+
        regions->kobj = (struct kobject){};
        INIT_LIST_HEAD(&regions->regions_list);
        regions->nr_regions = 0;
@@ -1823,6 +1826,8 @@ static int damon_sysfs_before_damos_apply(struct damon_ctx *ctx,
                return 0;
 
        region = damon_sysfs_scheme_region_alloc(r);
+       if (!region)
+               return 0;
        list_add_tail(&region->list, &sysfs_regions->regions_list);
        sysfs_regions->nr_regions++;
        if (kobject_init_and_add(&region->kobj,
index e278467..7472404 100644 (file)
@@ -1172,7 +1172,7 @@ static int damon_sysfs_update_target(struct damon_target *target,
                struct damon_ctx *ctx,
                struct damon_sysfs_target *sys_target)
 {
-       int err;
+       int err = 0;
 
        if (damon_target_has_pid(ctx)) {
                err = damon_sysfs_update_target_pid(target, sys_target->pid);
@@ -1203,8 +1203,10 @@ static int damon_sysfs_set_targets(struct damon_ctx *ctx,
 
        damon_for_each_target_safe(t, next, ctx) {
                if (i < sysfs_targets->nr) {
-                       damon_sysfs_update_target(t, ctx,
+                       err = damon_sysfs_update_target(t, ctx,
                                        sysfs_targets->targets_arr[i]);
+                       if (err)
+                               return err;
                } else {
                        if (damon_target_has_pid(ctx))
                                put_pid(t->pid);
index 9710f43..32eedf3 100644 (file)
@@ -3443,7 +3443,7 @@ static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf,
                 * handled in the specific fault path, and it'll prohibit the
                 * fault-around logic.
                 */
-               if (!pte_none(vmf->pte[count]))
+               if (!pte_none(ptep_get(&vmf->pte[count])))
                        goto skip;
 
                count++;
index f31f024..4f54244 100644 (file)
@@ -2769,13 +2769,15 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
                        int nr = folio_nr_pages(folio);
 
                        xas_split(&xas, folio, folio_order(folio));
-                       if (folio_test_swapbacked(folio)) {
-                               __lruvec_stat_mod_folio(folio, NR_SHMEM_THPS,
-                                                       -nr);
-                       } else {
-                               __lruvec_stat_mod_folio(folio, NR_FILE_THPS,
-                                                       -nr);
-                               filemap_nr_thps_dec(mapping);
+                       if (folio_test_pmd_mappable(folio)) {
+                               if (folio_test_swapbacked(folio)) {
+                                       __lruvec_stat_mod_folio(folio,
+                                                       NR_SHMEM_THPS, -nr);
+                               } else {
+                                       __lruvec_stat_mod_folio(folio,
+                                                       NR_FILE_THPS, -nr);
+                                       filemap_nr_thps_dec(mapping);
+                               }
                        }
                }
 
index 7efcc68..6a83100 100644 (file)
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -468,7 +468,7 @@ static int break_ksm_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long nex
                        page = pfn_swap_entry_to_page(entry);
        }
        /* return 1 if the page is a normal ksm page or KSM-placed zero page */
-       ret = (page && PageKsm(page)) || is_ksm_zero_pte(*pte);
+       ret = (page && PageKsm(page)) || is_ksm_zero_pte(ptent);
        pte_unmap_unlock(pte, ptl);
        return ret;
 }
index 774bd6e..1c1061d 100644 (file)
@@ -2936,7 +2936,8 @@ void mem_cgroup_commit_charge(struct folio *folio, struct mem_cgroup *memcg)
  * Moreover, it should not come from DMA buffer and is not readily
  * reclaimable. So those GFP bits should be masked off.
  */
-#define OBJCGS_CLEAR_MASK      (__GFP_DMA | __GFP_RECLAIMABLE | __GFP_ACCOUNT)
+#define OBJCGS_CLEAR_MASK      (__GFP_DMA | __GFP_RECLAIMABLE | \
+                                __GFP_ACCOUNT | __GFP_NOFAIL)
 
 /*
  * mod_objcg_mlstate() may be called with irq enabled, so
index 46f2f5d..ee2fd6a 100644 (file)
@@ -3107,7 +3107,7 @@ EXPORT_SYMBOL_GPL(folio_wait_writeback_killable);
  */
 void folio_wait_stable(struct folio *folio)
 {
-       if (folio_inode(folio)->i_sb->s_iflags & SB_I_STABLE_WRITES)
+       if (mapping_stable_writes(folio_mapping(folio)))
                folio_wait_writeback(folio);
 }
 EXPORT_SYMBOL_GPL(folio_wait_stable);
index 96d9eae..0b6ca55 100644 (file)
@@ -312,7 +312,7 @@ static int mfill_atomic_pte_poison(pmd_t *dst_pmd,
 
        ret = -EEXIST;
        /* Refuse to overwrite any PTE, even a PTE marker (e.g. UFFD WP). */
-       if (!pte_none(*dst_pte))
+       if (!pte_none(ptep_get(dst_pte)))
                goto out_unlock;
 
        set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
index aa01f6e..744b4d7 100644 (file)
--- a/mm/util.c
+++ b/mm/util.c
@@ -414,6 +414,15 @@ static int mmap_is_legacy(struct rlimit *rlim_stack)
 
 static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
 {
+#ifdef CONFIG_STACK_GROWSUP
+       /*
+        * For an upwards growing stack the calculation is much simpler.
+        * Memory for the maximum stack size is reserved at the top of the
+        * task. mmap_base starts directly below the stack and grows
+        * downwards.
+        */
+       return PAGE_ALIGN_DOWN(mmap_upper_limit(rlim_stack) - rnd);
+#else
        unsigned long gap = rlim_stack->rlim_cur;
        unsigned long pad = stack_guard_gap;
 
@@ -431,6 +440,7 @@ static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
                gap = MAX_GAP;
 
        return PAGE_ALIGN(STACK_TOP - gap - rnd);
+#endif
 }
 
 void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
index b5c406a..abb090f 100644 (file)
@@ -37,7 +37,7 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk,
        ktime_t tstamp = skb->tstamp;
        struct ip_frag_state state;
        struct iphdr *iph;
-       int err;
+       int err = 0;
 
        /* for offloaded checksums cleanup checksum before fragmentation */
        if (skb->ip_summed == CHECKSUM_PARTIAL &&
index 0d54843..c879246 100644 (file)
@@ -1119,7 +1119,9 @@ static int __dev_alloc_name(struct net *net, const char *name, char *res)
        if (i == max_netdevices)
                return -ENFILE;
 
-       snprintf(res, IFNAMSIZ, name, i);
+       /* 'res' and 'name' could overlap, use 'buf' as an intermediate buffer */
+       strscpy(buf, name, IFNAMSIZ);
+       snprintf(res, IFNAMSIZ, buf, i);
        return i;
 }
 
@@ -10049,6 +10051,54 @@ void netif_tx_stop_all_queues(struct net_device *dev)
 }
 EXPORT_SYMBOL(netif_tx_stop_all_queues);
 
+static int netdev_do_alloc_pcpu_stats(struct net_device *dev)
+{
+       void __percpu *v;
+
+       /* Drivers implementing ndo_get_peer_dev must support tstat
+        * accounting, so that skb_do_redirect() can bump the dev's
+        * RX stats upon network namespace switch.
+        */
+       if (dev->netdev_ops->ndo_get_peer_dev &&
+           dev->pcpu_stat_type != NETDEV_PCPU_STAT_TSTATS)
+               return -EOPNOTSUPP;
+
+       switch (dev->pcpu_stat_type) {
+       case NETDEV_PCPU_STAT_NONE:
+               return 0;
+       case NETDEV_PCPU_STAT_LSTATS:
+               v = dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats);
+               break;
+       case NETDEV_PCPU_STAT_TSTATS:
+               v = dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+               break;
+       case NETDEV_PCPU_STAT_DSTATS:
+               v = dev->dstats = netdev_alloc_pcpu_stats(struct pcpu_dstats);
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       return v ? 0 : -ENOMEM;
+}
+
+static void netdev_do_free_pcpu_stats(struct net_device *dev)
+{
+       switch (dev->pcpu_stat_type) {
+       case NETDEV_PCPU_STAT_NONE:
+               return;
+       case NETDEV_PCPU_STAT_LSTATS:
+               free_percpu(dev->lstats);
+               break;
+       case NETDEV_PCPU_STAT_TSTATS:
+               free_percpu(dev->tstats);
+               break;
+       case NETDEV_PCPU_STAT_DSTATS:
+               free_percpu(dev->dstats);
+               break;
+       }
+}
+
 /**
  * register_netdevice() - register a network device
  * @dev: device to register
@@ -10109,9 +10159,13 @@ int register_netdevice(struct net_device *dev)
                goto err_uninit;
        }
 
+       ret = netdev_do_alloc_pcpu_stats(dev);
+       if (ret)
+               goto err_uninit;
+
        ret = dev_index_reserve(net, dev->ifindex);
        if (ret < 0)
-               goto err_uninit;
+               goto err_free_pcpu;
        dev->ifindex = ret;
 
        /* Transfer changeable features to wanted_features and enable
@@ -10217,6 +10271,8 @@ err_uninit_notify:
        call_netdevice_notifiers(NETDEV_PRE_UNINIT, dev);
 err_ifindex_release:
        dev_index_release(net, dev->ifindex);
+err_free_pcpu:
+       netdev_do_free_pcpu_stats(dev);
 err_uninit:
        if (dev->netdev_ops->ndo_uninit)
                dev->netdev_ops->ndo_uninit(dev);
@@ -10469,6 +10525,7 @@ void netdev_run_todo(void)
                WARN_ON(rcu_access_pointer(dev->ip_ptr));
                WARN_ON(rcu_access_pointer(dev->ip6_ptr));
 
+               netdev_do_free_pcpu_stats(dev);
                if (dev->priv_destructor)
                        dev->priv_destructor(dev);
                if (dev->needs_free_netdev)
index 383f96b..7e4d7c3 100644 (file)
@@ -81,6 +81,7 @@
 #include <net/xdp.h>
 #include <net/mptcp.h>
 #include <net/netfilter/nf_conntrack_bpf.h>
+#include <net/netkit.h>
 #include <linux/un.h>
 
 #include "dev.h"
@@ -2468,6 +2469,16 @@ static const struct bpf_func_proto bpf_clone_redirect_proto = {
 DEFINE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
 EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info);
 
+static struct net_device *skb_get_peer_dev(struct net_device *dev)
+{
+       const struct net_device_ops *ops = dev->netdev_ops;
+
+       if (likely(ops->ndo_get_peer_dev))
+               return INDIRECT_CALL_1(ops->ndo_get_peer_dev,
+                                      netkit_peer_dev, dev);
+       return NULL;
+}
+
 int skb_do_redirect(struct sk_buff *skb)
 {
        struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
@@ -2481,17 +2492,15 @@ int skb_do_redirect(struct sk_buff *skb)
        if (unlikely(!dev))
                goto out_drop;
        if (flags & BPF_F_PEER) {
-               const struct net_device_ops *ops = dev->netdev_ops;
-
-               if (unlikely(!ops->ndo_get_peer_dev ||
-                            !skb_at_tc_ingress(skb)))
+               if (unlikely(!skb_at_tc_ingress(skb)))
                        goto out_drop;
-               dev = ops->ndo_get_peer_dev(dev);
+               dev = skb_get_peer_dev(dev);
                if (unlikely(!dev ||
                             !(dev->flags & IFF_UP) ||
                             net_eq(net, dev_net(dev))))
                        goto out_drop;
                skb->dev = dev;
+               dev_sw_netstats_rx_add(dev, skb->len);
                return -EAGAIN;
        }
        return flags & BPF_F_NEIGH ?
index ceb684b..4c2e77b 100644 (file)
@@ -180,18 +180,17 @@ static void gso_test_func(struct kunit *test)
        }
 
        if (tcase->frag_skbs) {
-               unsigned int total_size = 0, total_true_size = 0, alloc_size = 0;
+               unsigned int total_size = 0, total_true_size = 0;
                struct sk_buff *frag_skb, *prev = NULL;
 
-               page = alloc_page(GFP_KERNEL);
-               KUNIT_ASSERT_NOT_NULL(test, page);
-               page_ref_add(page, tcase->nr_frag_skbs - 1);
-
                for (i = 0; i < tcase->nr_frag_skbs; i++) {
                        unsigned int frag_size;
 
+                       page = alloc_page(GFP_KERNEL);
+                       KUNIT_ASSERT_NOT_NULL(test, page);
+
                        frag_size = tcase->frag_skbs[i];
-                       frag_skb = build_skb(page_address(page) + alloc_size,
+                       frag_skb = build_skb(page_address(page),
                                             frag_size + shinfo_size);
                        KUNIT_ASSERT_NOT_NULL(test, frag_skb);
                        __skb_put(frag_skb, frag_size);
@@ -204,11 +203,8 @@ static void gso_test_func(struct kunit *test)
 
                        total_size += frag_size;
                        total_true_size += frag_skb->truesize;
-                       alloc_size += frag_size + shinfo_size;
                }
 
-               KUNIT_ASSERT_LE(test, alloc_size, PAGE_SIZE);
-
                skb->len += total_size;
                skb->data_len += total_size;
                skb->truesize += total_true_size;
index f01aee8..7d0e7aa 100644 (file)
@@ -1481,5 +1481,6 @@ static void __exit inet_diag_exit(void)
 module_init(inet_diag_init);
 module_exit(inet_diag_exit);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("INET/INET6: socket monitoring via SOCK_DIAG");
 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2 /* AF_INET */);
 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 10 /* AF_INET6 */);
index 598c1b1..a532f74 100644 (file)
@@ -751,12 +751,12 @@ int __inet_hash(struct sock *sk, struct sock *osk)
                if (err)
                        goto unlock;
        }
+       sock_set_flag(sk, SOCK_RCU_FREE);
        if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
                sk->sk_family == AF_INET6)
                __sk_nulls_add_node_tail_rcu(sk, &ilb2->nulls_head);
        else
                __sk_nulls_add_node_rcu(sk, &ilb2->nulls_head);
-       sock_set_flag(sk, SOCK_RCU_FREE);
        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 unlock:
        spin_unlock(&ilb2->lock);
index 63a40e4..fe2140c 100644 (file)
@@ -257,5 +257,6 @@ static void __exit raw_diag_exit(void)
 module_init(raw_diag_init);
 module_exit(raw_diag_exit);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("RAW socket monitoring via SOCK_DIAG");
 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-255 /* AF_INET - IPPROTO_RAW */);
 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 10-255 /* AF_INET6 - IPPROTO_RAW */);
index 3290a44..16615d1 100644 (file)
@@ -780,7 +780,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
                        goto reject_redirect;
        }
 
-       n = __ipv4_neigh_lookup(rt->dst.dev, new_gw);
+       n = __ipv4_neigh_lookup(rt->dst.dev, (__force u32)new_gw);
        if (!n)
                n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev);
        if (!IS_ERR(n)) {
index 01b50fa..4cbe4b4 100644 (file)
@@ -247,4 +247,5 @@ static void __exit tcp_diag_exit(void)
 module_init(tcp_diag_init);
 module_exit(tcp_diag_exit);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("TCP socket monitoring via SOCK_DIAG");
 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-6 /* AF_INET - IPPROTO_TCP */);
index de3f2d3..dc41a22 100644 (file)
@@ -296,5 +296,6 @@ static void __exit udp_diag_exit(void)
 module_init(udp_diag_init);
 module_exit(udp_diag_exit);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("UDP socket monitoring via SOCK_DIAG");
 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-17 /* AF_INET - IPPROTO_UDP */);
 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-136 /* AF_INET - IPPROTO_UDPLITE */);
index 8df1bdb..5409c2e 100644 (file)
@@ -245,4 +245,5 @@ static void __exit mptcp_diag_exit(void)
 module_init(mptcp_diag_init);
 module_exit(mptcp_diag_exit);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("MPTCP socket monitoring via SOCK_DIAG");
 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-262 /* AF_INET - IPPROTO_MPTCP */);
index 1529ec3..bf4d96f 100644 (file)
@@ -1515,8 +1515,9 @@ void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list)
        struct mptcp_pm_addr_entry *entry;
 
        list_for_each_entry(entry, rm_list, list) {
-               remove_anno_list_by_saddr(msk, &entry->addr);
-               if (alist.nr < MPTCP_RM_IDS_MAX)
+               if ((remove_anno_list_by_saddr(msk, &entry->addr) ||
+                    lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) &&
+                   alist.nr < MPTCP_RM_IDS_MAX)
                        alist.ids[alist.nr++] = entry->addr.id;
        }
 
index a0b8356..bc81ea5 100644 (file)
@@ -1230,6 +1230,8 @@ static void mptcp_update_infinite_map(struct mptcp_sock *msk,
        mptcp_do_fallback(ssk);
 }
 
+#define MPTCP_MAX_GSO_SIZE (GSO_LEGACY_MAX_SIZE - (MAX_TCP_HEADER + 1))
+
 static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
                              struct mptcp_data_frag *dfrag,
                              struct mptcp_sendmsg_info *info)
@@ -1256,6 +1258,8 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
                return -EAGAIN;
 
        /* compute send limit */
+       if (unlikely(ssk->sk_gso_max_size > MPTCP_MAX_GSO_SIZE))
+               ssk->sk_gso_max_size = MPTCP_MAX_GSO_SIZE;
        info->mss_now = tcp_send_mss(ssk, &info->size_goal, info->flags);
        copy = info->size_goal;
 
@@ -3398,10 +3402,11 @@ static void mptcp_release_cb(struct sock *sk)
        if (__test_and_clear_bit(MPTCP_CLEAN_UNA, &msk->cb_flags))
                __mptcp_clean_una_wakeup(sk);
        if (unlikely(msk->cb_flags)) {
-               /* be sure to set the current sk state before tacking actions
-                * depending on sk_state, that is processing MPTCP_ERROR_REPORT
+               /* be sure to set the current sk state before taking actions
+                * depending on sk_state (MPTCP_ERROR_REPORT)
+                * On sk release avoid actions depending on the first subflow
                 */
-               if (__test_and_clear_bit(MPTCP_CONNECTED, &msk->cb_flags))
+               if (__test_and_clear_bit(MPTCP_CONNECTED, &msk->cb_flags) && msk->first)
                        __mptcp_set_connected(sk);
                if (__test_and_clear_bit(MPTCP_ERROR_REPORT, &msk->cb_flags))
                        __mptcp_error_report(sk);
index 77f5e89..3536807 100644 (file)
@@ -738,8 +738,11 @@ static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname,
        val = READ_ONCE(inet_sk(sk)->tos);
        mptcp_for_each_subflow(msk, subflow) {
                struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+               bool slow;
 
+               slow = lock_sock_fast(ssk);
                __ip_sock_set_tos(ssk, val);
+               unlock_sock_fast(ssk, slow);
        }
        release_sock(sk);
 
index f8854bf..62fb103 100644 (file)
@@ -89,11 +89,6 @@ static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp,
        if ((had_link == has_link) || chained)
                return 0;
 
-       if (had_link)
-               netif_carrier_off(ndp->ndev.dev);
-       else
-               netif_carrier_on(ndp->ndev.dev);
-
        if (!ndp->multi_package && !nc->package->multi_channel) {
                if (had_link) {
                        ndp->flags |= NCSI_DEV_RESHUFFLE;
index 35d2f9c..4c133e0 100644 (file)
@@ -61,6 +61,8 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
        ip_set_dereference((inst)->ip_set_list)[id]
 #define ip_set_ref_netlink(inst,id)    \
        rcu_dereference_raw((inst)->ip_set_list)[id]
+#define ip_set_dereference_nfnl(p)     \
+       rcu_dereference_check(p, lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))
 
 /* The set types are implemented in modules and registered set types
  * can be found in ip_set_type_list. Adding/deleting types is
@@ -708,15 +710,10 @@ __ip_set_put_netlink(struct ip_set *set)
 static struct ip_set *
 ip_set_rcu_get(struct net *net, ip_set_id_t index)
 {
-       struct ip_set *set;
        struct ip_set_net *inst = ip_set_pernet(net);
 
-       rcu_read_lock();
-       /* ip_set_list itself needs to be protected */
-       set = rcu_dereference(inst->ip_set_list)[index];
-       rcu_read_unlock();
-
-       return set;
+       /* ip_set_list and the set pointer need to be protected */
+       return ip_set_dereference_nfnl(inst->ip_set_list)[index];
 }
 
 static inline void
@@ -1397,6 +1394,9 @@ static int ip_set_swap(struct sk_buff *skb, const struct nfnl_info *info,
        ip_set(inst, to_id) = from;
        write_unlock_bh(&ip_set_ref_lock);
 
+       /* Make sure all readers of the old set pointers are completed. */
+       synchronize_rcu();
+
        return 0;
 }
 
index a761ee6..c0a4298 100644 (file)
@@ -7263,10 +7263,11 @@ static int nf_tables_delsetelem(struct sk_buff *skb,
 
                if (err < 0) {
                        NL_SET_BAD_ATTR(extack, attr);
-                       break;
+                       return err;
                }
        }
-       return err;
+
+       return 0;
 }
 
 /*
@@ -9679,16 +9680,14 @@ void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans)
        call_rcu(&trans->rcu, nft_trans_gc_trans_free);
 }
 
-static struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
-                                                 unsigned int gc_seq,
-                                                 bool sync)
+struct nft_trans_gc *nft_trans_gc_catchall_async(struct nft_trans_gc *gc,
+                                                unsigned int gc_seq)
 {
-       struct nft_set_elem_catchall *catchall, *next;
+       struct nft_set_elem_catchall *catchall;
        const struct nft_set *set = gc->set;
-       struct nft_elem_priv *elem_priv;
        struct nft_set_ext *ext;
 
-       list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
+       list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
                ext = nft_set_elem_ext(set, catchall->elem);
 
                if (!nft_set_elem_expired(ext))
@@ -9698,35 +9697,42 @@ static struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
 
                nft_set_elem_dead(ext);
 dead_elem:
-               if (sync)
-                       gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
-               else
-                       gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
-
+               gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
                if (!gc)
                        return NULL;
 
-               elem_priv = catchall->elem;
-               if (sync) {
-                       nft_setelem_data_deactivate(gc->net, gc->set, elem_priv);
-                       nft_setelem_catchall_destroy(catchall);
-               }
-
-               nft_trans_gc_elem_add(gc, elem_priv);
+               nft_trans_gc_elem_add(gc, catchall->elem);
        }
 
        return gc;
 }
 
-struct nft_trans_gc *nft_trans_gc_catchall_async(struct nft_trans_gc *gc,
-                                                unsigned int gc_seq)
-{
-       return nft_trans_gc_catchall(gc, gc_seq, false);
-}
-
 struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc)
 {
-       return nft_trans_gc_catchall(gc, 0, true);
+       struct nft_set_elem_catchall *catchall, *next;
+       const struct nft_set *set = gc->set;
+       struct nft_elem_priv *elem_priv;
+       struct nft_set_ext *ext;
+
+       WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net));
+
+       list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
+               ext = nft_set_elem_ext(set, catchall->elem);
+
+               if (!nft_set_elem_expired(ext))
+                       continue;
+
+               gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
+               if (!gc)
+                       return NULL;
+
+               elem_priv = catchall->elem;
+               nft_setelem_data_deactivate(gc->net, gc->set, elem_priv);
+               nft_setelem_catchall_destroy(catchall);
+               nft_trans_gc_elem_add(gc, elem_priv);
+       }
+
+       return gc;
 }
 
 static void nf_tables_module_autoload_cleanup(struct net *net)
index e596d1a..f6e791a 100644 (file)
@@ -38,13 +38,14 @@ void nft_byteorder_eval(const struct nft_expr *expr,
 
        switch (priv->size) {
        case 8: {
+               u64 *dst64 = (void *)dst;
                u64 src64;
 
                switch (priv->op) {
                case NFT_BYTEORDER_NTOH:
                        for (i = 0; i < priv->len / 8; i++) {
                                src64 = nft_reg_load64(&src[i]);
-                               nft_reg_store64(&dst[i],
+                               nft_reg_store64(&dst64[i],
                                                be64_to_cpu((__force __be64)src64));
                        }
                        break;
@@ -52,7 +53,7 @@ void nft_byteorder_eval(const struct nft_expr *expr,
                        for (i = 0; i < priv->len / 8; i++) {
                                src64 = (__force __u64)
                                        cpu_to_be64(nft_reg_load64(&src[i]));
-                               nft_reg_store64(&dst[i], src64);
+                               nft_reg_store64(&dst64[i], src64);
                        }
                        break;
                }
index f7da7c4..ba0d368 100644 (file)
@@ -63,7 +63,7 @@ nft_meta_get_eval_time(enum nft_meta_keys key,
 {
        switch (key) {
        case NFT_META_TIME_NS:
-               nft_reg_store64(dest, ktime_get_real_ns());
+               nft_reg_store64((u64 *)dest, ktime_get_real_ns());
                break;
        case NFT_META_TIME_DAY:
                nft_reg_store8(dest, nft_meta_weekday());
index 6f1186a..baa3fea 100644 (file)
@@ -624,14 +624,12 @@ static void nft_rbtree_gc(struct nft_set *set)
 {
        struct nft_rbtree *priv = nft_set_priv(set);
        struct nft_rbtree_elem *rbe, *rbe_end = NULL;
-       struct nftables_pernet *nft_net;
        struct rb_node *node, *next;
        struct nft_trans_gc *gc;
        struct net *net;
 
        set  = nft_set_container_of(priv);
        net  = read_pnet(&set->net);
-       nft_net = nft_pernet(net);
 
        gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
        if (!gc)
index f6b200c..9a7980e 100644 (file)
@@ -262,4 +262,5 @@ static void __exit packet_diag_exit(void)
 module_init(packet_diag_init);
 module_exit(packet_diag_exit);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("PACKET socket monitoring via SOCK_DIAG");
 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 17 /* AF_PACKET */);
index 981ca5b..1d95f8b 100644 (file)
@@ -73,6 +73,7 @@ static void rxrpc_destroy_client_conn_ids(struct rxrpc_local *local)
 static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_call *call,
                                               gfp_t gfp)
 {
+       static atomic_t rxrpc_bundle_id;
        struct rxrpc_bundle *bundle;
 
        bundle = kzalloc(sizeof(*bundle), gfp);
@@ -85,6 +86,7 @@ static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_call *call,
                bundle->upgrade         = test_bit(RXRPC_CALL_UPGRADE, &call->flags);
                bundle->service_id      = call->dest_srx.srx_service;
                bundle->security_level  = call->security_level;
+               bundle->debug_id        = atomic_inc_return(&rxrpc_bundle_id);
                refcount_set(&bundle->ref, 1);
                atomic_set(&bundle->active, 1);
                INIT_LIST_HEAD(&bundle->waiting_calls);
@@ -105,7 +107,8 @@ struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *bundle,
 
 static void rxrpc_free_bundle(struct rxrpc_bundle *bundle)
 {
-       trace_rxrpc_bundle(bundle->debug_id, 1, rxrpc_bundle_free);
+       trace_rxrpc_bundle(bundle->debug_id, refcount_read(&bundle->ref),
+                          rxrpc_bundle_free);
        rxrpc_put_peer(bundle->peer, rxrpc_peer_put_bundle);
        key_put(bundle->key);
        kfree(bundle);
@@ -239,7 +242,6 @@ dont_reuse:
  */
 int rxrpc_look_up_bundle(struct rxrpc_call *call, gfp_t gfp)
 {
-       static atomic_t rxrpc_bundle_id;
        struct rxrpc_bundle *bundle, *candidate;
        struct rxrpc_local *local = call->local;
        struct rb_node *p, **pp, *parent;
@@ -306,7 +308,6 @@ int rxrpc_look_up_bundle(struct rxrpc_call *call, gfp_t gfp)
        }
 
        _debug("new bundle");
-       candidate->debug_id = atomic_inc_return(&rxrpc_bundle_id);
        rb_link_node(&candidate->local_node, parent, pp);
        rb_insert_color(&candidate->local_node, &local->client_bundles);
        call->bundle = rxrpc_get_bundle(candidate, rxrpc_bundle_get_client_call);
index 030d64f..92495e7 100644 (file)
@@ -643,12 +643,8 @@ static void rxrpc_complete_rtt_probe(struct rxrpc_call *call,
                        clear_bit(i + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail);
                        smp_mb(); /* Read data before setting avail bit */
                        set_bit(i, &call->rtt_avail);
-                       if (type != rxrpc_rtt_rx_cancel)
-                               rxrpc_peer_add_rtt(call, type, i, acked_serial, ack_serial,
-                                                  sent_at, resp_time);
-                       else
-                               trace_rxrpc_rtt_rx(call, rxrpc_rtt_rx_cancel, i,
-                                                  orig_serial, acked_serial, 0, 0);
+                       rxrpc_peer_add_rtt(call, type, i, acked_serial, ack_serial,
+                                          sent_at, resp_time);
                        matched = true;
                }
 
@@ -801,28 +797,21 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
                           summary.ack_reason, nr_acks);
        rxrpc_inc_stat(call->rxnet, stat_rx_acks[ack.reason]);
 
-       switch (ack.reason) {
-       case RXRPC_ACK_PING_RESPONSE:
-               rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial,
-                                        rxrpc_rtt_rx_ping_response);
-               break;
-       case RXRPC_ACK_REQUESTED:
-               rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial,
-                                        rxrpc_rtt_rx_requested_ack);
-               break;
-       default:
-               if (acked_serial != 0)
+       if (acked_serial != 0) {
+               switch (ack.reason) {
+               case RXRPC_ACK_PING_RESPONSE:
                        rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial,
-                                                rxrpc_rtt_rx_cancel);
-               break;
-       }
-
-       if (ack.reason == RXRPC_ACK_PING) {
-               rxrpc_send_ACK(call, RXRPC_ACK_PING_RESPONSE, ack_serial,
-                              rxrpc_propose_ack_respond_to_ping);
-       } else if (sp->hdr.flags & RXRPC_REQUEST_ACK) {
-               rxrpc_send_ACK(call, RXRPC_ACK_REQUESTED, ack_serial,
-                              rxrpc_propose_ack_respond_to_ack);
+                                                rxrpc_rtt_rx_ping_response);
+                       break;
+               case RXRPC_ACK_REQUESTED:
+                       rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial,
+                                                rxrpc_rtt_rx_requested_ack);
+                       break;
+               default:
+                       rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial,
+                                                rxrpc_rtt_rx_other_ack);
+                       break;
+               }
        }
 
        /* If we get an EXCEEDS_WINDOW ACK from the server, it probably
@@ -835,7 +824,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
            rxrpc_is_client_call(call)) {
                rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
                                          0, -ENETRESET);
-               return;
+               goto send_response;
        }
 
        /* If we get an OUT_OF_SEQUENCE ACK from the server, that can also
@@ -849,7 +838,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
            rxrpc_is_client_call(call)) {
                rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
                                          0, -ENETRESET);
-               return;
+               goto send_response;
        }
 
        /* Discard any out-of-order or duplicate ACKs (outside lock). */
@@ -857,7 +846,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
                trace_rxrpc_rx_discard_ack(call->debug_id, ack_serial,
                                           first_soft_ack, call->acks_first_seq,
                                           prev_pkt, call->acks_prev_seq);
-               return;
+               goto send_response;
        }
 
        info.rxMTU = 0;
@@ -897,7 +886,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
        case RXRPC_CALL_SERVER_AWAIT_ACK:
                break;
        default:
-               return;
+               goto send_response;
        }
 
        if (before(hard_ack, call->acks_hard_ack) ||
@@ -909,7 +898,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
        if (after(hard_ack, call->acks_hard_ack)) {
                if (rxrpc_rotate_tx_window(call, hard_ack, &summary)) {
                        rxrpc_end_tx_phase(call, false, rxrpc_eproto_unexpected_ack);
-                       return;
+                       goto send_response;
                }
        }
 
@@ -927,6 +916,14 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
                                   rxrpc_propose_ack_ping_for_lost_reply);
 
        rxrpc_congestion_management(call, skb, &summary, acked_serial);
+
+send_response:
+       if (ack.reason == RXRPC_ACK_PING)
+               rxrpc_send_ACK(call, RXRPC_ACK_PING_RESPONSE, ack_serial,
+                              rxrpc_propose_ack_respond_to_ping);
+       else if (sp->hdr.flags & RXRPC_REQUEST_ACK)
+               rxrpc_send_ACK(call, RXRPC_ACK_REQUESTED, ack_serial,
+                              rxrpc_propose_ack_respond_to_ack);
 }
 
 /*
index 0db0ecf..b3f4a50 100644 (file)
@@ -1549,6 +1549,9 @@ static int tcf_ct_offload_act_setup(struct tc_action *act, void *entry_data,
        if (bind) {
                struct flow_action_entry *entry = entry_data;
 
+               if (tcf_ct_helper(act))
+                       return -EOPNOTSUPP;
+
                entry->id = FLOW_ACTION_CT;
                entry->ct.action = tcf_ct_action(act);
                entry->ct.zone = tcf_ct_zone(act);
index c3d6b92..eb05131 100644 (file)
@@ -527,4 +527,5 @@ static void __exit sctp_diag_exit(void)
 module_init(sctp_diag_init);
 module_exit(sctp_diag_exit);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SCTP socket monitoring via SOCK_DIAG");
 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-132);
index da97f94..2a13888 100644 (file)
@@ -598,8 +598,12 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc)
        struct smc_llc_qentry *qentry;
        int rc;
 
-       /* receive CONFIRM LINK request from server over RoCE fabric */
-       qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
+       /* Receive CONFIRM LINK request from server over RoCE fabric.
+        * Increasing the client's timeout by twice as much as the server's
+        * timeout by default can temporarily avoid decline messages of
+        * both sides crossing or colliding
+        */
+       qentry = smc_llc_wait(link->lgr, NULL, 2 * SMC_LLC_WAIT_TIME,
                              SMC_LLC_CONFIRM_LINK);
        if (!qentry) {
                struct smc_clc_msg_decline dclc;
index 7ff2152..a584613 100644 (file)
@@ -268,5 +268,6 @@ static void __exit smc_diag_exit(void)
 module_init(smc_diag_init);
 module_exit(smc_diag_exit);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SMC socket monitoring via SOCK_DIAG");
 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 43 /* AF_SMC */);
 MODULE_ALIAS_GENL_FAMILY(SMCR_GENL_FAMILY_NAME);
index 73137f4..1873345 100644 (file)
@@ -113,4 +113,5 @@ module_init(tipc_diag_init);
 module_exit(tipc_diag_exit);
 
 MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("TIPC socket monitoring via SOCK_DIAG");
 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, AF_TIPC);
index 5bc076f..c763008 100644 (file)
@@ -102,6 +102,7 @@ static int tipc_add_tlv(struct sk_buff *skb, u16 type, void *data, u16 len)
                return -EMSGSIZE;
 
        skb_put(skb, TLV_SPACE(len));
+       memset(tlv, 0, TLV_SPACE(len));
        tlv->tlv_type = htons(type);
        tlv->tlv_len = htons(TLV_LENGTH(len));
        if (len && data)
index a78e8e7..316f761 100644 (file)
@@ -1232,11 +1232,14 @@ void tls_sw_splice_eof(struct socket *sock)
        lock_sock(sk);
 
 retry:
+       /* same checks as in tls_sw_push_pending_record() */
        rec = ctx->open_rec;
        if (!rec)
                goto unlock;
 
        msg_pl = &rec->msg_plaintext;
+       if (msg_pl->sg.size == 0)
+               goto unlock;
 
        /* Check the BPF advisor and perform transmission. */
        ret = bpf_exec_tx_verdict(msg_pl, sk, false, TLS_RECORD_TYPE_DATA,
index 45506a9..a357dc5 100644 (file)
@@ -2581,15 +2581,16 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state)
 
        if (!(state->flags & MSG_PEEK))
                WRITE_ONCE(u->oob_skb, NULL);
-
+       else
+               skb_get(oob_skb);
        unix_state_unlock(sk);
 
        chunk = state->recv_actor(oob_skb, 0, chunk, state);
 
-       if (!(state->flags & MSG_PEEK)) {
+       if (!(state->flags & MSG_PEEK))
                UNIXCB(oob_skb).consumed += 1;
-               kfree_skb(oob_skb);
-       }
+
+       consume_skb(oob_skb);
 
        mutex_unlock(&u->iolock);
 
index 616b55c..bec09a3 100644 (file)
@@ -339,4 +339,5 @@ static void __exit unix_diag_exit(void)
 module_init(unix_diag_init);
 module_exit(unix_diag_exit);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("UNIX socket monitoring via SOCK_DIAG");
 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 1 /* AF_LOCAL */);
index a2823b1..2e29994 100644 (file)
@@ -174,5 +174,6 @@ static void __exit vsock_diag_exit(void)
 module_init(vsock_diag_init);
 module_exit(vsock_diag_exit);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("VMware Virtual Sockets monitoring via SOCK_DIAG");
 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG,
                               40 /* AF_VSOCK */);
index 22b36c8..9f89553 100644 (file)
@@ -211,4 +211,5 @@ static void __exit xsk_diag_exit(void)
 module_init(xsk_diag_init);
 module_exit(xsk_diag_exit);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("XDP socket monitoring via SOCK_DIAG");
 MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, AF_XDP);
index 68d0134..1a965fe 100644 (file)
@@ -487,14 +487,14 @@ UIMAGE_OPTS-y ?=
 UIMAGE_TYPE ?= kernel
 UIMAGE_LOADADDR ?= arch_must_set_this
 UIMAGE_ENTRYADDR ?= $(UIMAGE_LOADADDR)
-UIMAGE_NAME ?= 'Linux-$(KERNELRELEASE)'
+UIMAGE_NAME ?= Linux-$(KERNELRELEASE)
 
 quiet_cmd_uimage = UIMAGE  $@
       cmd_uimage = $(BASH) $(MKIMAGE) -A $(UIMAGE_ARCH) -O linux \
                        -C $(UIMAGE_COMPRESSION) $(UIMAGE_OPTS-y) \
                        -T $(UIMAGE_TYPE) \
                        -a $(UIMAGE_LOADADDR) -e $(UIMAGE_ENTRYADDR) \
-                       -n $(UIMAGE_NAME) -d $< $@
+                       -n '$(UIMAGE_NAME)' -d $< $@
 
 # XZ
 # ---------------------------------------------------------------------------
index 84f5fb7..d83ba5d 100755 (executable)
@@ -97,8 +97,7 @@ my (@stack, $re, $dre, $sub, $x, $xs, $funcre, $min_stack);
                #   11160:       a7 fb ff 60             aghi   %r15,-160
                # or
                #  100092:       e3 f0 ff c8 ff 71       lay     %r15,-56(%r15)
-               $re = qr/.*(?:lay|ag?hi).*\%r15,-(([0-9]{2}|[3-9])[0-9]{2})
-                     (?:\(\%r15\))?$/ox;
+               $re = qr/.*(?:lay|ag?hi).*\%r15,-([0-9]+)(?:\(\%r15\))?$/o;
        } elsif ($arch eq 'sparc' || $arch eq 'sparc64') {
                # f0019d10:       9d e3 bf 90     save  %sp, -112, %sp
                $re = qr/.*save.*%sp, -(([0-9]{2}|[3-9])[0-9]{2}), %sp/o;
index 39e86be..ff0b192 100644 (file)
@@ -17,7 +17,7 @@
  *     if (argc <= 1)
  *             printf("%s: no command arguments :(\n", *argv);
  *     else
- *             printf("%s: %d command arguments!\n", *argv, args - 1);
+ *             printf("%s: %d command arguments!\n", *argv, argc - 1);
  * }
  *
  * after:
@@ -47,7 +47,7 @@
  *             // perturb_local_entropy()
  *     } else {
  *             local_entropy ^= 3896280633962944730;
- *             printf("%s: %d command arguments!\n", *argv, args - 1);
+ *             printf("%s: %d command arguments!\n", *argv, argc - 1);
  *     }
  *
  *     // latent_entropy_execute() 4.
index 366395c..910bd21 100644 (file)
@@ -278,8 +278,6 @@ static bool is_flexible_array(const_tree field)
 {
        const_tree fieldtype;
        const_tree typesize;
-       const_tree elemtype;
-       const_tree elemsize;
 
        fieldtype = TREE_TYPE(field);
        typesize = TYPE_SIZE(fieldtype);
@@ -287,20 +285,12 @@ static bool is_flexible_array(const_tree field)
        if (TREE_CODE(fieldtype) != ARRAY_TYPE)
                return false;
 
-       elemtype = TREE_TYPE(fieldtype);
-       elemsize = TYPE_SIZE(elemtype);
-
        /* size of type is represented in bits */
 
        if (typesize == NULL_TREE && TYPE_DOMAIN(fieldtype) != NULL_TREE &&
            TYPE_MAX_VALUE(TYPE_DOMAIN(fieldtype)) == NULL_TREE)
                return true;
 
-       if (typesize != NULL_TREE &&
-           (TREE_CONSTANT(typesize) && (!tree_to_uhwi(typesize) ||
-            tree_to_uhwi(typesize) == tree_to_uhwi(elemsize))))
-               return true;
-
        return false;
 }
 
index 0572330..a76925b 100644 (file)
@@ -122,9 +122,9 @@ static long long sym_get_range_val(struct symbol *sym, int base)
 static void sym_validate_range(struct symbol *sym)
 {
        struct property *prop;
+       struct symbol *range_sym;
        int base;
        long long val, val2;
-       char str[64];
 
        switch (sym->type) {
        case S_INT:
@@ -140,17 +140,15 @@ static void sym_validate_range(struct symbol *sym)
        if (!prop)
                return;
        val = strtoll(sym->curr.val, NULL, base);
-       val2 = sym_get_range_val(prop->expr->left.sym, base);
+       range_sym = prop->expr->left.sym;
+       val2 = sym_get_range_val(range_sym, base);
        if (val >= val2) {
-               val2 = sym_get_range_val(prop->expr->right.sym, base);
+               range_sym = prop->expr->right.sym;
+               val2 = sym_get_range_val(range_sym, base);
                if (val <= val2)
                        return;
        }
-       if (sym->type == S_INT)
-               sprintf(str, "%lld", val2);
-       else
-               sprintf(str, "0x%llx", val2);
-       sym->curr.val = xstrdup(str);
+       sym->curr.val = range_sym->curr.val;
 }
 
 static void sym_set_changed(struct symbol *sym)
index 973b5e5..cb6406f 100644 (file)
@@ -1383,13 +1383,15 @@ static void section_rela(struct module *mod, struct elf_info *elf,
        const Elf_Rela *rela;
 
        for (rela = start; rela < stop; rela++) {
+               Elf_Sym *tsym;
                Elf_Addr taddr, r_offset;
                unsigned int r_type, r_sym;
 
                r_offset = TO_NATIVE(rela->r_offset);
                get_rel_type_and_sym(elf, rela->r_info, &r_type, &r_sym);
 
-               taddr = TO_NATIVE(rela->r_addend);
+               tsym = elf->symtab_start + r_sym;
+               taddr = tsym->st_value + TO_NATIVE(rela->r_addend);
 
                switch (elf->hdr->e_machine) {
                case EM_RISCV:
@@ -1404,7 +1406,7 @@ static void section_rela(struct module *mod, struct elf_info *elf,
                        break;
                }
 
-               check_section_mismatch(mod, elf, elf->symtab_start + r_sym,
+               check_section_mismatch(mod, elf, tsym,
                                       fsecndx, fromsec, r_offset, taddr);
        }
 }
index 757a4d1..a9ef6d8 100644 (file)
@@ -21,6 +21,10 @@ static int cs35l56_hda_i2c_probe(struct i2c_client *clt)
                return -ENOMEM;
 
        cs35l56->base.dev = &clt->dev;
+
+#ifdef CS35L56_WAKE_HOLD_TIME_US
+       cs35l56->base.can_hibernate = true;
+#endif
        cs35l56->base.regmap = devm_regmap_init_i2c(clt, &cs35l56_regmap_i2c);
        if (IS_ERR(cs35l56->base.regmap)) {
                ret = PTR_ERR(cs35l56->base.regmap);
index 0326491..db90feb 100644 (file)
@@ -2135,6 +2135,9 @@ static int azx_probe(struct pci_dev *pci,
        if (chip->driver_caps & AZX_DCAPS_I915_COMPONENT) {
                err = snd_hdac_i915_init(azx_bus(chip));
                if (err < 0) {
+                       if (err == -EPROBE_DEFER)
+                               goto out_free;
+
                        /* if the controller is bound only with HDMI/DP
                         * (for HSW and BDW), we need to abort the probe;
                         * for other chips, still continue probing as other
index 669ae3d..5618b1d 100644 (file)
@@ -9832,6 +9832,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x103c, 0x8898, "HP EliteBook 845 G8 Notebook PC", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST),
        SND_PCI_QUIRK(0x103c, 0x88d0, "HP Pavilion 15-eh1xxx (mainboard 88D0)", ALC287_FIXUP_HP_GPIO_LED),
        SND_PCI_QUIRK(0x103c, 0x8902, "HP OMEN 16", ALC285_FIXUP_HP_MUTE_LED),
+       SND_PCI_QUIRK(0x103c, 0x890e, "HP 255 G8 Notebook PC", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2),
        SND_PCI_QUIRK(0x103c, 0x8919, "HP Pavilion Aero Laptop 13-be0xxx", ALC287_FIXUP_HP_GPIO_LED),
        SND_PCI_QUIRK(0x103c, 0x896d, "HP ZBook Firefly 16 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
        SND_PCI_QUIRK(0x103c, 0x896e, "HP EliteBook x360 830 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
@@ -9867,6 +9868,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x103c, 0x8abb, "HP ZBook Firefly 14 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
        SND_PCI_QUIRK(0x103c, 0x8ad1, "HP EliteBook 840 14 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
        SND_PCI_QUIRK(0x103c, 0x8ad2, "HP EliteBook 860 16 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x8b2f, "HP 255 15.6 inch G10 Notebook PC", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2),
        SND_PCI_QUIRK(0x103c, 0x8b42, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
        SND_PCI_QUIRK(0x103c, 0x8b43, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
        SND_PCI_QUIRK(0x103c, 0x8b44, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
@@ -9900,12 +9902,16 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x103c, 0x8c70, "HP EliteBook 835 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
        SND_PCI_QUIRK(0x103c, 0x8c71, "HP EliteBook 845 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
        SND_PCI_QUIRK(0x103c, 0x8c72, "HP EliteBook 865 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x8ca4, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x8ca7, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED),
        SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
        SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
        SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
        SND_PCI_QUIRK(0x1043, 0x10a1, "ASUS UX391UA", ALC294_FIXUP_ASUS_SPK),
        SND_PCI_QUIRK(0x1043, 0x10c0, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
        SND_PCI_QUIRK(0x1043, 0x10d0, "ASUS X540LA/X540LJ", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1043, 0x10d3, "ASUS K6500ZC", ALC294_FIXUP_ASUS_SPK),
        SND_PCI_QUIRK(0x1043, 0x115d, "Asus 1015E", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
        SND_PCI_QUIRK(0x1043, 0x11c0, "ASUS X556UR", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1043, 0x125e, "ASUS Q524UQK", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
@@ -9944,13 +9950,17 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1043, 0x19e1, "ASUS UX581LV", ALC295_FIXUP_ASUS_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1043, 0x1a13, "Asus G73Jw", ALC269_FIXUP_ASUS_G73JW),
        SND_PCI_QUIRK(0x1043, 0x1a30, "ASUS X705UD", ALC256_FIXUP_ASUS_MIC),
+       SND_PCI_QUIRK(0x1043, 0x1a63, "ASUS UX3405MA", ALC245_FIXUP_CS35L41_SPI_2),
        SND_PCI_QUIRK(0x1043, 0x1a83, "ASUS UM5302LA", ALC294_FIXUP_CS35L41_I2C_2),
        SND_PCI_QUIRK(0x1043, 0x1a8f, "ASUS UX582ZS", ALC245_FIXUP_CS35L41_SPI_2),
        SND_PCI_QUIRK(0x1043, 0x1b11, "ASUS UX431DA", ALC294_FIXUP_ASUS_COEF_1B),
        SND_PCI_QUIRK(0x1043, 0x1b13, "Asus U41SV", ALC269_FIXUP_INV_DMIC),
        SND_PCI_QUIRK(0x1043, 0x1b93, "ASUS G614JVR/JIR", ALC245_FIXUP_CS35L41_SPI_2),
        SND_PCI_QUIRK(0x1043, 0x1bbd, "ASUS Z550MA", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1043, 0x1c03, "ASUS UM3406HA", ALC287_FIXUP_CS35L41_I2C_2),
        SND_PCI_QUIRK(0x1043, 0x1c23, "Asus X55U", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
+       SND_PCI_QUIRK(0x1043, 0x1c33, "ASUS UX5304MA", ALC245_FIXUP_CS35L41_SPI_2),
+       SND_PCI_QUIRK(0x1043, 0x1c43, "ASUS UX8406MA", ALC245_FIXUP_CS35L41_SPI_2),
        SND_PCI_QUIRK(0x1043, 0x1c62, "ASUS GU603", ALC289_FIXUP_ASUS_GA401),
        SND_PCI_QUIRK(0x1043, 0x1c92, "ASUS ROG Strix G15", ALC285_FIXUP_ASUS_G533Z_PINS),
        SND_PCI_QUIRK(0x1043, 0x1c9f, "ASUS G614JI", ALC285_FIXUP_ASUS_HEADSET_MIC),
@@ -10821,22 +10831,6 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
                {0x12, 0x90a60130},
                {0x17, 0x90170110},
                {0x21, 0x03211020}),
-       SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE,
-               {0x14, 0x90170110},
-               {0x21, 0x04211020}),
-       SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE,
-               {0x14, 0x90170110},
-               {0x21, 0x04211030}),
-       SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
-               ALC295_STANDARD_PINS,
-               {0x17, 0x21014020},
-               {0x18, 0x21a19030}),
-       SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
-               ALC295_STANDARD_PINS,
-               {0x17, 0x21014040},
-               {0x18, 0x21a19050}),
-       SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
-               ALC295_STANDARD_PINS),
        SND_HDA_PIN_QUIRK(0x10ec0298, 0x1028, "Dell", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE,
                ALC298_STANDARD_PINS,
                {0x17, 0x90170110}),
@@ -10880,6 +10874,9 @@ static const struct snd_hda_pin_quirk alc269_fallback_pin_fixup_tbl[] = {
        SND_HDA_PIN_QUIRK(0x10ec0289, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE,
                {0x19, 0x40000000},
                {0x1b, 0x40000000}),
+       SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE,
+               {0x19, 0x40000000},
+               {0x1b, 0x40000000}),
        SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
                {0x19, 0x40000000},
                {0x1a, 0x40000000}),
index 87245c5..8d94739 100644 (file)
@@ -75,7 +75,6 @@
 
 /* We now return you to your regularly scheduled HPUX. */
 
-#define ENOSYM         215     /* symbol does not exist in executable */
 #define        ENOTSOCK        216     /* Socket operation on non-socket */
 #define        EDESTADDRREQ    217     /* Destination address required */
 #define        EMSGSIZE        218     /* Message too long */
 #define        ETIMEDOUT       238     /* Connection timed out */
 #define        ECONNREFUSED    239     /* Connection refused */
 #define        EREFUSED        ECONNREFUSED    /* for HP's NFS apparently */
-#define        EREMOTERELEASE  240     /* Remote peer released connection */
 #define        EHOSTDOWN       241     /* Host is down */
 #define        EHOSTUNREACH    242     /* No route to host */
 
index 264eeb9..318e2da 100644 (file)
@@ -1421,7 +1421,7 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val)
        if (error)
                goto setval_error;
 
-       if (new_val->addr_family == ADDR_FAMILY_IPV6) {
+       if (new_val->addr_family & ADDR_FAMILY_IPV6) {
                error = fprintf(nmfile, "\n[ipv6]\n");
                if (error < 0)
                        goto setval_error;
@@ -1455,14 +1455,18 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val)
        if (error < 0)
                goto setval_error;
 
-       error = fprintf(nmfile, "gateway=%s\n", (char *)new_val->gate_way);
-       if (error < 0)
-               goto setval_error;
-
-       error = fprintf(nmfile, "dns=%s\n", (char *)new_val->dns_addr);
-       if (error < 0)
-               goto setval_error;
+       /* we do not want ipv4 addresses in ipv6 section and vice versa */
+       if (is_ipv6 != is_ipv4((char *)new_val->gate_way)) {
+               error = fprintf(nmfile, "gateway=%s\n", (char *)new_val->gate_way);
+               if (error < 0)
+                       goto setval_error;
+       }
 
+       if (is_ipv6 != is_ipv4((char *)new_val->dns_addr)) {
+               error = fprintf(nmfile, "dns=%s\n", (char *)new_val->dns_addr);
+               if (error < 0)
+                       goto setval_error;
+       }
        fclose(nmfile);
        fclose(ifcfg_file);
 
index ae5a7a8..440a91b 100755 (executable)
@@ -53,7 +53,7 @@
 #                       or "manual" if no boot-time protocol should be used)
 #
 # address1=ipaddr1/plen
-# address=ipaddr2/plen
+# address2=ipaddr2/plen
 #
 # gateway=gateway1;gateway2
 #
@@ -61,7 +61,7 @@
 #
 # [ipv6]
 # address1=ipaddr1/plen
-# address2=ipaddr1/plen
+# address2=ipaddr2/plen
 #
 # gateway=gateway1;gateway2
 #
index 64d1394..3110f84 100644 (file)
@@ -18,4 +18,4 @@ CFLAGS_devlink:=$(call get_hdr_inc,_LINUX_DEVLINK_H_,devlink.h)
 CFLAGS_ethtool:=$(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_H_,ethtool_netlink.h)
 CFLAGS_handshake:=$(call get_hdr_inc,_LINUX_HANDSHAKE_H,handshake.h)
 CFLAGS_netdev:=$(call get_hdr_inc,_LINUX_NETDEV_H,netdev.h)
-CFLAGS_nfsd:=$(call get_hdr_inc,_LINUX_NFSD_H,nfsd.h)
+CFLAGS_nfsd:=$(call get_hdr_inc,_LINUX_NFSD_NETLINK_H,nfsd_netlink.h)
index bc5065b..c12ca87 100644 (file)
@@ -15,7 +15,7 @@
 /* Enums */
 static const char * const devlink_op_strmap[] = {
        [3] = "get",
-       [7] = "port-get",
+       // skip "port-get", duplicate reply value
        [DEVLINK_CMD_PORT_NEW] = "port-new",
        [13] = "sb-get",
        [17] = "sb-pool-get",
index c4003a8..3bd6b92 100755 (executable)
@@ -1505,6 +1505,12 @@ def put_op_name(family, cw):
     cw.block_start(line=f"static const char * const {map_name}[] =")
     for op_name, op in family.msgs.items():
         if op.rsp_value:
+            # Make sure we don't add duplicated entries, if multiple commands
+            # produce the same response in legacy families.
+            if family.rsp_by_value[op.rsp_value] != op:
+                cw.p(f'// skip "{op_name}", duplicate reply value')
+                continue
+
             if op.req_value == op.rsp_value:
                 cw.p(f'[{op.enum_name}] = "{op_name}",')
             else:
index 4a356a7..40ad221 100755 (executable)
@@ -4151,7 +4151,7 @@ def parseKernelLog(data):
                        elif(re.match('Enabling non-boot CPUs .*', msg)):
                                # start of first cpu resume
                                cpu_start = ktime
-                       elif(re.match('smpboot: CPU (?P<cpu>[0-9]*) is now offline', msg)) \
+                       elif(re.match('smpboot: CPU (?P<cpu>[0-9]*) is now offline', msg) \
                                or re.match('psci: CPU(?P<cpu>[0-9]*) killed.*', msg)):
                                # end of a cpu suspend, start of the next
                                m = re.match('smpboot: CPU (?P<cpu>[0-9]*) is now offline', msg)
index 9a10512..7a33437 100644 (file)
@@ -211,9 +211,6 @@ int *fd_instr_count_percpu;
 struct timeval interval_tv = { 5, 0 };
 struct timespec interval_ts = { 5, 0 };
 
-/* Save original CPU model */
-unsigned int model_orig;
-
 unsigned int num_iterations;
 unsigned int header_iterations;
 unsigned int debug;
@@ -224,24 +221,16 @@ unsigned int rapl_joules;
 unsigned int summary_only;
 unsigned int list_header_only;
 unsigned int dump_only;
-unsigned int do_snb_cstates;
-unsigned int do_knl_cstates;
-unsigned int do_slm_cstates;
-unsigned int use_c1_residency_msr;
 unsigned int has_aperf;
 unsigned int has_epb;
 unsigned int has_turbo;
 unsigned int is_hybrid;
-unsigned int do_irtl_snb;
-unsigned int do_irtl_hsw;
 unsigned int units = 1000000;  /* MHz etc */
 unsigned int genuine_intel;
 unsigned int authentic_amd;
 unsigned int hygon_genuine;
 unsigned int max_level, max_extended_level;
 unsigned int has_invariant_tsc;
-unsigned int do_nhm_platform_info;
-unsigned int no_MSR_MISC_PWR_MGMT;
 unsigned int aperf_mperf_multiplier = 1;
 double bclk;
 double base_hz;
@@ -250,7 +239,6 @@ double tsc_tweak = 1.0;
 unsigned int show_pkg_only;
 unsigned int show_core_only;
 char *output_buffer, *outp;
-unsigned int do_rapl;
 unsigned int do_dts;
 unsigned int do_ptm;
 unsigned int do_ipc;
@@ -261,65 +249,686 @@ unsigned int gfx_cur_mhz;
 unsigned int gfx_act_mhz;
 unsigned int tj_max;
 unsigned int tj_max_override;
-int tcc_offset_bits;
 double rapl_power_units, rapl_time_units;
 double rapl_dram_energy_units, rapl_energy_units;
 double rapl_joule_counter_range;
-unsigned int do_core_perf_limit_reasons;
-unsigned int has_automatic_cstate_conversion;
-unsigned int dis_cstate_prewake;
-unsigned int do_gfx_perf_limit_reasons;
-unsigned int do_ring_perf_limit_reasons;
 unsigned int crystal_hz;
 unsigned long long tsc_hz;
 int base_cpu;
-double discover_bclk(unsigned int family, unsigned int model);
 unsigned int has_hwp;          /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
                        /* IA32_HWP_REQUEST, IA32_HWP_STATUS */
 unsigned int has_hwp_notify;   /* IA32_HWP_INTERRUPT */
 unsigned int has_hwp_activity_window;  /* IA32_HWP_REQUEST[bits 41:32] */
 unsigned int has_hwp_epp;      /* IA32_HWP_REQUEST[bits 31:24] */
 unsigned int has_hwp_pkg;      /* IA32_HWP_REQUEST_PKG */
-unsigned int has_misc_feature_control;
 unsigned int first_counter_read = 1;
 int ignore_stdin;
 
-#define RAPL_PKG               (1 << 0)
-                                       /* 0x610 MSR_PKG_POWER_LIMIT */
-                                       /* 0x611 MSR_PKG_ENERGY_STATUS */
-#define RAPL_PKG_PERF_STATUS   (1 << 1)
-                                       /* 0x613 MSR_PKG_PERF_STATUS */
-#define RAPL_PKG_POWER_INFO    (1 << 2)
-                                       /* 0x614 MSR_PKG_POWER_INFO */
-
-#define RAPL_DRAM              (1 << 3)
-                                       /* 0x618 MSR_DRAM_POWER_LIMIT */
-                                       /* 0x619 MSR_DRAM_ENERGY_STATUS */
-#define RAPL_DRAM_PERF_STATUS  (1 << 4)
-                                       /* 0x61b MSR_DRAM_PERF_STATUS */
-#define RAPL_DRAM_POWER_INFO   (1 << 5)
-                                       /* 0x61c MSR_DRAM_POWER_INFO */
-
-#define RAPL_CORES_POWER_LIMIT (1 << 6)
-                                       /* 0x638 MSR_PP0_POWER_LIMIT */
-#define RAPL_CORE_POLICY       (1 << 7)
-                                       /* 0x63a MSR_PP0_POLICY */
-
-#define RAPL_GFX               (1 << 8)
-                                       /* 0x640 MSR_PP1_POWER_LIMIT */
-                                       /* 0x641 MSR_PP1_ENERGY_STATUS */
-                                       /* 0x642 MSR_PP1_POLICY */
-
-#define RAPL_CORES_ENERGY_STATUS       (1 << 9)
-                                       /* 0x639 MSR_PP0_ENERGY_STATUS */
-#define RAPL_PER_CORE_ENERGY   (1 << 10)
-                                       /* Indicates cores energy collection is per-core,
-                                        * not per-package. */
-#define RAPL_AMD_F17H          (1 << 11)
-                                       /* 0xc0010299 MSR_RAPL_PWR_UNIT */
-                                       /* 0xc001029a MSR_CORE_ENERGY_STAT */
-                                       /* 0xc001029b MSR_PKG_ENERGY_STAT */
-#define RAPL_CORES (RAPL_CORES_ENERGY_STATUS | RAPL_CORES_POWER_LIMIT)
+int get_msr(int cpu, off_t offset, unsigned long long *msr);
+
+/* Model specific support Start */
+
+/* List of features that may diverge among different platforms */
+struct platform_features {
+       bool has_msr_misc_feature_control;      /* MSR_MISC_FEATURE_CONTROL */
+       bool has_msr_misc_pwr_mgmt;     /* MSR_MISC_PWR_MGMT */
+       bool has_nhm_msrs;      /* MSR_PLATFORM_INFO, MSR_IA32_TEMPERATURE_TARGET, MSR_SMI_COUNT, MSR_PKG_CST_CONFIG_CONTROL, MSR_IA32_POWER_CTL, TRL MSRs */
+       bool has_config_tdp;    /* MSR_CONFIG_TDP_NOMINAL/LEVEL_1/LEVEL_2/CONTROL, MSR_TURBO_ACTIVATION_RATIO */
+       int bclk_freq;          /* CPU base clock */
+       int crystal_freq;       /* Crystal clock to use when not available from CPUID.15 */
+       int supported_cstates;  /* Core cstates and Package cstates supported */
+       int cst_limit;          /* MSR_PKG_CST_CONFIG_CONTROL */
+       bool has_cst_auto_convension;   /* AUTOMATIC_CSTATE_CONVERSION bit in MSR_PKG_CST_CONFIG_CONTROL */
+       bool has_irtl_msrs;     /* MSR_PKGC3/PKGC6/PKGC7/PKGC8/PKGC9/PKGC10_IRTL */
+       bool has_msr_core_c1_res;       /* MSR_CORE_C1_RES */
+       bool has_msr_module_c6_res_ms;  /* MSR_MODULE_C6_RES_MS */
+       bool has_msr_c6_demotion_policy_config; /* MSR_CC6_DEMOTION_POLICY_CONFIG/MSR_MC6_DEMOTION_POLICY_CONFIG */
+       bool has_msr_atom_pkg_c6_residency;     /* MSR_ATOM_PKG_C6_RESIDENCY */
+       bool has_msr_knl_core_c6_residency;     /* MSR_KNL_CORE_C6_RESIDENCY */
+       bool has_ext_cst_msrs;  /* MSR_PKG_WEIGHTED_CORE_C0_RES/MSR_PKG_ANY_CORE_C0_RES/MSR_PKG_ANY_GFXE_C0_RES/MSR_PKG_BOTH_CORE_GFXE_C0_RES */
+       bool has_cst_prewake_bit;       /* Cstate prewake bit in MSR_IA32_POWER_CTL */
+       int trl_msrs;           /* MSR_TURBO_RATIO_LIMIT/LIMIT1/LIMIT2/SECONDARY, Atom TRL MSRs */
+       int plr_msrs;           /* MSR_CORE/GFX/RING_PERF_LIMIT_REASONS */
+       int rapl_msrs;          /* RAPL PKG/DRAM/CORE/GFX MSRs, AMD RAPL MSRs */
+       bool has_per_core_rapl; /* Indicates cores energy collection is per-core, not per-package. AMD specific for now */
+       bool has_rapl_divisor;  /* Divisor for Energy unit raw value from MSR_RAPL_POWER_UNIT */
+       bool has_fixed_rapl_unit;       /* Fixed Energy Unit used for DRAM RAPL Domain */
+       int rapl_quirk_tdp;     /* Hardcoded TDP value when cannot be retrieved from hardware */
+       int tcc_offset_bits;    /* TCC Offset bits in MSR_IA32_TEMPERATURE_TARGET */
+       bool enable_tsc_tweak;  /* Use CPU Base freq instead of TSC freq for aperf/mperf counter */
+       bool need_perf_multiplier;      /* mperf/aperf multiplier */
+};
+
+struct platform_data {
+       unsigned int model;
+       const struct platform_features *features;
+};
+
+/* For BCLK */
+enum bclk_freq {
+       BCLK_100MHZ = 1,
+       BCLK_133MHZ,
+       BCLK_SLV,
+};
+
+#define SLM_BCLK_FREQS 5
+double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0 };
+
+double slm_bclk(void)
+{
+       unsigned long long msr = 3;
+       unsigned int i;
+       double freq;
+
+       if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
+               fprintf(outf, "SLM BCLK: unknown\n");
+
+       i = msr & 0xf;
+       if (i >= SLM_BCLK_FREQS) {
+               fprintf(outf, "SLM BCLK[%d] invalid\n", i);
+               i = 3;
+       }
+       freq = slm_freq_table[i];
+
+       if (!quiet)
+               fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
+
+       return freq;
+}
+
+/* For Package cstate limit */
+enum package_cstate_limit {
+       CST_LIMIT_NHM = 1,
+       CST_LIMIT_SNB,
+       CST_LIMIT_HSW,
+       CST_LIMIT_SKX,
+       CST_LIMIT_ICX,
+       CST_LIMIT_SLV,
+       CST_LIMIT_AMT,
+       CST_LIMIT_KNL,
+       CST_LIMIT_GMT,
+};
+
+/* For Turbo Ratio Limit MSRs */
+enum turbo_ratio_limit_msrs {
+       TRL_BASE = BIT(0),
+       TRL_LIMIT1 = BIT(1),
+       TRL_LIMIT2 = BIT(2),
+       TRL_ATOM = BIT(3),
+       TRL_KNL = BIT(4),
+       TRL_CORECOUNT = BIT(5),
+};
+
+/* For Perf Limit Reason MSRs */
+enum perf_limit_reason_msrs {
+       PLR_CORE = BIT(0),
+       PLR_GFX = BIT(1),
+       PLR_RING = BIT(2),
+};
+
+/* For RAPL MSRs */
+enum rapl_msrs {
+       RAPL_PKG_POWER_LIMIT = BIT(0),  /* 0x610 MSR_PKG_POWER_LIMIT */
+       RAPL_PKG_ENERGY_STATUS = BIT(1),        /* 0x611 MSR_PKG_ENERGY_STATUS */
+       RAPL_PKG_PERF_STATUS = BIT(2),  /* 0x613 MSR_PKG_PERF_STATUS */
+       RAPL_PKG_POWER_INFO = BIT(3),   /* 0x614 MSR_PKG_POWER_INFO */
+       RAPL_DRAM_POWER_LIMIT = BIT(4), /* 0x618 MSR_DRAM_POWER_LIMIT */
+       RAPL_DRAM_ENERGY_STATUS = BIT(5),       /* 0x619 MSR_DRAM_ENERGY_STATUS */
+       RAPL_DRAM_PERF_STATUS = BIT(6), /* 0x61b MSR_DRAM_PERF_STATUS */
+       RAPL_DRAM_POWER_INFO = BIT(7),  /* 0x61c MSR_DRAM_POWER_INFO */
+       RAPL_CORE_POWER_LIMIT = BIT(8), /* 0x638 MSR_PP0_POWER_LIMIT */
+       RAPL_CORE_ENERGY_STATUS = BIT(9),       /* 0x639 MSR_PP0_ENERGY_STATUS */
+       RAPL_CORE_POLICY = BIT(10),     /* 0x63a MSR_PP0_POLICY */
+       RAPL_GFX_POWER_LIMIT = BIT(11), /* 0x640 MSR_PP1_POWER_LIMIT */
+       RAPL_GFX_ENERGY_STATUS = BIT(12),       /* 0x641 MSR_PP1_ENERGY_STATUS */
+       RAPL_GFX_POLICY = BIT(13),      /* 0x642 MSR_PP1_POLICY */
+       RAPL_AMD_PWR_UNIT = BIT(14),    /* 0xc0010299 MSR_AMD_RAPL_POWER_UNIT */
+       RAPL_AMD_CORE_ENERGY_STAT = BIT(15),    /* 0xc001029a MSR_AMD_CORE_ENERGY_STATUS */
+       RAPL_AMD_PKG_ENERGY_STAT = BIT(16),     /* 0xc001029b MSR_AMD_PKG_ENERGY_STATUS */
+};
+
+#define RAPL_PKG       (RAPL_PKG_ENERGY_STATUS | RAPL_PKG_POWER_LIMIT)
+#define RAPL_DRAM      (RAPL_DRAM_ENERGY_STATUS | RAPL_DRAM_POWER_LIMIT)
+#define RAPL_CORE      (RAPL_CORE_ENERGY_STATUS | RAPL_CORE_POWER_LIMIT)
+#define RAPL_GFX       (RAPL_GFX_POWER_LIMIT | RAPL_GFX_ENERGY_STATUS)
+
+#define RAPL_PKG_ALL   (RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO)
+#define RAPL_DRAM_ALL  (RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_DRAM_POWER_INFO)
+#define RAPL_CORE_ALL  (RAPL_CORE | RAPL_CORE_POLICY)
+#define RAPL_GFX_ALL   (RAPL_GFX | RAPL_GFX_POLICY)
+
+#define RAPL_AMD_F17H  (RAPL_AMD_PWR_UNIT | RAPL_AMD_CORE_ENERGY_STAT | RAPL_AMD_PKG_ENERGY_STAT)
+
+/* For Cstates */
+enum cstates {
+       CC1 = BIT(0),
+       CC3 = BIT(1),
+       CC6 = BIT(2),
+       CC7 = BIT(3),
+       PC2 = BIT(4),
+       PC3 = BIT(5),
+       PC6 = BIT(6),
+       PC7 = BIT(7),
+       PC8 = BIT(8),
+       PC9 = BIT(9),
+       PC10 = BIT(10),
+};
+
+static const struct platform_features nhm_features = {
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .bclk_freq = BCLK_133MHZ,
+       .supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6,
+       .cst_limit = CST_LIMIT_NHM,
+       .trl_msrs = TRL_BASE,
+};
+
+static const struct platform_features nhx_features = {
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .bclk_freq = BCLK_133MHZ,
+       .supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6,
+       .cst_limit = CST_LIMIT_NHM,
+};
+
+static const struct platform_features snb_features = {
+       .has_msr_misc_feature_control = 1,
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
+       .cst_limit = CST_LIMIT_SNB,
+       .has_irtl_msrs = 1,
+       .trl_msrs = TRL_BASE,
+       .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features snx_features = {
+       .has_msr_misc_feature_control = 1,
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
+       .cst_limit = CST_LIMIT_SNB,
+       .has_irtl_msrs = 1,
+       .trl_msrs = TRL_BASE,
+       .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL,
+};
+
+static const struct platform_features ivb_features = {
+       .has_msr_misc_feature_control = 1,
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .has_config_tdp = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
+       .cst_limit = CST_LIMIT_SNB,
+       .has_irtl_msrs = 1,
+       .trl_msrs = TRL_BASE,
+       .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features ivx_features = {
+       .has_msr_misc_feature_control = 1,
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
+       .cst_limit = CST_LIMIT_SNB,
+       .has_irtl_msrs = 1,
+       .trl_msrs = TRL_BASE | TRL_LIMIT1,
+       .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL,
+};
+
+static const struct platform_features hsw_features = {
+       .has_msr_misc_feature_control = 1,
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .has_config_tdp = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
+       .cst_limit = CST_LIMIT_HSW,
+       .has_irtl_msrs = 1,
+       .trl_msrs = TRL_BASE,
+       .plr_msrs = PLR_CORE | PLR_GFX | PLR_RING,
+       .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features hsx_features = {
+       .has_msr_misc_feature_control = 1,
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .has_config_tdp = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
+       .cst_limit = CST_LIMIT_HSW,
+       .has_irtl_msrs = 1,
+       .trl_msrs = TRL_BASE | TRL_LIMIT1 | TRL_LIMIT2,
+       .plr_msrs = PLR_CORE | PLR_RING,
+       .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+       .has_fixed_rapl_unit = 1,
+};
+
+static const struct platform_features hswl_features = {
+       .has_msr_misc_feature_control = 1,
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .has_config_tdp = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
+       .cst_limit = CST_LIMIT_HSW,
+       .has_irtl_msrs = 1,
+       .trl_msrs = TRL_BASE,
+       .plr_msrs = PLR_CORE | PLR_GFX | PLR_RING,
+       .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features hswg_features = {
+       .has_msr_misc_feature_control = 1,
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .has_config_tdp = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
+       .cst_limit = CST_LIMIT_HSW,
+       .has_irtl_msrs = 1,
+       .trl_msrs = TRL_BASE,
+       .plr_msrs = PLR_CORE | PLR_GFX | PLR_RING,
+       .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features bdw_features = {
+       .has_msr_misc_feature_control = 1,
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .has_config_tdp = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
+       .cst_limit = CST_LIMIT_HSW,
+       .has_irtl_msrs = 1,
+       .trl_msrs = TRL_BASE,
+       .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features bdwg_features = {
+       .has_msr_misc_feature_control = 1,
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .has_config_tdp = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
+       .cst_limit = CST_LIMIT_HSW,
+       .has_irtl_msrs = 1,
+       .trl_msrs = TRL_BASE,
+       .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features bdx_features = {
+       .has_msr_misc_feature_control = 1,
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .has_config_tdp = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .supported_cstates = CC1 | CC3 | CC6 | PC2 | PC3 | PC6,
+       .cst_limit = CST_LIMIT_HSW,
+       .has_irtl_msrs = 1,
+       .has_cst_auto_convension = 1,
+       .trl_msrs = TRL_BASE,
+       .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+       .has_fixed_rapl_unit = 1,
+};
+
+static const struct platform_features skl_features = {
+       .has_msr_misc_feature_control = 1,
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .has_config_tdp = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .crystal_freq = 24000000,
+       .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
+       .cst_limit = CST_LIMIT_HSW,
+       .has_irtl_msrs = 1,
+       .has_ext_cst_msrs = 1,
+       .trl_msrs = TRL_BASE,
+       .tcc_offset_bits = 6,
+       .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX,
+       .enable_tsc_tweak = 1,
+};
+
+static const struct platform_features cnl_features = {
+       .has_msr_misc_feature_control = 1,
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .has_config_tdp = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
+       .cst_limit = CST_LIMIT_HSW,
+       .has_irtl_msrs = 1,
+       .has_msr_core_c1_res = 1,
+       .has_ext_cst_msrs = 1,
+       .trl_msrs = TRL_BASE,
+       .tcc_offset_bits = 6,
+       .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX,
+       .enable_tsc_tweak = 1,
+};
+
+/*
+ * Alder Lake hybrid client; also used for Raptor Lake and the Gracemont
+ * Atom (see turbostat_pdata).  Differs from cnl_features only in the
+ * supported package C-states (no PC7/PC9).
+ */
+static const struct platform_features adl_features = {
+       .has_msr_misc_feature_control = 1,
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .has_config_tdp = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC8 | PC10,
+       .cst_limit = CST_LIMIT_HSW,
+       .has_irtl_msrs = 1,
+       .has_msr_core_c1_res = 1,
+       .has_ext_cst_msrs = 1,
+       .trl_msrs = TRL_BASE,
+       .tcc_offset_bits = 6,
+       .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX,
+       .enable_tsc_tweak = 1,
+};
+
+/*
+ * Skylake-X server parts: server C-state set (CC1/CC6/PC2/PC6), SKX
+ * C-state limit decoding, per-core-count turbo ratio limits, server
+ * RAPL domains with fixed energy units.
+ */
+static const struct platform_features skx_features = {
+       .has_msr_misc_feature_control = 1,
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .has_config_tdp = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .supported_cstates = CC1 | CC6 | PC2 | PC6,
+       .cst_limit = CST_LIMIT_SKX,
+       .has_irtl_msrs = 1,
+       .has_cst_auto_convension = 1,
+       .trl_msrs = TRL_BASE | TRL_CORECOUNT,
+       .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+       .has_fixed_rapl_unit = 1,
+};
+
+/*
+ * Ice Lake server (ICELAKE_X / ICELAKE_D): like skx_features but with the
+ * ICX C-state limit table and the C-state pre-wake bit instead of C-state
+ * auto-conversion.
+ */
+static const struct platform_features icx_features = {
+       .has_msr_misc_feature_control = 1,
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .has_config_tdp = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .supported_cstates = CC1 | CC6 | PC2 | PC6,
+       .cst_limit = CST_LIMIT_ICX,
+       .has_irtl_msrs = 1,
+       .has_cst_prewake_bit = 1,
+       .trl_msrs = TRL_BASE | TRL_CORECOUNT,
+       .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+       .has_fixed_rapl_unit = 1,
+};
+
+/*
+ * Sapphire Rapids server; also mapped to Emerald Rapids and Granite
+ * Rapids in turbostat_pdata.  Adds a core C1 residency MSR; note it does
+ * NOT set has_fixed_rapl_unit, unlike skx/icx.
+ */
+static const struct platform_features spr_features = {
+       .has_msr_misc_feature_control = 1,
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .has_config_tdp = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .supported_cstates = CC1 | CC6 | PC2 | PC6,
+       .cst_limit = CST_LIMIT_SKX,
+       .has_msr_core_c1_res = 1,
+       .has_irtl_msrs = 1,
+       .has_cst_prewake_bit = 1,
+       .trl_msrs = TRL_BASE | TRL_CORECOUNT,
+       .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+};
+
+/*
+ * Atom Crestmont-X server part (mapped from INTEL_FAM6_ATOM_CRESTMONT_X):
+ * spr_features plus a module C6 residency MSR.
+ */
+static const struct platform_features srf_features = {
+       .has_msr_misc_feature_control = 1,
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .has_config_tdp = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .supported_cstates = CC1 | CC6 | PC2 | PC6,
+       .cst_limit = CST_LIMIT_SKX,
+       .has_msr_core_c1_res = 1,
+       .has_msr_module_c6_res_ms = 1,
+       .has_irtl_msrs = 1,
+       .has_cst_prewake_bit = 1,
+       .trl_msrs = TRL_BASE | TRL_CORECOUNT,
+       .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+};
+
+/*
+ * Atom Crestmont part (mapped from INTEL_FAM6_ATOM_CRESTMONT): identical
+ * to srf_features except only core C-states CC1/CC6 are supported (no
+ * package C-states).
+ */
+static const struct platform_features grr_features = {
+       .has_msr_misc_feature_control = 1,
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .has_config_tdp = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .supported_cstates = CC1 | CC6,
+       .cst_limit = CST_LIMIT_SKX,
+       .has_msr_core_c1_res = 1,
+       .has_msr_module_c6_res_ms = 1,
+       .has_irtl_msrs = 1,
+       .has_cst_prewake_bit = 1,
+       .trl_msrs = TRL_BASE | TRL_CORECOUNT,
+       .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+};
+
+/*
+ * Atom Silvermont: Silvermont-specific bus clock and C-state limits,
+ * Atom turbo MSRs, RAPL with a divisor quirk and a 30 W TDP fallback
+ * (rapl_quirk_tdp).
+ */
+static const struct platform_features slv_features = {
+       .has_nhm_msrs = 1,
+       .bclk_freq = BCLK_SLV,
+       .supported_cstates = CC1 | CC6 | PC6,
+       .cst_limit = CST_LIMIT_SLV,
+       .has_msr_core_c1_res = 1,
+       .has_msr_module_c6_res_ms = 1,
+       .has_msr_c6_demotion_policy_config = 1,
+       .has_msr_atom_pkg_c6_residency = 1,
+       .trl_msrs = TRL_ATOM,
+       .rapl_msrs = RAPL_PKG | RAPL_CORE,
+       .has_rapl_divisor = 1,
+       .rapl_quirk_tdp = 30,
+};
+
+/*
+ * Atom Silvermont-D (mapped from INTEL_FAM6_ATOM_SILVERMONT_D): server
+ * variant of slv_features — base turbo MSRs, no RAPL divisor, same 30 W
+ * TDP quirk.
+ */
+static const struct platform_features slvd_features = {
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .bclk_freq = BCLK_SLV,
+       .supported_cstates = CC1 | CC6 | PC3 | PC6,
+       .cst_limit = CST_LIMIT_SLV,
+       .has_msr_atom_pkg_c6_residency = 1,
+       .trl_msrs = TRL_BASE,
+       .rapl_msrs = RAPL_PKG | RAPL_CORE,
+       .rapl_quirk_tdp = 30,
+};
+
+/*
+ * Atom Airmont (mapped from INTEL_FAM6_ATOM_AIRMONT): minimal feature
+ * set — 133 MHz bus clock, Airmont C-state limits, no RAPL MSRs listed.
+ */
+static const struct platform_features amt_features = {
+       .has_nhm_msrs = 1,
+       .bclk_freq = BCLK_133MHZ,
+       .supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6,
+       .cst_limit = CST_LIMIT_AMT,
+       .trl_msrs = TRL_BASE,
+};
+
+/*
+ * Atom Goldmont client: 19.2 MHz crystal, Goldmont C-state limit table,
+ * per-core-count turbo ratios, package-only RAPL.
+ */
+static const struct platform_features gmt_features = {
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .crystal_freq = 19200000,
+       .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
+       .cst_limit = CST_LIMIT_GMT,
+       .has_irtl_msrs = 1,
+       .trl_msrs = TRL_BASE | TRL_CORECOUNT,
+       .rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO,
+};
+
+/*
+ * Atom Goldmont-D server (mapped from INTEL_FAM6_ATOM_GOLDMONT_D):
+ * 25 MHz crystal, server C-state set, core C1 residency MSR, and wider
+ * RAPL coverage than the client Goldmont.
+ */
+static const struct platform_features gmtd_features = {
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .crystal_freq = 25000000,
+       .supported_cstates = CC1 | CC6 | PC2 | PC6,
+       .cst_limit = CST_LIMIT_GMT,
+       .has_irtl_msrs = 1,
+       .has_msr_core_c1_res = 1,
+       .trl_msrs = TRL_BASE | TRL_CORECOUNT,
+       .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_CORE_ENERGY_STATUS,
+};
+
+/*
+ * Atom Goldmont Plus (mapped from INTEL_FAM6_ATOM_GOLDMONT_PLUS): same
+ * as gmt_features except turbo ratios are base-only (no TRL_CORECOUNT).
+ */
+static const struct platform_features gmtp_features = {
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .crystal_freq = 19200000,
+       .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
+       .cst_limit = CST_LIMIT_GMT,
+       .has_irtl_msrs = 1,
+       .trl_msrs = TRL_BASE,
+       .rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO,
+};
+
+/*
+ * Atom Tremont client (TREMONT / TREMONT_L): Goldmont C-state limits but
+ * full client RAPL domains and the TSC tweak, like the big-core clients.
+ */
+static const struct platform_features tmt_features = {
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
+       .cst_limit = CST_LIMIT_GMT,
+       .has_irtl_msrs = 1,
+       .trl_msrs = TRL_BASE,
+       .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX,
+       .enable_tsc_tweak = 1,
+};
+
+/*
+ * Atom Tremont-D server (mapped from INTEL_FAM6_ATOM_TREMONT_D): only
+ * CC1/CC6 C-states, per-core-count turbo ratios, package RAPL only.
+ */
+static const struct platform_features tmtd_features = {
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .supported_cstates = CC1 | CC6,
+       .cst_limit = CST_LIMIT_GMT,
+       .has_irtl_msrs = 1,
+       .trl_msrs = TRL_BASE | TRL_CORECOUNT,
+       .rapl_msrs = RAPL_PKG_ALL,
+};
+
+/*
+ * Xeon Phi Knights Landing / Knights Mill: KNL-specific core C6 residency
+ * MSR and turbo MSRs, fixed RAPL units, and a perf multiplier
+ * (need_perf_multiplier) for its counters.
+ */
+static const struct platform_features knl_features = {
+       .has_msr_misc_pwr_mgmt = 1,
+       .has_nhm_msrs = 1,
+       .has_config_tdp = 1,
+       .bclk_freq = BCLK_100MHZ,
+       .supported_cstates = CC1 | CC6 | PC3 | PC6,
+       .cst_limit = CST_LIMIT_KNL,
+       .has_msr_knl_core_c6_residency = 1,
+       .trl_msrs = TRL_KNL,
+       .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+       .has_fixed_rapl_unit = 1,
+       .need_perf_multiplier = 1,
+};
+
+/* Empty fallback set: used when no model-specific entry matches. */
+static const struct platform_features default_features = {
+};
+
+/*
+ * AMD/Hygon Family 17h+ with RAPL (selected in probe_platform_features()
+ * when CPUID.80000007H:EDX bit 14 is set): per-core RAPL energy MSRs.
+ */
+static const struct platform_features amd_features_with_rapl = {
+       .rapl_msrs = RAPL_AMD_F17H,
+       .has_per_core_rapl = 1,
+       .rapl_quirk_tdp = 280,  /* This is the max stock TDP of HEDT/Server Fam17h+ chips */
+};
+
+/*
+ * Intel Family 6 model -> feature-set dispatch table.  Scanned linearly
+ * by probe_platform_features(); terminated by the { 0, NULL } sentinel.
+ */
+static const struct platform_data turbostat_pdata[] = {
+       { INTEL_FAM6_NEHALEM, &nhm_features },
+       { INTEL_FAM6_NEHALEM_G, &nhm_features },
+       { INTEL_FAM6_NEHALEM_EP, &nhm_features },
+       { INTEL_FAM6_NEHALEM_EX, &nhx_features },
+       { INTEL_FAM6_WESTMERE, &nhm_features },
+       { INTEL_FAM6_WESTMERE_EP, &nhm_features },
+       { INTEL_FAM6_WESTMERE_EX, &nhx_features },
+       { INTEL_FAM6_SANDYBRIDGE, &snb_features },
+       { INTEL_FAM6_SANDYBRIDGE_X, &snx_features },
+       { INTEL_FAM6_IVYBRIDGE, &ivb_features },
+       { INTEL_FAM6_IVYBRIDGE_X, &ivx_features },
+       { INTEL_FAM6_HASWELL, &hsw_features },
+       { INTEL_FAM6_HASWELL_X, &hsx_features },
+       { INTEL_FAM6_HASWELL_L, &hswl_features },
+       { INTEL_FAM6_HASWELL_G, &hswg_features },
+       { INTEL_FAM6_BROADWELL, &bdw_features },
+       { INTEL_FAM6_BROADWELL_G, &bdwg_features },
+       { INTEL_FAM6_BROADWELL_X, &bdx_features },
+       { INTEL_FAM6_BROADWELL_D, &bdx_features },
+       { INTEL_FAM6_SKYLAKE_L, &skl_features },
+       { INTEL_FAM6_SKYLAKE, &skl_features },
+       { INTEL_FAM6_SKYLAKE_X, &skx_features },
+       { INTEL_FAM6_KABYLAKE_L, &skl_features },
+       { INTEL_FAM6_KABYLAKE, &skl_features },
+       { INTEL_FAM6_COMETLAKE, &skl_features },
+       { INTEL_FAM6_COMETLAKE_L, &skl_features },
+       { INTEL_FAM6_CANNONLAKE_L, &cnl_features },
+       { INTEL_FAM6_ICELAKE_X, &icx_features },
+       { INTEL_FAM6_ICELAKE_D, &icx_features },
+       { INTEL_FAM6_ICELAKE_L, &cnl_features },
+       { INTEL_FAM6_ICELAKE_NNPI, &cnl_features },
+       { INTEL_FAM6_ROCKETLAKE, &cnl_features },
+       { INTEL_FAM6_TIGERLAKE_L, &cnl_features },
+       { INTEL_FAM6_TIGERLAKE, &cnl_features },
+       { INTEL_FAM6_SAPPHIRERAPIDS_X, &spr_features },
+       { INTEL_FAM6_EMERALDRAPIDS_X, &spr_features },
+       { INTEL_FAM6_GRANITERAPIDS_X, &spr_features },
+       { INTEL_FAM6_LAKEFIELD, &cnl_features },
+       { INTEL_FAM6_ALDERLAKE, &adl_features },
+       { INTEL_FAM6_ALDERLAKE_L, &adl_features },
+       { INTEL_FAM6_RAPTORLAKE, &adl_features },
+       { INTEL_FAM6_RAPTORLAKE_P, &adl_features },
+       { INTEL_FAM6_RAPTORLAKE_S, &adl_features },
+       { INTEL_FAM6_METEORLAKE, &cnl_features },
+       { INTEL_FAM6_METEORLAKE_L, &cnl_features },
+       { INTEL_FAM6_ARROWLAKE, &cnl_features },
+       { INTEL_FAM6_LUNARLAKE_M, &cnl_features },
+       { INTEL_FAM6_ATOM_SILVERMONT, &slv_features },
+       { INTEL_FAM6_ATOM_SILVERMONT_D, &slvd_features },
+       { INTEL_FAM6_ATOM_AIRMONT, &amt_features },
+       { INTEL_FAM6_ATOM_GOLDMONT, &gmt_features },
+       { INTEL_FAM6_ATOM_GOLDMONT_D, &gmtd_features },
+       { INTEL_FAM6_ATOM_GOLDMONT_PLUS, &gmtp_features },
+       { INTEL_FAM6_ATOM_TREMONT_D, &tmtd_features },
+       { INTEL_FAM6_ATOM_TREMONT, &tmt_features },
+       { INTEL_FAM6_ATOM_TREMONT_L, &tmt_features },
+       { INTEL_FAM6_ATOM_GRACEMONT, &adl_features },
+       { INTEL_FAM6_ATOM_CRESTMONT_X, &srf_features },
+       { INTEL_FAM6_ATOM_CRESTMONT, &grr_features },
+       { INTEL_FAM6_XEON_PHI_KNL, &knl_features },
+       { INTEL_FAM6_XEON_PHI_KNM, &knl_features },
+       /*
+        * Missing support for
+        * INTEL_FAM6_ICELAKE
+        * INTEL_FAM6_ATOM_SILVERMONT_MID
+        * INTEL_FAM6_ATOM_AIRMONT_MID
+        * INTEL_FAM6_ATOM_AIRMONT_NP
+        */
+       { 0, NULL },
+};
+
+static const struct platform_features *platform;
+
+/*
+ * Select the platform_features set matching the running CPU.
+ *
+ * @family/@model come from CPUID.  On AMD/Hygon, RAPL support is probed
+ * via CPUID.80000007H:EDX bit 14 (Fam 17h+ only); on Intel Family 6 the
+ * model is looked up in turbostat_pdata.  Everything else keeps the empty
+ * default_features.
+ *
+ * NOTE(review): authentic_amd, hygon_genuine, genuine_intel and
+ * max_extended_level are globals assumed to be populated by earlier CPUID
+ * probing elsewhere in this file — confirm they are set before this runs.
+ */
+void probe_platform_features(unsigned int family, unsigned int model)
+{
+       int i;
+
+       platform = &default_features;
+
+       if (authentic_amd || hygon_genuine) {
+               if (max_extended_level >= 0x80000007) {
+                       unsigned int eax, ebx, ecx, edx;
+
+                       __cpuid(0x80000007, eax, ebx, ecx, edx);
+                       /* RAPL (Fam 17h+) */
+                       if ((edx & (1 << 14)) && family >= 0x17)
+                               platform = &amd_features_with_rapl;
+               }
+               return;
+       }
+
+       if (!genuine_intel || family != 6)
+               return;
+
+       /* Linear scan; table ends at the { 0, NULL } sentinel. */
+       for (i = 0; turbostat_pdata[i].features; i++) {
+               if (turbostat_pdata[i].model == model) {
+                       platform = turbostat_pdata[i].features;
+                       return;
+               }
+       }
+}
+
+/* Model specific support End */
+
 #define        TJMAX_DEFAULT   100
 
 /* MSRs that are not yet in the kernel-provided header. */
@@ -333,8 +942,8 @@ int backwards_count;
 char *progname;
 
 #define CPU_SUBSET_MAXCPUS     1024    /* need to use before probe... */
-cpu_set_t *cpu_present_set, *cpu_affinity_set, *cpu_subset;
-size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size;
+cpu_set_t *cpu_present_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset;
+size_t cpu_present_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, cpu_subset_size;
 #define MAX_ADDED_COUNTERS 8
 #define MAX_ADDED_THREAD_COUNTERS 24
 #define BITMASK_SIZE 32
@@ -355,12 +964,11 @@ struct thread_data {
        unsigned int x2apic_id;
        unsigned int flags;
        bool is_atom;
-#define CPU_IS_FIRST_THREAD_IN_CORE    0x2
-#define CPU_IS_FIRST_CORE_IN_PACKAGE   0x4
        unsigned long long counter[MAX_ADDED_THREAD_COUNTERS];
 } *thread_even, *thread_odd;
 
 struct core_data {
+       int base_cpu;
        unsigned long long c3;
        unsigned long long c6;
        unsigned long long c7;
@@ -373,6 +981,7 @@ struct core_data {
 } *core_even, *core_odd;
 
 struct pkg_data {
+       int base_cpu;
        unsigned long long pc2;
        unsigned long long pc3;
        unsigned long long pc6;
@@ -456,7 +1065,7 @@ off_t idx_to_offset(int idx)
 
        switch (idx) {
        case IDX_PKG_ENERGY:
-               if (do_rapl & RAPL_AMD_F17H)
+               if (platform->rapl_msrs & RAPL_AMD_F17H)
                        offset = MSR_PKG_ENERGY_STAT;
                else
                        offset = MSR_PKG_ENERGY_STATUS;
@@ -516,17 +1125,17 @@ int idx_valid(int idx)
 {
        switch (idx) {
        case IDX_PKG_ENERGY:
-               return do_rapl & (RAPL_PKG | RAPL_AMD_F17H);
+               return platform->rapl_msrs & (RAPL_PKG | RAPL_AMD_F17H);
        case IDX_DRAM_ENERGY:
-               return do_rapl & RAPL_DRAM;
+               return platform->rapl_msrs & RAPL_DRAM;
        case IDX_PP0_ENERGY:
-               return do_rapl & RAPL_CORES_ENERGY_STATUS;
+               return platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS;
        case IDX_PP1_ENERGY:
-               return do_rapl & RAPL_GFX;
+               return platform->rapl_msrs & RAPL_GFX;
        case IDX_PKG_PERF:
-               return do_rapl & RAPL_PKG_PERF_STATUS;
+               return platform->rapl_msrs & RAPL_PKG_PERF_STATUS;
        case IDX_DRAM_PERF:
-               return do_rapl & RAPL_DRAM_PERF_STATUS;
+               return platform->rapl_msrs & RAPL_DRAM_PERF_STATUS;
        default:
                return 0;
        }
@@ -563,6 +1172,9 @@ struct topo_params {
        int num_die;
        int num_cpus;
        int num_cores;
+       int allowed_packages;
+       int allowed_cpus;
+       int allowed_cores;
        int max_cpu_num;
        int max_node_num;
        int nodes_per_pkg;
@@ -575,7 +1187,7 @@ struct timeval tv_even, tv_odd, tv_delta;
 int *irq_column_2_cpu;         /* /proc/interrupts column numbers */
 int *irqs_per_cpu;             /* indexed by cpu_num */
 
-void setup_all_buffers(void);
+void setup_all_buffers(bool startup);
 
 char *sys_lpi_file;
 char *sys_lpi_file_sysfs = "/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us";
@@ -586,6 +1198,11 @@ int cpu_is_not_present(int cpu)
        return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
 }
 
+/* Return non-zero if @cpu is not in the allowed-CPU set (cpu_allowed_set). */
+int cpu_is_not_allowed(int cpu)
+{
+       return !CPU_ISSET_S(cpu, cpu_allowed_setsize, cpu_allowed_set);
+}
+
 /*
  * run func(thread, core, package) in topology order
  * skip non-present cpus
@@ -603,10 +1220,9 @@ int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pk
                                        struct thread_data *t;
                                        struct core_data *c;
                                        struct pkg_data *p;
-
                                        t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no);
 
-                                       if (cpu_is_not_present(t->cpu_id))
+                                       if (cpu_is_not_allowed(t->cpu_id))
                                                continue;
 
                                        c = GET_CORE(core_base, core_no, node_no, pkg_no);
@@ -622,6 +1238,25 @@ int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pk
        return 0;
 }
 
+/*
+ * True if @t is the base (first) CPU of core @c, or if the core has no
+ * valid base CPU recorded (base_cpu < 0).  @p is unused.
+ */
+int is_cpu_first_thread_in_core(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+       UNUSED(p);
+
+       return ((int)t->cpu_id == c->base_cpu || c->base_cpu < 0);
+}
+
+/*
+ * True if @t is the base (first) CPU of package @p, or if the package has
+ * no valid base CPU recorded (base_cpu < 0).  @c is unused.
+ */
+int is_cpu_first_core_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+       UNUSED(c);
+
+       return ((int)t->cpu_id == p->base_cpu || p->base_cpu < 0);
+}
+
+/* True if @t is both first thread in its core and first core in its package. */
+int is_cpu_first_thread_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+       return is_cpu_first_thread_in_core(t, c, p) && is_cpu_first_core_in_package(t, c, p);
+}
+
 int cpu_migrate(int cpu)
 {
        CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
@@ -904,11 +1539,11 @@ void print_header(char *delim)
        if (DO_BIC(BIC_CORE_THROT_CNT))
                outp += sprintf(outp, "%sCoreThr", (printed++ ? delim : ""));
 
-       if (do_rapl && !rapl_joules) {
-               if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY))
+       if (platform->rapl_msrs && !rapl_joules) {
+               if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl)
                        outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
-       } else if (do_rapl && rapl_joules) {
-               if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY))
+       } else if (platform->rapl_msrs && rapl_joules) {
+               if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl)
                        outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
        }
 
@@ -966,10 +1601,10 @@ void print_header(char *delim)
        if (DO_BIC(BIC_SYS_LPI))
                outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : ""));
 
-       if (do_rapl && !rapl_joules) {
+       if (platform->rapl_msrs && !rapl_joules) {
                if (DO_BIC(BIC_PkgWatt))
                        outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : ""));
-               if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY))
+               if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl)
                        outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
                if (DO_BIC(BIC_GFXWatt))
                        outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : ""));
@@ -979,10 +1614,10 @@ void print_header(char *delim)
                        outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
                if (DO_BIC(BIC_RAM__))
                        outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
-       } else if (do_rapl && rapl_joules) {
+       } else if (platform->rapl_msrs && rapl_joules) {
                if (DO_BIC(BIC_Pkg_J))
                        outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : ""));
-               if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY))
+               if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl)
                        outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
                if (DO_BIC(BIC_GFX_J))
                        outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : ""));
@@ -1106,11 +1741,11 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
        int printed = 0;
 
        /* if showing only 1st thread in core and this isn't one, bail out */
-       if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
+       if (show_core_only && !is_cpu_first_thread_in_core(t, c, p))
                return 0;
 
        /* if showing only 1st thread in pkg and this isn't one, bail out */
-       if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+       if (show_pkg_only && !is_cpu_first_core_in_package(t, c, p))
                return 0;
 
        /*if not summary line and --cpu is used */
@@ -1244,7 +1879,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1 / tsc);
 
        /* print per-core data only for 1st thread in core */
-       if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
+       if (!is_cpu_first_thread_in_core(t, c, p))
                goto done;
 
        if (DO_BIC(BIC_CPU_c3))
@@ -1284,14 +1919,14 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
 
        fmt8 = "%s%.2f";
 
-       if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY))
+       if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl)
                outp +=
                    sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float);
-       if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY))
+       if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl)
                outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units);
 
        /* print per-package data only for 1st core in package */
-       if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+       if (!is_cpu_first_core_in_package(t, c, p))
                goto done;
 
        /* PkgTmp */
@@ -1352,7 +1987,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
                outp +=
                    sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float);
 
-       if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY))
+       if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl)
                outp +=
                    sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float);
        if (DO_BIC(BIC_GFXWatt))
@@ -1364,7 +1999,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
                            p->energy_dram * rapl_dram_energy_units / interval_float);
        if (DO_BIC(BIC_Pkg_J))
                outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units);
-       if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY))
+       if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl)
                outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units);
        if (DO_BIC(BIC_GFX_J))
                outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units);
@@ -1527,7 +2162,7 @@ void delta_core(struct core_data *new, struct core_data *old)
 
 int soft_c1_residency_display(int bic)
 {
-       if (!DO_BIC(BIC_CPU_c1) || use_c1_residency_msr)
+       if (!DO_BIC(BIC_CPU_c1) || platform->has_msr_core_c1_res)
                return 0;
 
        return DO_BIC_READ(bic);
@@ -1567,7 +2202,8 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d
 
        old->c1 = new->c1 - old->c1;
 
-       if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || soft_c1_residency_display(BIC_Avg_MHz)) {
+       if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC)
+           || soft_c1_residency_display(BIC_Avg_MHz)) {
                if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
                        old->aperf = new->aperf - old->aperf;
                        old->mperf = new->mperf - old->mperf;
@@ -1576,7 +2212,7 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d
                }
        }
 
-       if (use_c1_residency_msr) {
+       if (platform->has_msr_core_c1_res) {
                /*
                 * Some models have a dedicated C1 residency MSR,
                 * which should be more accurate than the derivation below.
@@ -1626,7 +2262,7 @@ int delta_cpu(struct thread_data *t, struct core_data *c,
        int retval = 0;
 
        /* calculate core delta only for 1st thread in core */
-       if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE)
+       if (is_cpu_first_thread_in_core(t, c, p))
                delta_core(c, c2);
 
        /* always calculate thread delta */
@@ -1635,7 +2271,7 @@ int delta_cpu(struct thread_data *t, struct core_data *c,
                return retval;
 
        /* calculate package delta only for 1st core in package */
-       if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)
+       if (is_cpu_first_core_in_package(t, c, p))
                retval = delta_package(p, p2);
 
        return retval;
@@ -1663,9 +2299,6 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
        t->irq_count = 0;
        t->smi_count = 0;
 
-       /* tells format_counters to dump all fields from this set */
-       t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE;
-
        c->c3 = 0;
        c->c6 = 0;
        c->c7 = 0;
@@ -1749,7 +2382,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
        }
 
        /* sum per-core values only for 1st thread in core */
-       if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
+       if (!is_cpu_first_thread_in_core(t, c, p))
                return 0;
 
        average.cores.c3 += c->c3;
@@ -1769,7 +2402,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
        }
 
        /* sum per-pkg values only for 1st core in pkg */
-       if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+       if (!is_cpu_first_core_in_package(t, c, p))
                return 0;
 
        if (DO_BIC(BIC_Totl_c0))
@@ -1834,40 +2467,40 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data
        /* Use the global time delta for the average. */
        average.threads.tv_delta = tv_delta;
 
-       average.threads.tsc /= topo.num_cpus;
-       average.threads.aperf /= topo.num_cpus;
-       average.threads.mperf /= topo.num_cpus;
-       average.threads.instr_count /= topo.num_cpus;
-       average.threads.c1 /= topo.num_cpus;
+       average.threads.tsc /= topo.allowed_cpus;
+       average.threads.aperf /= topo.allowed_cpus;
+       average.threads.mperf /= topo.allowed_cpus;
+       average.threads.instr_count /= topo.allowed_cpus;
+       average.threads.c1 /= topo.allowed_cpus;
 
        if (average.threads.irq_count > 9999999)
                sums_need_wide_columns = 1;
 
-       average.cores.c3 /= topo.num_cores;
-       average.cores.c6 /= topo.num_cores;
-       average.cores.c7 /= topo.num_cores;
-       average.cores.mc6_us /= topo.num_cores;
+       average.cores.c3 /= topo.allowed_cores;
+       average.cores.c6 /= topo.allowed_cores;
+       average.cores.c7 /= topo.allowed_cores;
+       average.cores.mc6_us /= topo.allowed_cores;
 
        if (DO_BIC(BIC_Totl_c0))
-               average.packages.pkg_wtd_core_c0 /= topo.num_packages;
+               average.packages.pkg_wtd_core_c0 /= topo.allowed_packages;
        if (DO_BIC(BIC_Any_c0))
-               average.packages.pkg_any_core_c0 /= topo.num_packages;
+               average.packages.pkg_any_core_c0 /= topo.allowed_packages;
        if (DO_BIC(BIC_GFX_c0))
-               average.packages.pkg_any_gfxe_c0 /= topo.num_packages;
+               average.packages.pkg_any_gfxe_c0 /= topo.allowed_packages;
        if (DO_BIC(BIC_CPUGFX))
-               average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages;
+               average.packages.pkg_both_core_gfxe_c0 /= topo.allowed_packages;
 
-       average.packages.pc2 /= topo.num_packages;
+       average.packages.pc2 /= topo.allowed_packages;
        if (DO_BIC(BIC_Pkgpc3))
-               average.packages.pc3 /= topo.num_packages;
+               average.packages.pc3 /= topo.allowed_packages;
        if (DO_BIC(BIC_Pkgpc6))
-               average.packages.pc6 /= topo.num_packages;
+               average.packages.pc6 /= topo.allowed_packages;
        if (DO_BIC(BIC_Pkgpc7))
-               average.packages.pc7 /= topo.num_packages;
+               average.packages.pc7 /= topo.allowed_packages;
 
-       average.packages.pc8 /= topo.num_packages;
-       average.packages.pc9 /= topo.num_packages;
-       average.packages.pc10 /= topo.num_packages;
+       average.packages.pc8 /= topo.allowed_packages;
+       average.packages.pc9 /= topo.allowed_packages;
+       average.packages.pc10 /= topo.allowed_packages;
 
        for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
                if (mp->format == FORMAT_RAW)
@@ -1877,7 +2510,7 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data
                                sums_need_wide_columns = 1;
                        continue;
                }
-               average.threads.counter[i] /= topo.num_cpus;
+               average.threads.counter[i] /= topo.allowed_cpus;
        }
        for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
                if (mp->format == FORMAT_RAW)
@@ -1886,7 +2519,7 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data
                        if (average.cores.counter[i] > 9999999)
                                sums_need_wide_columns = 1;
                }
-               average.cores.counter[i] /= topo.num_cores;
+               average.cores.counter[i] /= topo.allowed_cores;
        }
        for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
                if (mp->format == FORMAT_RAW)
@@ -1895,7 +2528,7 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data
                        if (average.packages.counter[i] > 9999999)
                                sums_need_wide_columns = 1;
                }
-               average.packages.counter[i] /= topo.num_packages;
+               average.packages.counter[i] /= topo.allowed_packages;
        }
 }
 
@@ -2092,7 +2725,8 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 retry:
        t->tsc = rdtsc();       /* we are running on local CPU of interest */
 
-       if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || soft_c1_residency_display(BIC_Avg_MHz)) {
+       if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC)
+           || soft_c1_residency_display(BIC_Avg_MHz)) {
                unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
 
                /*
@@ -2158,7 +2792,7 @@ retry:
                        return -5;
                t->smi_count = msr & 0xFFFFFFFF;
        }
-       if (DO_BIC(BIC_CPU_c1) && use_c1_residency_msr) {
+       if (DO_BIC(BIC_CPU_c1) && platform->has_msr_core_c1_res) {
                if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
                        return -6;
        }
@@ -2169,7 +2803,7 @@ retry:
        }
 
        /* collect core counters only for 1st thread in core */
-       if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
+       if (!is_cpu_first_thread_in_core(t, c, p))
                goto done;
 
        if (DO_BIC(BIC_CPU_c3) || soft_c1_residency_display(BIC_CPU_c3)) {
@@ -2177,10 +2811,10 @@ retry:
                        return -6;
        }
 
-       if ((DO_BIC(BIC_CPU_c6) || soft_c1_residency_display(BIC_CPU_c6)) && !do_knl_cstates) {
+       if ((DO_BIC(BIC_CPU_c6) || soft_c1_residency_display(BIC_CPU_c6)) && !platform->has_msr_knl_core_c6_residency) {
                if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
                        return -7;
-       } else if (do_knl_cstates || soft_c1_residency_display(BIC_CPU_c6)) {
+       } else if (platform->has_msr_knl_core_c6_residency && soft_c1_residency_display(BIC_CPU_c6)) {
                if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
                        return -7;
        }
@@ -2212,7 +2846,7 @@ retry:
        if (DO_BIC(BIC_CORE_THROT_CNT))
                get_core_throt_cnt(cpu, &c->core_throt_cnt);
 
-       if (do_rapl & RAPL_AMD_F17H) {
+       if (platform->rapl_msrs & RAPL_AMD_F17H) {
                if (get_msr(cpu, MSR_CORE_ENERGY_STAT, &msr))
                        return -14;
                c->core_energy = msr & 0xFFFFFFFF;
@@ -2224,7 +2858,7 @@ retry:
        }
 
        /* collect package counters only for 1st core in package */
-       if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+       if (!is_cpu_first_core_in_package(t, c, p))
                goto done;
 
        if (DO_BIC(BIC_Totl_c0)) {
@@ -2247,7 +2881,7 @@ retry:
                if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
                        return -9;
        if (DO_BIC(BIC_Pkgpc6)) {
-               if (do_slm_cstates) {
+               if (platform->has_msr_atom_pkg_c6_residency) {
                        if (get_msr(cpu, MSR_ATOM_PKG_C6_RESIDENCY, &p->pc6))
                                return -10;
                } else {
@@ -2277,37 +2911,37 @@ retry:
        if (DO_BIC(BIC_SYS_LPI))
                p->sys_lpi = cpuidle_cur_sys_lpi_us;
 
-       if (do_rapl & RAPL_PKG) {
+       if (platform->rapl_msrs & RAPL_PKG) {
                if (get_msr_sum(cpu, MSR_PKG_ENERGY_STATUS, &msr))
                        return -13;
                p->energy_pkg = msr;
        }
-       if (do_rapl & RAPL_CORES_ENERGY_STATUS) {
+       if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS) {
                if (get_msr_sum(cpu, MSR_PP0_ENERGY_STATUS, &msr))
                        return -14;
                p->energy_cores = msr;
        }
-       if (do_rapl & RAPL_DRAM) {
+       if (platform->rapl_msrs & RAPL_DRAM) {
                if (get_msr_sum(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
                        return -15;
                p->energy_dram = msr;
        }
-       if (do_rapl & RAPL_GFX) {
+       if (platform->rapl_msrs & RAPL_GFX) {
                if (get_msr_sum(cpu, MSR_PP1_ENERGY_STATUS, &msr))
                        return -16;
                p->energy_gfx = msr;
        }
-       if (do_rapl & RAPL_PKG_PERF_STATUS) {
+       if (platform->rapl_msrs & RAPL_PKG_PERF_STATUS) {
                if (get_msr_sum(cpu, MSR_PKG_PERF_STATUS, &msr))
                        return -16;
                p->rapl_pkg_perf_status = msr;
        }
-       if (do_rapl & RAPL_DRAM_PERF_STATUS) {
+       if (platform->rapl_msrs & RAPL_DRAM_PERF_STATUS) {
                if (get_msr_sum(cpu, MSR_DRAM_PERF_STATUS, &msr))
                        return -16;
                p->rapl_dram_perf_status = msr;
        }
-       if (do_rapl & RAPL_AMD_F17H) {
+       if (platform->rapl_msrs & RAPL_AMD_F17H) {
                if (get_msr_sum(cpu, MSR_PKG_ENERGY_STAT, &msr))
                        return -13;
                p->energy_pkg = msr;
@@ -2414,18 +3048,58 @@ int icx_pkg_cstate_limits[16] =
        PCLRSV, PCLRSV
 };
 
-static void calculate_tsc_tweak()
+void probe_cst_limit(void)
 {
-       tsc_tweak = base_hz / tsc_hz;
-}
+       unsigned long long msr;
+       int *pkg_cstate_limits;
+
+       if (!platform->has_nhm_msrs)
+               return;
+
+       switch (platform->cst_limit) {
+       case CST_LIMIT_NHM:
+               pkg_cstate_limits = nhm_pkg_cstate_limits;
+               break;
+       case CST_LIMIT_SNB:
+               pkg_cstate_limits = snb_pkg_cstate_limits;
+               break;
+       case CST_LIMIT_HSW:
+               pkg_cstate_limits = hsw_pkg_cstate_limits;
+               break;
+       case CST_LIMIT_SKX:
+               pkg_cstate_limits = skx_pkg_cstate_limits;
+               break;
+       case CST_LIMIT_ICX:
+               pkg_cstate_limits = icx_pkg_cstate_limits;
+               break;
+       case CST_LIMIT_SLV:
+               pkg_cstate_limits = slv_pkg_cstate_limits;
+               break;
+       case CST_LIMIT_AMT:
+               pkg_cstate_limits = amt_pkg_cstate_limits;
+               break;
+       case CST_LIMIT_KNL:
+               pkg_cstate_limits = phi_pkg_cstate_limits;
+               break;
+       case CST_LIMIT_GMT:
+               pkg_cstate_limits = glm_pkg_cstate_limits;
+               break;
+       default:
+               return;
+       }
 
-void prewake_cstate_probe(unsigned int family, unsigned int model);
+       get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
+       pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
+}
 
-static void dump_nhm_platform_info(void)
+static void dump_platform_info(void)
 {
        unsigned long long msr;
        unsigned int ratio;
 
+       if (!platform->has_nhm_msrs)
+               return;
+
        get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
 
        fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
@@ -2435,19 +3109,27 @@ static void dump_nhm_platform_info(void)
 
        ratio = (msr >> 8) & 0xFF;
        fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk);
+}
+
+static void dump_power_ctl(void)
+{
+       unsigned long long msr;
+
+       if (!platform->has_nhm_msrs)
+               return;
 
        get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
        fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
                base_cpu, msr, msr & 0x2 ? "EN" : "DIS");
 
        /* C-state Pre-wake Disable (CSTATE_PREWAKE_DISABLE) */
-       if (dis_cstate_prewake)
+       if (platform->has_cst_prewake_bit)
                fprintf(outf, "C-state Pre-wake: %sabled\n", msr & 0x40000000 ? "DIS" : "EN");
 
        return;
 }
 
-static void dump_hsw_turbo_ratio_limits(void)
+static void dump_turbo_ratio_limit2(void)
 {
        unsigned long long msr;
        unsigned int ratio;
@@ -2466,7 +3148,7 @@ static void dump_hsw_turbo_ratio_limits(void)
        return;
 }
 
-static void dump_ivt_turbo_ratio_limits(void)
+static void dump_turbo_ratio_limit1(void)
 {
        unsigned long long msr;
        unsigned int ratio;
@@ -2509,29 +3191,7 @@ static void dump_ivt_turbo_ratio_limits(void)
        return;
 }
 
-int has_turbo_ratio_group_limits(int family, int model)
-{
-
-       if (!genuine_intel)
-               return 0;
-
-       if (family != 6)
-               return 0;
-
-       switch (model) {
-       case INTEL_FAM6_ATOM_GOLDMONT:
-       case INTEL_FAM6_SKYLAKE_X:
-       case INTEL_FAM6_ICELAKE_X:
-       case INTEL_FAM6_SAPPHIRERAPIDS_X:
-       case INTEL_FAM6_ATOM_GOLDMONT_D:
-       case INTEL_FAM6_ATOM_TREMONT_D:
-               return 1;
-       default:
-               return 0;
-       }
-}
-
-static void dump_turbo_ratio_limits(int trl_msr_offset, int family, int model)
+static void dump_turbo_ratio_limits(int trl_msr_offset)
 {
        unsigned long long msr, core_counts;
        int shift;
@@ -2540,7 +3200,7 @@ static void dump_turbo_ratio_limits(int trl_msr_offset, int family, int model)
        fprintf(outf, "cpu%d: MSR_%sTURBO_RATIO_LIMIT: 0x%08llx\n",
                base_cpu, trl_msr_offset == MSR_SECONDARY_TURBO_RATIO_LIMIT ? "SECONDARY_" : "", msr);
 
-       if (has_turbo_ratio_group_limits(family, model)) {
+       if (platform->trl_msrs & TRL_CORECOUNT) {
                get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts);
                fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, core_counts);
        } else {
@@ -2657,10 +3317,13 @@ static void dump_knl_turbo_ratio_limits(void)
                                ratio[i], bclk, ratio[i] * bclk, cores[i]);
 }
 
-static void dump_nhm_cst_cfg(void)
+static void dump_cst_cfg(void)
 {
        unsigned long long msr;
 
+       if (!platform->has_nhm_msrs)
+               return;
+
        get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
 
        fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr);
@@ -2673,7 +3336,7 @@ static void dump_nhm_cst_cfg(void)
                (msr & (1 << 15)) ? "" : "UN", (unsigned int)msr & 0xF, pkg_cstate_limit_strings[pkg_cstate_limit]);
 
 #define AUTOMATIC_CSTATE_CONVERSION            (1UL << 16)
-       if (has_automatic_cstate_conversion) {
+       if (platform->has_cst_auto_convension) {
                fprintf(outf, ", automatic c-state conversion=%s", (msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off");
        }
 
@@ -2730,39 +3393,50 @@ void print_irtl(void)
 {
        unsigned long long msr;
 
-       get_msr(base_cpu, MSR_PKGC3_IRTL, &msr);
-       fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr);
-       fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
-               (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
-
-       get_msr(base_cpu, MSR_PKGC6_IRTL, &msr);
-       fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr);
-       fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
-               (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+       if (!platform->has_irtl_msrs)
+               return;
 
-       get_msr(base_cpu, MSR_PKGC7_IRTL, &msr);
-       fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr);
-       fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
-               (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+       if (platform->supported_cstates & PC3) {
+               get_msr(base_cpu, MSR_PKGC3_IRTL, &msr);
+               fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr);
+               fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
+                       (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+       }
 
-       if (!do_irtl_hsw)
-               return;
+       if (platform->supported_cstates & PC6) {
+               get_msr(base_cpu, MSR_PKGC6_IRTL, &msr);
+               fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr);
+               fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
+                       (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+       }
 
-       get_msr(base_cpu, MSR_PKGC8_IRTL, &msr);
-       fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr);
-       fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
-               (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+       if (platform->supported_cstates & PC7) {
+               get_msr(base_cpu, MSR_PKGC7_IRTL, &msr);
+               fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr);
+               fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
+                       (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+       }
 
-       get_msr(base_cpu, MSR_PKGC9_IRTL, &msr);
-       fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr);
-       fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
-               (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+       if (platform->supported_cstates & PC8) {
+               get_msr(base_cpu, MSR_PKGC8_IRTL, &msr);
+               fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr);
+               fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
+                       (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+       }
 
-       get_msr(base_cpu, MSR_PKGC10_IRTL, &msr);
-       fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr);
-       fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
-               (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+       if (platform->supported_cstates & PC9) {
+               get_msr(base_cpu, MSR_PKGC9_IRTL, &msr);
+               fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr);
+               fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
+                       (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+       }
 
+       if (platform->supported_cstates & PC10) {
+               get_msr(base_cpu, MSR_PKGC10_IRTL, &msr);
+               fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr);
+               fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
+                       (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+       }
 }
 
 void free_fd_percpu(void)
@@ -2785,6 +3459,14 @@ void free_all_buffers(void)
        cpu_present_set = NULL;
        cpu_present_setsize = 0;
 
+       CPU_FREE(cpu_effective_set);
+       cpu_effective_set = NULL;
+       cpu_effective_setsize = 0;
+
+       CPU_FREE(cpu_allowed_set);
+       cpu_allowed_set = NULL;
+       cpu_allowed_setsize = 0;
+
        CPU_FREE(cpu_affinity_set);
        cpu_affinity_set = NULL;
        cpu_affinity_setsize = 0;
@@ -2927,49 +3609,102 @@ int get_physical_node_id(struct cpu_topology *thiscpu)
        return -1;
 }
 
-int get_thread_siblings(struct cpu_topology *thiscpu)
+static int parse_cpu_str(char *cpu_str, cpu_set_t *cpu_set, int cpu_set_size)
 {
-       char path[80], character;
-       FILE *filep;
-       unsigned long map;
-       int so, shift, sib_core;
-       int cpu = thiscpu->logical_cpu_id;
-       int offset = topo.max_cpu_num + 1;
-       size_t size;
-       int thread_id = 0;
+       unsigned int start, end;
+       char *next = cpu_str;
 
-       thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1));
-       if (thiscpu->thread_id < 0)
-               thiscpu->thread_id = thread_id++;
-       if (!thiscpu->put_ids)
-               return -1;
+       while (next && *next) {
 
-       size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
-       CPU_ZERO_S(size, thiscpu->put_ids);
+               if (*next == '-')       /* no negative cpu numbers */
+                       return 1;
 
-       sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu);
-       filep = fopen(path, "r");
+               start = strtoul(next, &next, 10);
 
-       if (!filep) {
-               warnx("%s: open failed", path);
-               return -1;
-       }
-       do {
-               offset -= BITMASK_SIZE;
-               if (fscanf(filep, "%lx%c", &map, &character) != 2)
-                       err(1, "%s: failed to parse file", path);
-               for (shift = 0; shift < BITMASK_SIZE; shift++) {
-                       if ((map >> shift) & 0x1) {
-                               so = shift + offset;
-                               sib_core = get_core_id(so);
-                               if (sib_core == thiscpu->physical_core_id) {
-                                       CPU_SET_S(so, size, thiscpu->put_ids);
-                                       if ((so != cpu) && (cpus[so].thread_id < 0))
-                                               cpus[so].thread_id = thread_id++;
-                               }
-                       }
-               }
-       } while (character == ',');
+               if (start >= CPU_SUBSET_MAXCPUS)
+                       return 1;
+               CPU_SET_S(start, cpu_set_size, cpu_set);
+
+               if (*next == '\0' || *next == '\n')
+                       break;
+
+               if (*next == ',') {
+                       next += 1;
+                       continue;
+               }
+
+               if (*next == '-') {
+                       next += 1;      /* start range */
+               } else if (*next == '.') {
+                       next += 1;
+                       if (*next == '.')
+                               next += 1;      /* start range */
+                       else
+                               return 1;
+               }
+
+               end = strtoul(next, &next, 10);
+               if (end <= start)
+                       return 1;
+
+               while (++start <= end) {
+                       if (start >= CPU_SUBSET_MAXCPUS)
+                               return 1;
+                       CPU_SET_S(start, cpu_set_size, cpu_set);
+               }
+
+               if (*next == ',')
+                       next += 1;
+               else if (*next != '\0' && *next != '\n')
+                       return 1;
+       }
+
+       return 0;
+}
+
+int get_thread_siblings(struct cpu_topology *thiscpu)
+{
+       char path[80], character;
+       FILE *filep;
+       unsigned long map;
+       int so, shift, sib_core;
+       int cpu = thiscpu->logical_cpu_id;
+       int offset = topo.max_cpu_num + 1;
+       size_t size;
+       int thread_id = 0;
+
+       thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1));
+       if (thiscpu->thread_id < 0)
+               thiscpu->thread_id = thread_id++;
+       if (!thiscpu->put_ids)
+               return -1;
+
+       size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
+       CPU_ZERO_S(size, thiscpu->put_ids);
+
+       sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu);
+       filep = fopen(path, "r");
+
+       if (!filep) {
+               warnx("%s: open failed", path);
+               return -1;
+       }
+       do {
+               offset -= BITMASK_SIZE;
+               if (fscanf(filep, "%lx%c", &map, &character) != 2)
+                       err(1, "%s: failed to parse file", path);
+               for (shift = 0; shift < BITMASK_SIZE; shift++) {
+                       if ((map >> shift) & 0x1) {
+                               so = shift + offset;
+                               sib_core = get_core_id(so);
+                               if (sib_core == thiscpu->physical_core_id) {
+                                       CPU_SET_S(so, size, thiscpu->put_ids);
+                                       if ((so != cpu) && (cpus[so].thread_id < 0))
+                                               cpus[so].thread_id = thread_id++;
+                               }
+                       }
+               }
+       } while (character == ',');
        fclose(filep);
 
        return CPU_COUNT_S(size, thiscpu->put_ids);
@@ -2998,7 +3733,7 @@ int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *,
 
                                        t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no);
 
-                                       if (cpu_is_not_present(t->cpu_id))
+                                       if (cpu_is_not_allowed(t->cpu_id))
                                                continue;
 
                                        t2 = GET_THREAD(thread_base2, thread_no, core_no, node_no, pkg_no);
@@ -3050,11 +3785,51 @@ int for_all_proc_cpus(int (func) (int))
        return 0;
 }
 
+#define PATH_EFFECTIVE_CPUS    "/sys/fs/cgroup/cpuset.cpus.effective"
+
+static char cpu_effective_str[1024];
+
+static int update_effective_str(bool startup)
+{
+       FILE *fp;
+       char *pos;
+       char buf[1024];
+       int ret;
+
+       if (cpu_effective_str[0] == '\0' && !startup)
+               return 0;
+
+       fp = fopen(PATH_EFFECTIVE_CPUS, "r");
+       if (!fp)
+               return 0;
+
+       pos = fgets(buf, 1024, fp);
+       if (!pos)
+               err(1, "%s: file read failed\n", PATH_EFFECTIVE_CPUS);
+
+       fclose(fp);
+
+       ret = strncmp(cpu_effective_str, buf, 1024);
+       if (!ret)
+               return 0;
+
+       strncpy(cpu_effective_str, buf, 1024);
+       return 1;
+}
+
+static void update_effective_set(bool startup)
+{
+       update_effective_str(startup);
+
+       if (parse_cpu_str(cpu_effective_str, cpu_effective_set, cpu_effective_setsize))
+               err(1, "%s: cpu str malformat %s\n", PATH_EFFECTIVE_CPUS, cpu_effective_str);
+}
+
 void re_initialize(void)
 {
        free_all_buffers();
-       setup_all_buffers();
-       fprintf(outf, "turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
+       setup_all_buffers(false);
+       fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus, topo.allowed_cpus);
 }
 
 void set_max_cpu_num(void)
@@ -3191,8 +3966,8 @@ int snapshot_gfx_rc6_ms(void)
 /*
  * snapshot_gfx_mhz()
  *
- * record snapshot of
- * /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
+ * fall back to /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
+ * when /sys/class/drm/card0/gt_cur_freq_mhz is not available.
  *
  * return 1 if config change requires a restart, else return 0
  */
@@ -3201,9 +3976,11 @@ int snapshot_gfx_mhz(void)
        static FILE *fp;
        int retval;
 
-       if (fp == NULL)
-               fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
-       else {
+       if (fp == NULL) {
+               fp = fopen("/sys/class/drm/card0/gt_cur_freq_mhz", "r");
+               if (!fp)
+                       fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
+       } else {
                rewind(fp);
                fflush(fp);
        }
@@ -3218,8 +3995,8 @@ int snapshot_gfx_mhz(void)
 /*
  * snapshot_gfx_cur_mhz()
  *
- * record snapshot of
- * /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz
+ * fall back to /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz
+ * when /sys/class/drm/card0/gt_act_freq_mhz is not available.
  *
  * return 1 if config change requires a restart, else return 0
  */
@@ -3228,9 +4005,11 @@ int snapshot_gfx_act_mhz(void)
        static FILE *fp;
        int retval;
 
-       if (fp == NULL)
-               fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", "r");
-       else {
+       if (fp == NULL) {
+               fp = fopen("/sys/class/drm/card0/gt_act_freq_mhz", "r");
+               if (!fp)
+                       fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", "r");
+       } else {
                rewind(fp);
                fflush(fp);
        }
@@ -3562,6 +4341,10 @@ restart:
                        re_initialize();
                        goto restart;
                }
+               if (update_effective_str(false)) {
+                       re_initialize();
+                       goto restart;
+               }
                do_sleep();
                if (snapshot_proc_sysfs_files())
                        goto restart;
@@ -3674,500 +4457,133 @@ void check_permissions(void)
                exit(-6);
 }
 
-/*
- * NHM adds support for additional MSRs:
- *
- * MSR_SMI_COUNT                   0x00000034
- *
- * MSR_PLATFORM_INFO               0x000000ce
- * MSR_PKG_CST_CONFIG_CONTROL     0x000000e2
- *
- * MSR_MISC_PWR_MGMT               0x000001aa
- *
- * MSR_PKG_C3_RESIDENCY            0x000003f8
- * MSR_PKG_C6_RESIDENCY            0x000003f9
- * MSR_CORE_C3_RESIDENCY           0x000003fc
- * MSR_CORE_C6_RESIDENCY           0x000003fd
- *
- * Side effect:
- * sets global pkg_cstate_limit to decode MSR_PKG_CST_CONFIG_CONTROL
- * sets has_misc_feature_control
- */
-int probe_nhm_msrs(unsigned int family, unsigned int model)
+void probe_bclk(void)
 {
        unsigned long long msr;
        unsigned int base_ratio;
-       int *pkg_cstate_limits;
 
-       if (!genuine_intel)
-               return 0;
-
-       if (family != 6)
-               return 0;
-
-       bclk = discover_bclk(family, model);
+       if (!platform->has_nhm_msrs)
+               return;
 
-       switch (model) {
-       case INTEL_FAM6_NEHALEM:        /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
-       case INTEL_FAM6_NEHALEM_EX:     /* Nehalem-EX Xeon - Beckton */
-               pkg_cstate_limits = nhm_pkg_cstate_limits;
-               break;
-       case INTEL_FAM6_SANDYBRIDGE:    /* SNB */
-       case INTEL_FAM6_SANDYBRIDGE_X:  /* SNB Xeon */
-       case INTEL_FAM6_IVYBRIDGE:      /* IVB */
-       case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
-               pkg_cstate_limits = snb_pkg_cstate_limits;
-               has_misc_feature_control = 1;
-               break;
-       case INTEL_FAM6_HASWELL:        /* HSW */
-       case INTEL_FAM6_HASWELL_G:      /* HSW */
-       case INTEL_FAM6_HASWELL_X:      /* HSX */
-       case INTEL_FAM6_HASWELL_L:      /* HSW */
-       case INTEL_FAM6_BROADWELL:      /* BDW */
-       case INTEL_FAM6_BROADWELL_G:    /* BDW */
-       case INTEL_FAM6_BROADWELL_X:    /* BDX */
-       case INTEL_FAM6_SKYLAKE_L:      /* SKL */
-       case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
-               pkg_cstate_limits = hsw_pkg_cstate_limits;
-               has_misc_feature_control = 1;
-               break;
-       case INTEL_FAM6_SKYLAKE_X:      /* SKX */
-       case INTEL_FAM6_SAPPHIRERAPIDS_X:       /* SPR */
-               pkg_cstate_limits = skx_pkg_cstate_limits;
-               has_misc_feature_control = 1;
-               break;
-       case INTEL_FAM6_ICELAKE_X:      /* ICX */
-               pkg_cstate_limits = icx_pkg_cstate_limits;
-               has_misc_feature_control = 1;
-               break;
-       case INTEL_FAM6_ATOM_SILVERMONT:        /* BYT */
-               no_MSR_MISC_PWR_MGMT = 1;
-               /* FALLTHRU */
-       case INTEL_FAM6_ATOM_SILVERMONT_D:      /* AVN */
-               pkg_cstate_limits = slv_pkg_cstate_limits;
-               break;
-       case INTEL_FAM6_ATOM_AIRMONT:   /* AMT */
-               pkg_cstate_limits = amt_pkg_cstate_limits;
-               no_MSR_MISC_PWR_MGMT = 1;
-               break;
-       case INTEL_FAM6_XEON_PHI_KNL:   /* PHI */
-               pkg_cstate_limits = phi_pkg_cstate_limits;
-               break;
-       case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
-       case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
-       case INTEL_FAM6_ATOM_GOLDMONT_D:        /* DNV */
-       case INTEL_FAM6_ATOM_TREMONT:   /* EHL */
-       case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */
-               pkg_cstate_limits = glm_pkg_cstate_limits;
-               break;
-       default:
-               return 0;
-       }
-       get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
-       pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
+       if (platform->bclk_freq == BCLK_100MHZ)
+               bclk = 100.00;
+       else if (platform->bclk_freq == BCLK_133MHZ)
+               bclk = 133.33;
+       else if (platform->bclk_freq == BCLK_SLV)
+               bclk = slm_bclk();
+       else
+               return;
 
        get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
        base_ratio = (msr >> 8) & 0xFF;
 
        base_hz = base_ratio * bclk * 1000000;
        has_base_hz = 1;
-       return 1;
-}
 
-/*
- * SLV client has support for unique MSRs:
- *
- * MSR_CC6_DEMOTION_POLICY_CONFIG
- * MSR_MC6_DEMOTION_POLICY_CONFIG
- */
+       if (platform->enable_tsc_tweak)
+               tsc_tweak = base_hz / tsc_hz;
+}
 
-int has_slv_msrs(unsigned int family, unsigned int model)
+static void remove_underbar(char *s)
 {
-       if (!genuine_intel)
-               return 0;
-
-       if (family != 6)
-               return 0;
+       char *to = s;
 
-       switch (model) {
-       case INTEL_FAM6_ATOM_SILVERMONT:
-       case INTEL_FAM6_ATOM_SILVERMONT_MID:
-       case INTEL_FAM6_ATOM_AIRMONT_MID:
-               return 1;
+       while (*s) {
+               if (*s != '_')
+                       *to++ = *s;
+               s++;
        }
-       return 0;
+
+       *to = 0;
 }
 
-int is_dnv(unsigned int family, unsigned int model)
+static void dump_turbo_ratio_info(void)
 {
+       if (!has_turbo)
+               return;
 
-       if (!genuine_intel)
-               return 0;
+       if (!platform->has_nhm_msrs)
+               return;
 
-       if (family != 6)
-               return 0;
+       if (platform->trl_msrs & TRL_LIMIT2)
+               dump_turbo_ratio_limit2();
 
-       switch (model) {
-       case INTEL_FAM6_ATOM_GOLDMONT_D:
-               return 1;
-       }
-       return 0;
-}
+       if (platform->trl_msrs & TRL_LIMIT1)
+               dump_turbo_ratio_limit1();
 
-int is_bdx(unsigned int family, unsigned int model)
-{
+       if (platform->trl_msrs & TRL_BASE) {
+               dump_turbo_ratio_limits(MSR_TURBO_RATIO_LIMIT);
 
-       if (!genuine_intel)
-               return 0;
+               if (is_hybrid)
+                       dump_turbo_ratio_limits(MSR_SECONDARY_TURBO_RATIO_LIMIT);
+       }
 
-       if (family != 6)
-               return 0;
+       if (platform->trl_msrs & TRL_ATOM)
+               dump_atom_turbo_ratio_limits();
 
-       switch (model) {
-       case INTEL_FAM6_BROADWELL_X:
-               return 1;
-       }
-       return 0;
+       if (platform->trl_msrs & TRL_KNL)
+               dump_knl_turbo_ratio_limits();
+
+       if (platform->has_config_tdp)
+               dump_config_tdp();
 }
 
-int is_skx(unsigned int family, unsigned int model)
+static int read_sysfs_int(char *path)
 {
+       FILE *input;
+       int retval = -1;
 
-       if (!genuine_intel)
-               return 0;
-
-       if (family != 6)
-               return 0;
-
-       switch (model) {
-       case INTEL_FAM6_SKYLAKE_X:
-               return 1;
+       input = fopen(path, "r");
+       if (input == NULL) {
+               if (debug)
+                       fprintf(outf, "NSFOD %s\n", path);
+               return (-1);
        }
-       return 0;
+       if (fscanf(input, "%d", &retval) != 1)
+               err(1, "%s: failed to read int from file", path);
+       fclose(input);
+
+       return (retval);
 }
 
-int is_icx(unsigned int family, unsigned int model)
+static void dump_sysfs_file(char *path)
 {
+       FILE *input;
+       char cpuidle_buf[64];
 
-       if (!genuine_intel)
-               return 0;
-
-       if (family != 6)
-               return 0;
-
-       switch (model) {
-       case INTEL_FAM6_ICELAKE_X:
-               return 1;
+       input = fopen(path, "r");
+       if (input == NULL) {
+               if (debug)
+                       fprintf(outf, "NSFOD %s\n", path);
+               return;
        }
-       return 0;
+       if (!fgets(cpuidle_buf, sizeof(cpuidle_buf), input))
+               err(1, "%s: failed to read file", path);
+       fclose(input);
+
+       fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf);
 }
 
-int is_spr(unsigned int family, unsigned int model)
+static void probe_intel_uncore_frequency(void)
 {
+       int i, j;
+       char path[128];
 
        if (!genuine_intel)
-               return 0;
+               return;
 
-       if (family != 6)
-               return 0;
+       if (access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00", R_OK))
+               return;
 
-       switch (model) {
-       case INTEL_FAM6_SAPPHIRERAPIDS_X:
-               return 1;
-       }
-       return 0;
-}
+       /* Cluster level sysfs not supported yet. */
+       if (!access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00", R_OK))
+               return;
 
-int is_ehl(unsigned int family, unsigned int model)
-{
-       if (!genuine_intel)
-               return 0;
+       if (!access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/current_freq_khz", R_OK))
+               BIC_PRESENT(BIC_UNCORE_MHZ);
 
-       if (family != 6)
-               return 0;
-
-       switch (model) {
-       case INTEL_FAM6_ATOM_TREMONT:
-               return 1;
-       }
-       return 0;
-}
-
-int is_jvl(unsigned int family, unsigned int model)
-{
-       if (!genuine_intel)
-               return 0;
-
-       if (family != 6)
-               return 0;
-
-       switch (model) {
-       case INTEL_FAM6_ATOM_TREMONT_D:
-               return 1;
-       }
-       return 0;
-}
-
-int has_turbo_ratio_limit(unsigned int family, unsigned int model)
-{
-       if (has_slv_msrs(family, model))
-               return 0;
-
-       if (family != 6)
-               return 0;
-
-       switch (model) {
-               /* Nehalem compatible, but do not include turbo-ratio limit support */
-       case INTEL_FAM6_NEHALEM_EX:     /* Nehalem-EX Xeon - Beckton */
-       case INTEL_FAM6_XEON_PHI_KNL:   /* PHI - Knights Landing (different MSR definition) */
-               return 0;
-       default:
-               return 1;
-       }
-}
-
-int has_atom_turbo_ratio_limit(unsigned int family, unsigned int model)
-{
-       if (has_slv_msrs(family, model))
-               return 1;
-
-       return 0;
-}
-
-int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
-{
-       if (!genuine_intel)
-               return 0;
-
-       if (family != 6)
-               return 0;
-
-       switch (model) {
-       case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
-       case INTEL_FAM6_HASWELL_X:      /* HSW Xeon */
-               return 1;
-       default:
-               return 0;
-       }
-}
-
-int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model)
-{
-       if (!genuine_intel)
-               return 0;
-
-       if (family != 6)
-               return 0;
-
-       switch (model) {
-       case INTEL_FAM6_HASWELL_X:      /* HSW Xeon */
-               return 1;
-       default:
-               return 0;
-       }
-}
-
-int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model)
-{
-       if (!genuine_intel)
-               return 0;
-
-       if (family != 6)
-               return 0;
-
-       switch (model) {
-       case INTEL_FAM6_XEON_PHI_KNL:   /* Knights Landing */
-               return 1;
-       default:
-               return 0;
-       }
-}
-
-int has_glm_turbo_ratio_limit(unsigned int family, unsigned int model)
-{
-       if (!genuine_intel)
-               return 0;
-
-       if (family != 6)
-               return 0;
-
-       switch (model) {
-       case INTEL_FAM6_ATOM_GOLDMONT:
-       case INTEL_FAM6_SKYLAKE_X:
-       case INTEL_FAM6_ICELAKE_X:
-       case INTEL_FAM6_SAPPHIRERAPIDS_X:
-               return 1;
-       default:
-               return 0;
-       }
-}
-
-int has_config_tdp(unsigned int family, unsigned int model)
-{
-       if (!genuine_intel)
-               return 0;
-
-       if (family != 6)
-               return 0;
-
-       switch (model) {
-       case INTEL_FAM6_IVYBRIDGE:      /* IVB */
-       case INTEL_FAM6_HASWELL:        /* HSW */
-       case INTEL_FAM6_HASWELL_X:      /* HSX */
-       case INTEL_FAM6_HASWELL_L:      /* HSW */
-       case INTEL_FAM6_HASWELL_G:      /* HSW */
-       case INTEL_FAM6_BROADWELL:      /* BDW */
-       case INTEL_FAM6_BROADWELL_G:    /* BDW */
-       case INTEL_FAM6_BROADWELL_X:    /* BDX */
-       case INTEL_FAM6_SKYLAKE_L:      /* SKL */
-       case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
-       case INTEL_FAM6_SKYLAKE_X:      /* SKX */
-       case INTEL_FAM6_ICELAKE_X:      /* ICX */
-       case INTEL_FAM6_SAPPHIRERAPIDS_X:       /* SPR */
-       case INTEL_FAM6_XEON_PHI_KNL:   /* Knights Landing */
-               return 1;
-       default:
-               return 0;
-       }
-}
-
-/*
- * tcc_offset_bits:
- * 0: Tcc Offset not supported (Default)
- * 6: Bit 29:24 of MSR_PLATFORM_INFO
- * 4: Bit 27:24 of MSR_PLATFORM_INFO
- */
-void check_tcc_offset(int model)
-{
-       unsigned long long msr;
-
-       if (!genuine_intel)
-               return;
-
-       switch (model) {
-       case INTEL_FAM6_SKYLAKE_L:
-       case INTEL_FAM6_SKYLAKE:
-       case INTEL_FAM6_KABYLAKE_L:
-       case INTEL_FAM6_KABYLAKE:
-       case INTEL_FAM6_ICELAKE_L:
-       case INTEL_FAM6_ICELAKE:
-       case INTEL_FAM6_TIGERLAKE_L:
-       case INTEL_FAM6_TIGERLAKE:
-       case INTEL_FAM6_COMETLAKE:
-               if (!get_msr(base_cpu, MSR_PLATFORM_INFO, &msr)) {
-                       msr = (msr >> 30) & 1;
-                       if (msr)
-                               tcc_offset_bits = 6;
-               }
-               return;
-       default:
-               return;
-       }
-}
-
-static void remove_underbar(char *s)
-{
-       char *to = s;
-
-       while (*s) {
-               if (*s != '_')
-                       *to++ = *s;
-               s++;
-       }
-
-       *to = 0;
-}
-
-static void dump_turbo_ratio_info(unsigned int family, unsigned int model)
-{
-       if (!has_turbo)
-               return;
-
-       if (has_hsw_turbo_ratio_limit(family, model))
-               dump_hsw_turbo_ratio_limits();
-
-       if (has_ivt_turbo_ratio_limit(family, model))
-               dump_ivt_turbo_ratio_limits();
-
-       if (has_turbo_ratio_limit(family, model)) {
-               dump_turbo_ratio_limits(MSR_TURBO_RATIO_LIMIT, family, model);
-
-               if (is_hybrid)
-                       dump_turbo_ratio_limits(MSR_SECONDARY_TURBO_RATIO_LIMIT, family, model);
-       }
-
-       if (has_atom_turbo_ratio_limit(family, model))
-               dump_atom_turbo_ratio_limits();
-
-       if (has_knl_turbo_ratio_limit(family, model))
-               dump_knl_turbo_ratio_limits();
-
-       if (has_config_tdp(family, model))
-               dump_config_tdp();
-}
-
-static void dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
-{
-       if (!do_nhm_platform_info)
-               return;
-
-       dump_nhm_platform_info();
-       dump_turbo_ratio_info(family, model);
-       dump_nhm_cst_cfg();
-}
-
-static int read_sysfs_int(char *path)
-{
-       FILE *input;
-       int retval = -1;
-
-       input = fopen(path, "r");
-       if (input == NULL) {
-               if (debug)
-                       fprintf(outf, "NSFOD %s\n", path);
-               return (-1);
-       }
-       if (fscanf(input, "%d", &retval) != 1)
-               err(1, "%s: failed to read int from file", path);
-       fclose(input);
-
-       return (retval);
-}
-
-static void dump_sysfs_file(char *path)
-{
-       FILE *input;
-       char cpuidle_buf[64];
-
-       input = fopen(path, "r");
-       if (input == NULL) {
-               if (debug)
-                       fprintf(outf, "NSFOD %s\n", path);
-               return;
-       }
-       if (!fgets(cpuidle_buf, sizeof(cpuidle_buf), input))
-               err(1, "%s: failed to read file", path);
-       fclose(input);
-
-       fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf);
-}
-
-static void intel_uncore_frequency_probe(void)
-{
-       int i, j;
-       char path[128];
-
-       if (!genuine_intel)
-               return;
-
-       if (access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00", R_OK))
-               return;
-
-       if (!access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/current_freq_khz", R_OK))
-               BIC_PRESENT(BIC_UNCORE_MHZ);
-
-       if (quiet)
-               return;
+       if (quiet)
+               return;
 
        for (i = 0; i < topo.num_packages; ++i) {
                for (j = 0; j < topo.num_die; ++j) {
@@ -4194,6 +4610,20 @@ static void intel_uncore_frequency_probe(void)
        }
 }
 
+static void probe_graphics(void)
+{
+       if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
+               BIC_PRESENT(BIC_GFX_rc6);
+
+       if (!access("/sys/class/drm/card0/gt_cur_freq_mhz", R_OK) ||
+           !access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
+               BIC_PRESENT(BIC_GFXMHz);
+
+       if (!access("/sys/class/drm/card0/gt_act_freq_mhz", R_OK) ||
+           !access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK))
+               BIC_PRESENT(BIC_GFXACTMHz);
+}
+
 static void dump_sysfs_cstate_config(void)
 {
        char path[64];
@@ -4310,7 +4740,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
        cpu = t->cpu_id;
 
        /* EPB is per-package */
-       if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+       if (!is_cpu_first_thread_in_package(t, c, p))
                return 0;
 
        if (cpu_migrate(cpu)) {
@@ -4359,7 +4789,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
        cpu = t->cpu_id;
 
        /* MSR_HWP_CAPABILITIES is per-package */
-       if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+       if (!is_cpu_first_thread_in_package(t, c, p))
                return 0;
 
        if (cpu_migrate(cpu)) {
@@ -4442,7 +4872,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
        cpu = t->cpu_id;
 
        /* per-package */
-       if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+       if (!is_cpu_first_thread_in_package(t, c, p))
                return 0;
 
        if (cpu_migrate(cpu)) {
@@ -4450,7 +4880,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
                return -1;
        }
 
-       if (do_core_perf_limit_reasons) {
+       if (platform->plr_msrs & PLR_CORE) {
                get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
                fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
                fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
@@ -4483,7 +4913,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
                        (msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 16) ? "PROCHOT, " : "");
 
        }
-       if (do_gfx_perf_limit_reasons) {
+       if (platform->plr_msrs & PLR_GFX) {
                get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr);
                fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
                fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)",
@@ -4503,7 +4933,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
                        (msr & 1 << 25) ? "GFXPwr, " : "",
                        (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : "");
        }
-       if (do_ring_perf_limit_reasons) {
+       if (platform->plr_msrs & PLR_RING) {
                get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr);
                fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
                fprintf(outf, " (Active: %s%s%s%s%s%s)",
@@ -4525,208 +4955,74 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
 #define        RAPL_POWER_GRANULARITY  0x7FFF  /* 15 bit power granularity */
 #define        RAPL_TIME_GRANULARITY   0x3F    /* 6 bit time granularity */
 
-double get_tdp_intel(unsigned int model)
+double get_quirk_tdp(void)
 {
-       unsigned long long msr;
-
-       if (do_rapl & RAPL_PKG_POWER_INFO)
-               if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
-                       return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
+       if (platform->rapl_quirk_tdp)
+               return platform->rapl_quirk_tdp;
 
-       switch (model) {
-       case INTEL_FAM6_ATOM_SILVERMONT:
-       case INTEL_FAM6_ATOM_SILVERMONT_D:
-               return 30.0;
-       default:
-               return 135.0;
-       }
+       return 135.0;
 }
 
-double get_tdp_amd(unsigned int family)
+double get_tdp_intel(void)
 {
-       UNUSED(family);
+       unsigned long long msr;
 
-       /* This is the max stock TDP of HEDT/Server Fam17h+ chips */
-       return 280.0;
+       if (platform->rapl_msrs & RAPL_PKG_POWER_INFO)
+               if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
+                       return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
+       return get_quirk_tdp();
 }
 
-/*
- * rapl_dram_energy_units_probe()
- * Energy units are either hard-coded, or come from RAPL Energy Unit MSR.
- */
-static double rapl_dram_energy_units_probe(int model, double rapl_energy_units)
+double get_tdp_amd(void)
 {
-       /* only called for genuine_intel, family 6 */
-
-       switch (model) {
-       case INTEL_FAM6_HASWELL_X:      /* HSX */
-       case INTEL_FAM6_BROADWELL_X:    /* BDX */
-       case INTEL_FAM6_SKYLAKE_X:      /* SKX */
-       case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
-       case INTEL_FAM6_ICELAKE_X:      /* ICX */
-               return (rapl_dram_energy_units = 15.3 / 1000000);
-       default:
-               return (rapl_energy_units);
-       }
+       return get_quirk_tdp();
 }
 
-void rapl_probe_intel(unsigned int family, unsigned int model)
+void rapl_probe_intel(void)
 {
        unsigned long long msr;
        unsigned int time_unit;
        double tdp;
 
-       if (family != 6)
-               return;
-
-       switch (model) {
-       case INTEL_FAM6_SANDYBRIDGE:
-       case INTEL_FAM6_IVYBRIDGE:
-       case INTEL_FAM6_HASWELL:        /* HSW */
-       case INTEL_FAM6_HASWELL_L:      /* HSW */
-       case INTEL_FAM6_HASWELL_G:      /* HSW */
-       case INTEL_FAM6_BROADWELL:      /* BDW */
-       case INTEL_FAM6_BROADWELL_G:    /* BDW */
-               do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
-               if (rapl_joules) {
-                       BIC_PRESENT(BIC_Pkg_J);
-                       BIC_PRESENT(BIC_Cor_J);
-                       BIC_PRESENT(BIC_GFX_J);
-               } else {
-                       BIC_PRESENT(BIC_PkgWatt);
-                       BIC_PRESENT(BIC_CorWatt);
-                       BIC_PRESENT(BIC_GFXWatt);
-               }
-               break;
-       case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
-       case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
-               do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO;
-               if (rapl_joules)
-                       BIC_PRESENT(BIC_Pkg_J);
-               else
-                       BIC_PRESENT(BIC_PkgWatt);
-               break;
-       case INTEL_FAM6_ATOM_TREMONT:   /* EHL */
-               do_rapl =
-                   RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS
-                   | RAPL_GFX | RAPL_PKG_POWER_INFO;
-               if (rapl_joules) {
-                       BIC_PRESENT(BIC_Pkg_J);
-                       BIC_PRESENT(BIC_Cor_J);
-                       BIC_PRESENT(BIC_RAM_J);
-                       BIC_PRESENT(BIC_GFX_J);
-               } else {
-                       BIC_PRESENT(BIC_PkgWatt);
-                       BIC_PRESENT(BIC_CorWatt);
-                       BIC_PRESENT(BIC_RAMWatt);
-                       BIC_PRESENT(BIC_GFXWatt);
-               }
-               break;
-       case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */
-               do_rapl = RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
-               BIC_PRESENT(BIC_PKG__);
-               if (rapl_joules)
-                       BIC_PRESENT(BIC_Pkg_J);
-               else
-                       BIC_PRESENT(BIC_PkgWatt);
-               break;
-       case INTEL_FAM6_SKYLAKE_L:      /* SKL */
-       case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
-               do_rapl =
-                   RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS
-                   | RAPL_GFX | RAPL_PKG_POWER_INFO;
-               BIC_PRESENT(BIC_PKG__);
-               BIC_PRESENT(BIC_RAM__);
-               if (rapl_joules) {
+       if (rapl_joules) {
+               if (platform->rapl_msrs & RAPL_PKG_ENERGY_STATUS)
                        BIC_PRESENT(BIC_Pkg_J);
+               if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS)
                        BIC_PRESENT(BIC_Cor_J);
+               if (platform->rapl_msrs & RAPL_DRAM_ENERGY_STATUS)
                        BIC_PRESENT(BIC_RAM_J);
+               if (platform->rapl_msrs & RAPL_GFX_ENERGY_STATUS)
                        BIC_PRESENT(BIC_GFX_J);
-               } else {
+       } else {
+               if (platform->rapl_msrs & RAPL_PKG_ENERGY_STATUS)
                        BIC_PRESENT(BIC_PkgWatt);
+               if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS)
                        BIC_PRESENT(BIC_CorWatt);
+               if (platform->rapl_msrs & RAPL_DRAM_ENERGY_STATUS)
                        BIC_PRESENT(BIC_RAMWatt);
+               if (platform->rapl_msrs & RAPL_GFX_ENERGY_STATUS)
                        BIC_PRESENT(BIC_GFXWatt);
-               }
-               break;
-       case INTEL_FAM6_HASWELL_X:      /* HSX */
-       case INTEL_FAM6_BROADWELL_X:    /* BDX */
-       case INTEL_FAM6_SKYLAKE_X:      /* SKX */
-       case INTEL_FAM6_ICELAKE_X:      /* ICX */
-       case INTEL_FAM6_SAPPHIRERAPIDS_X:       /* SPR */
-       case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
-               do_rapl =
-                   RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS |
-                   RAPL_PKG_POWER_INFO;
+       }
+
+       if (platform->rapl_msrs & RAPL_PKG_PERF_STATUS)
                BIC_PRESENT(BIC_PKG__);
+       if (platform->rapl_msrs & RAPL_DRAM_PERF_STATUS)
                BIC_PRESENT(BIC_RAM__);
-               if (rapl_joules) {
-                       BIC_PRESENT(BIC_Pkg_J);
-                       BIC_PRESENT(BIC_RAM_J);
-               } else {
-                       BIC_PRESENT(BIC_PkgWatt);
-                       BIC_PRESENT(BIC_RAMWatt);
-               }
-               break;
-       case INTEL_FAM6_SANDYBRIDGE_X:
-       case INTEL_FAM6_IVYBRIDGE_X:
-               do_rapl =
-                   RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS |
-                   RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
-               BIC_PRESENT(BIC_PKG__);
-               BIC_PRESENT(BIC_RAM__);
-               if (rapl_joules) {
-                       BIC_PRESENT(BIC_Pkg_J);
-                       BIC_PRESENT(BIC_Cor_J);
-                       BIC_PRESENT(BIC_RAM_J);
-               } else {
-                       BIC_PRESENT(BIC_PkgWatt);
-                       BIC_PRESENT(BIC_CorWatt);
-                       BIC_PRESENT(BIC_RAMWatt);
-               }
-               break;
-       case INTEL_FAM6_ATOM_SILVERMONT:        /* BYT */
-       case INTEL_FAM6_ATOM_SILVERMONT_D:      /* AVN */
-               do_rapl = RAPL_PKG | RAPL_CORES;
-               if (rapl_joules) {
-                       BIC_PRESENT(BIC_Pkg_J);
-                       BIC_PRESENT(BIC_Cor_J);
-               } else {
-                       BIC_PRESENT(BIC_PkgWatt);
-                       BIC_PRESENT(BIC_CorWatt);
-               }
-               break;
-       case INTEL_FAM6_ATOM_GOLDMONT_D:        /* DNV */
-               do_rapl =
-                   RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS |
-                   RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS;
-               BIC_PRESENT(BIC_PKG__);
-               BIC_PRESENT(BIC_RAM__);
-               if (rapl_joules) {
-                       BIC_PRESENT(BIC_Pkg_J);
-                       BIC_PRESENT(BIC_Cor_J);
-                       BIC_PRESENT(BIC_RAM_J);
-               } else {
-                       BIC_PRESENT(BIC_PkgWatt);
-                       BIC_PRESENT(BIC_CorWatt);
-                       BIC_PRESENT(BIC_RAMWatt);
-               }
-               break;
-       default:
-               return;
-       }
 
        /* units on package 0, verify later other packages match */
        if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
                return;
 
        rapl_power_units = 1.0 / (1 << (msr & 0xF));
-       if (model == INTEL_FAM6_ATOM_SILVERMONT)
+       if (platform->has_rapl_divisor)
                rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
        else
                rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
 
-       rapl_dram_energy_units = rapl_dram_energy_units_probe(model, rapl_energy_units);
+       if (platform->has_fixed_rapl_unit)
+               rapl_dram_energy_units = (15.3 / 1000000);
+       else
+               rapl_dram_energy_units = rapl_energy_units;
 
        time_unit = msr >> 16 & 0xF;
        if (time_unit == 0)
@@ -4734,32 +5030,18 @@ void rapl_probe_intel(unsigned int family, unsigned int model)
 
        rapl_time_units = 1.0 / (1 << (time_unit));
 
-       tdp = get_tdp_intel(model);
+       tdp = get_tdp_intel();
 
        rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
        if (!quiet)
                fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
 }
 
-void rapl_probe_amd(unsigned int family, unsigned int model)
+void rapl_probe_amd(void)
 {
        unsigned long long msr;
-       unsigned int eax, ebx, ecx, edx;
-       unsigned int has_rapl = 0;
        double tdp;
 
-       UNUSED(model);
-
-       if (max_extended_level >= 0x80000007) {
-               __cpuid(0x80000007, eax, ebx, ecx, edx);
-               /* RAPL (Fam 17h+) */
-               has_rapl = edx & (1 << 14);
-       }
-
-       if (!has_rapl || family < 0x17)
-               return;
-
-       do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY;
        if (rapl_joules) {
                BIC_PRESENT(BIC_Pkg_J);
                BIC_PRESENT(BIC_Cor_J);
@@ -4775,128 +5057,13 @@ void rapl_probe_amd(unsigned int family, unsigned int model)
        rapl_energy_units = ldexp(1.0, -(msr >> 8 & 0x1f));
        rapl_power_units = ldexp(1.0, -(msr & 0xf));
 
-       tdp = get_tdp_amd(family);
+       tdp = get_tdp_amd();
 
        rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
        if (!quiet)
                fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
 }
 
-/*
- * rapl_probe()
- *
- * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units
- */
-void rapl_probe(unsigned int family, unsigned int model)
-{
-       if (genuine_intel)
-               rapl_probe_intel(family, model);
-       if (authentic_amd || hygon_genuine)
-               rapl_probe_amd(family, model);
-}
-
-void perf_limit_reasons_probe(unsigned int family, unsigned int model)
-{
-       if (!genuine_intel)
-               return;
-
-       if (family != 6)
-               return;
-
-       switch (model) {
-       case INTEL_FAM6_HASWELL:        /* HSW */
-       case INTEL_FAM6_HASWELL_L:      /* HSW */
-       case INTEL_FAM6_HASWELL_G:      /* HSW */
-               do_gfx_perf_limit_reasons = 1;
-               /* FALLTHRU */
-       case INTEL_FAM6_HASWELL_X:      /* HSX */
-               do_core_perf_limit_reasons = 1;
-               do_ring_perf_limit_reasons = 1;
-       default:
-               return;
-       }
-}
-
-void automatic_cstate_conversion_probe(unsigned int family, unsigned int model)
-{
-       if (family != 6)
-               return;
-
-       switch (model) {
-       case INTEL_FAM6_BROADWELL_X:
-       case INTEL_FAM6_SKYLAKE_X:
-               has_automatic_cstate_conversion = 1;
-       }
-}
-
-void prewake_cstate_probe(unsigned int family, unsigned int model)
-{
-       if (is_icx(family, model) || is_spr(family, model))
-               dis_cstate_prewake = 1;
-}
-
-int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
-{
-       unsigned long long msr;
-       unsigned int dts, dts2;
-       int cpu;
-
-       UNUSED(c);
-       UNUSED(p);
-
-       if (!(do_dts || do_ptm))
-               return 0;
-
-       cpu = t->cpu_id;
-
-       /* DTS is per-core, no need to print for each thread */
-       if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
-               return 0;
-
-       if (cpu_migrate(cpu)) {
-               fprintf(outf, "print_thermal: Could not migrate to CPU %d\n", cpu);
-               return -1;
-       }
-
-       if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) {
-               if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
-                       return 0;
-
-               dts = (msr >> 16) & 0x7F;
-               fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", cpu, msr, tj_max - dts);
-
-               if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
-                       return 0;
-
-               dts = (msr >> 16) & 0x7F;
-               dts2 = (msr >> 8) & 0x7F;
-               fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
-                       cpu, msr, tj_max - dts, tj_max - dts2);
-       }
-
-       if (do_dts && debug) {
-               unsigned int resolution;
-
-               if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
-                       return 0;
-
-               dts = (msr >> 16) & 0x7F;
-               resolution = (msr >> 27) & 0xF;
-               fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
-                       cpu, msr, tj_max - dts, resolution);
-
-               if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
-                       return 0;
-
-               dts = (msr >> 16) & 0x7F;
-               dts2 = (msr >> 8) & 0x7F;
-               fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
-                       cpu, msr, tj_max - dts, tj_max - dts2);
-       }
-
-       return 0;
-}
-
 void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
 {
        fprintf(outf, "cpu%d: %s: %sabled (%0.3f Watts, %f sec, clamp %sabled)\n",
@@ -4918,11 +5085,11 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
        UNUSED(c);
        UNUSED(p);
 
-       if (!do_rapl)
+       if (!platform->rapl_msrs)
                return 0;
 
        /* RAPL counters are per package, so print only for 1st thread/package */
-       if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+       if (!is_cpu_first_thread_in_package(t, c, p))
                return 0;
 
        cpu = t->cpu_id;
@@ -4931,7 +5098,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
                return -1;
        }
 
-       if (do_rapl & RAPL_AMD_F17H) {
+       if (platform->rapl_msrs & RAPL_AMD_F17H) {
                msr_name = "MSR_RAPL_PWR_UNIT";
                if (get_msr(cpu, MSR_RAPL_PWR_UNIT, &msr))
                        return -1;
@@ -4944,7 +5111,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
        fprintf(outf, "cpu%d: %s: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr_name, msr,
                rapl_power_units, rapl_energy_units, rapl_time_units);
 
-       if (do_rapl & RAPL_PKG_POWER_INFO) {
+       if (platform->rapl_msrs & RAPL_PKG_POWER_INFO) {
 
                if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
                        return -5;
@@ -4957,7 +5124,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
                        ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
 
        }
-       if (do_rapl & RAPL_PKG) {
+       if (platform->rapl_msrs & RAPL_PKG) {
 
                if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
                        return -9;
@@ -4981,7 +5148,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
                        cpu, ((msr >> 0) & 0x1FFF) * rapl_power_units, (msr >> 31) & 1 ? "" : "UN");
        }
 
-       if (do_rapl & RAPL_DRAM_POWER_INFO) {
+       if (platform->rapl_msrs & RAPL_DRAM_POWER_INFO) {
                if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
                        return -6;
 
@@ -4992,7 +5159,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
                        ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
                        ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
        }
-       if (do_rapl & RAPL_DRAM) {
+       if (platform->rapl_msrs & RAPL_DRAM) {
                if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
                        return -9;
                fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
@@ -5000,20 +5167,20 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 
                print_power_limit_msr(cpu, msr, "DRAM Limit");
        }
-       if (do_rapl & RAPL_CORE_POLICY) {
+       if (platform->rapl_msrs & RAPL_CORE_POLICY) {
                if (get_msr(cpu, MSR_PP0_POLICY, &msr))
                        return -7;
 
                fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
        }
-       if (do_rapl & RAPL_CORES_POWER_LIMIT) {
+       if (platform->rapl_msrs & RAPL_CORE_POWER_LIMIT) {
                if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
                        return -9;
                fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
                        cpu, msr, (msr >> 31) & 1 ? "" : "UN");
                print_power_limit_msr(cpu, msr, "Cores Limit");
        }
-       if (do_rapl & RAPL_GFX) {
+       if (platform->rapl_msrs & RAPL_GFX) {
                if (get_msr(cpu, MSR_PP1_POLICY, &msr))
                        return -8;
 
@@ -5029,217 +5196,24 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 }
 
 /*
- * SNB adds support for additional MSRs:
- *
- * MSR_PKG_C7_RESIDENCY            0x000003fa
- * MSR_CORE_C7_RESIDENCY           0x000003fe
- * MSR_PKG_C2_RESIDENCY            0x0000060d
- */
-
-int has_snb_msrs(unsigned int family, unsigned int model)
-{
-       if (!genuine_intel)
-               return 0;
-
-       if (family != 6)
-               return 0;
-
-       switch (model) {
-       case INTEL_FAM6_SANDYBRIDGE:
-       case INTEL_FAM6_SANDYBRIDGE_X:
-       case INTEL_FAM6_IVYBRIDGE:      /* IVB */
-       case INTEL_FAM6_IVYBRIDGE_X:    /* IVB Xeon */
-       case INTEL_FAM6_HASWELL:        /* HSW */
-       case INTEL_FAM6_HASWELL_X:      /* HSW */
-       case INTEL_FAM6_HASWELL_L:      /* HSW */
-       case INTEL_FAM6_HASWELL_G:      /* HSW */
-       case INTEL_FAM6_BROADWELL:      /* BDW */
-       case INTEL_FAM6_BROADWELL_G:    /* BDW */
-       case INTEL_FAM6_BROADWELL_X:    /* BDX */
-       case INTEL_FAM6_SKYLAKE_L:      /* SKL */
-       case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
-       case INTEL_FAM6_SKYLAKE_X:      /* SKX */
-       case INTEL_FAM6_ICELAKE_X:      /* ICX */
-       case INTEL_FAM6_SAPPHIRERAPIDS_X:       /* SPR */
-       case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
-       case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
-       case INTEL_FAM6_ATOM_GOLDMONT_D:        /* DNV */
-       case INTEL_FAM6_ATOM_TREMONT:   /* EHL */
-       case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */
-               return 1;
-       }
-       return 0;
-}
-
-/*
- * HSW ULT added support for C8/C9/C10 MSRs:
- *
- * MSR_PKG_C8_RESIDENCY                0x00000630
- * MSR_PKG_C9_RESIDENCY                0x00000631
- * MSR_PKG_C10_RESIDENCY       0x00000632
- *
- * MSR_PKGC8_IRTL              0x00000633
- * MSR_PKGC9_IRTL              0x00000634
- * MSR_PKGC10_IRTL             0x00000635
- *
- */
-int has_c8910_msrs(unsigned int family, unsigned int model)
-{
-       if (!genuine_intel)
-               return 0;
-
-       if (family != 6)
-               return 0;
-
-       switch (model) {
-       case INTEL_FAM6_HASWELL_L:      /* HSW */
-       case INTEL_FAM6_BROADWELL:      /* BDW */
-       case INTEL_FAM6_SKYLAKE_L:      /* SKL */
-       case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
-       case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
-       case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
-       case INTEL_FAM6_ATOM_TREMONT:   /* EHL */
-               return 1;
-       }
-       return 0;
-}
-
-/*
- * SKL adds support for additional MSRS:
+ * probe_rapl()
  *
- * MSR_PKG_WEIGHTED_CORE_C0_RES    0x00000658
- * MSR_PKG_ANY_CORE_C0_RES         0x00000659
- * MSR_PKG_ANY_GFXE_C0_RES         0x0000065A
- * MSR_PKG_BOTH_CORE_GFXE_C0_RES   0x0000065B
+ * sets rapl_power_units, rapl_energy_units, rapl_time_units
  */
-int has_skl_msrs(unsigned int family, unsigned int model)
-{
-       if (!genuine_intel)
-               return 0;
-
-       if (family != 6)
-               return 0;
-
-       switch (model) {
-       case INTEL_FAM6_SKYLAKE_L:      /* SKL */
-       case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
-               return 1;
-       }
-       return 0;
-}
-
-int is_slm(unsigned int family, unsigned int model)
-{
-       if (!genuine_intel)
-               return 0;
-
-       if (family != 6)
-               return 0;
-
-       switch (model) {
-       case INTEL_FAM6_ATOM_SILVERMONT:        /* BYT */
-       case INTEL_FAM6_ATOM_SILVERMONT_D:      /* AVN */
-               return 1;
-       }
-       return 0;
-}
-
-int is_knl(unsigned int family, unsigned int model)
-{
-       if (!genuine_intel)
-               return 0;
-
-       if (family != 6)
-               return 0;
-
-       switch (model) {
-       case INTEL_FAM6_XEON_PHI_KNL:   /* KNL */
-               return 1;
-       }
-       return 0;
-}
-
-int is_cnl(unsigned int family, unsigned int model)
-{
-       if (!genuine_intel)
-               return 0;
-
-       if (family != 6)
-               return 0;
-
-       switch (model) {
-       case INTEL_FAM6_CANNONLAKE_L:   /* CNL */
-               return 1;
-       }
-
-       return 0;
-}
-
-unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model)
-{
-       if (is_knl(family, model))
-               return 1024;
-       return 1;
-}
-
-#define SLM_BCLK_FREQS 5
-double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0 };
-
-double slm_bclk(void)
-{
-       unsigned long long msr = 3;
-       unsigned int i;
-       double freq;
-
-       if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
-               fprintf(outf, "SLM BCLK: unknown\n");
-
-       i = msr & 0xf;
-       if (i >= SLM_BCLK_FREQS) {
-               fprintf(outf, "SLM BCLK[%d] invalid\n", i);
-               i = 3;
-       }
-       freq = slm_freq_table[i];
-
-       if (!quiet)
-               fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
-
-       return freq;
-}
-
-double discover_bclk(unsigned int family, unsigned int model)
+void probe_rapl(void)
 {
-       if (has_snb_msrs(family, model) || is_knl(family, model))
-               return 100.00;
-       else if (is_slm(family, model))
-               return slm_bclk();
-       else
-               return 133.33;
-}
-
-int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p)
-{
-       unsigned int eax, ebx, ecx, edx;
-
-       UNUSED(c);
-       UNUSED(p);
-
-       if (!genuine_intel)
-               return 0;
+       if (!platform->rapl_msrs)
+               return;
 
-       if (cpu_migrate(t->cpu_id)) {
-               fprintf(outf, "Could not migrate to CPU %d\n", t->cpu_id);
-               return -1;
-       }
+       if (genuine_intel)
+               rapl_probe_intel();
+       if (authentic_amd || hygon_genuine)
+               rapl_probe_amd();
 
-       if (max_level < 0x1a)
-               return 0;
+       if (quiet)
+               return;
 
-       __cpuid(0x1a, eax, ebx, ecx, edx);
-       eax = (eax >> 24) & 0xFF;
-       if (eax == 0x20)
-               t->is_atom = true;
-       return 0;
+       for_all_cpus(print_rapl, ODD_COUNTERS);
 }
 
 /*
@@ -5268,7 +5242,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
                return 0;
 
        /* this is a per-package concept */
-       if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+       if (!is_cpu_first_thread_in_package(t, c, p))
                return 0;
 
        cpu = t->cpu_id;
@@ -5284,7 +5258,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
        }
 
        /* Temperature Target MSR is Nehalem and newer only */
-       if (!do_nhm_platform_info)
+       if (!platform->has_nhm_msrs)
                goto guess;
 
        if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
@@ -5293,34 +5267,134 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
        tcc_default = (msr >> 16) & 0xFF;
 
        if (!quiet) {
-               switch (tcc_offset_bits) {
-               case 4:
-                       tcc_offset = (msr >> 24) & 0xF;
-                       fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n",
-                               cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset);
-                       break;
-               case 6:
-                       tcc_offset = (msr >> 24) & 0x3F;
+               int bits = platform->tcc_offset_bits;
+               unsigned long long enabled = 0;
+
+               if (bits && !get_msr(base_cpu, MSR_PLATFORM_INFO, &enabled))
+                       enabled = (enabled >> 30) & 1;
+
+               if (bits && enabled) {
+                       tcc_offset = (msr >> 24) & GENMASK(bits - 1, 0);
                        fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n",
                                cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset);
-                       break;
-               default:
+               } else {
                        fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", cpu, msr, tcc_default);
-                       break;
                }
        }
 
-       if (!tcc_default)
-               goto guess;
+       if (!tcc_default)
+               goto guess;
+
+       tj_max = tcc_default;
+
+       return 0;
+
+guess:
+       tj_max = TJMAX_DEFAULT;
+       fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", cpu, tj_max);
+
+       return 0;
+}
+
+int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+       unsigned long long msr;
+       unsigned int dts, dts2;
+       int cpu;
+
+       UNUSED(c);
+       UNUSED(p);
+
+       if (!(do_dts || do_ptm))
+               return 0;
+
+       cpu = t->cpu_id;
+
+       /* DTS is per-core, no need to print for each thread */
+       if (!is_cpu_first_thread_in_core(t, c, p))
+               return 0;
+
+       if (cpu_migrate(cpu)) {
+               fprintf(outf, "print_thermal: Could not migrate to CPU %d\n", cpu);
+               return -1;
+       }
+
+       if (do_ptm && is_cpu_first_core_in_package(t, c, p)) {
+               if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
+                       return 0;
+
+               dts = (msr >> 16) & 0x7F;
+               fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", cpu, msr, tj_max - dts);
+
+               if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
+                       return 0;
+
+               dts = (msr >> 16) & 0x7F;
+               dts2 = (msr >> 8) & 0x7F;
+               fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
+                       cpu, msr, tj_max - dts, tj_max - dts2);
+       }
+
+       if (do_dts && debug) {
+               unsigned int resolution;
+
+               if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
+                       return 0;
+
+               dts = (msr >> 16) & 0x7F;
+               resolution = (msr >> 27) & 0xF;
+               fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
+                       cpu, msr, tj_max - dts, resolution);
+
+               if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
+                       return 0;
+
+               dts = (msr >> 16) & 0x7F;
+               dts2 = (msr >> 8) & 0x7F;
+               fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
+                       cpu, msr, tj_max - dts, tj_max - dts2);
+       }
+
+       return 0;
+}
+
+void probe_thermal(void)
+{
+       if (!access("/sys/devices/system/cpu/cpu0/thermal_throttle/core_throttle_count", R_OK))
+               BIC_PRESENT(BIC_CORE_THROT_CNT);
+       else
+               BIC_NOT_PRESENT(BIC_CORE_THROT_CNT);
+
+       for_all_cpus(set_temperature_target, ODD_COUNTERS);
+
+       if (quiet)
+               return;
+
+       for_all_cpus(print_thermal, ODD_COUNTERS);
+}
+
+int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+       unsigned int eax, ebx, ecx, edx;
+
+       UNUSED(c);
+       UNUSED(p);
 
-       tj_max = tcc_default;
+       if (!genuine_intel)
+               return 0;
 
-       return 0;
+       if (cpu_migrate(t->cpu_id)) {
+               fprintf(outf, "Could not migrate to CPU %d\n", t->cpu_id);
+               return -1;
+       }
 
-guess:
-       tj_max = TJMAX_DEFAULT;
-       fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", cpu, tj_max);
+       if (max_level < 0x1a)
+               return 0;
 
+       __cpuid(0x1a, eax, ebx, ecx, edx);
+       eax = (eax >> 24) & 0xFF;
+       if (eax == 0x20)
+               t->is_atom = true;
        return 0;
 }
 
@@ -5354,7 +5428,7 @@ void decode_misc_feature_control(void)
 {
        unsigned long long msr;
 
-       if (!has_misc_feature_control)
+       if (!platform->has_msr_misc_feature_control)
                return;
 
        if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr))
@@ -5375,10 +5449,7 @@ void decode_misc_pwr_mgmt_msr(void)
 {
        unsigned long long msr;
 
-       if (!do_nhm_platform_info)
-               return;
-
-       if (no_MSR_MISC_PWR_MGMT)
+       if (!platform->has_msr_misc_pwr_mgmt)
                return;
 
        if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr))
@@ -5397,6 +5468,9 @@ void decode_c6_demotion_policy_msr(void)
 {
        unsigned long long msr;
 
+       if (!platform->has_msr_c6_demotion_policy_config)
+               return;
+
        if (!get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr))
                fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n",
                        base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
@@ -5406,67 +5480,6 @@ void decode_c6_demotion_policy_msr(void)
                        base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
 }
 
-/*
- * When models are the same, for the purpose of turbostat, reuse
- */
-unsigned int intel_model_duplicates(unsigned int model)
-{
-
-       switch (model) {
-       case INTEL_FAM6_NEHALEM_EP:     /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
-       case INTEL_FAM6_NEHALEM:        /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
-       case 0x1F:              /* Core i7 and i5 Processor - Nehalem */
-       case INTEL_FAM6_WESTMERE:       /* Westmere Client - Clarkdale, Arrandale */
-       case INTEL_FAM6_WESTMERE_EP:    /* Westmere EP - Gulftown */
-               return INTEL_FAM6_NEHALEM;
-
-       case INTEL_FAM6_NEHALEM_EX:     /* Nehalem-EX Xeon - Beckton */
-       case INTEL_FAM6_WESTMERE_EX:    /* Westmere-EX Xeon - Eagleton */
-               return INTEL_FAM6_NEHALEM_EX;
-
-       case INTEL_FAM6_XEON_PHI_KNM:
-               return INTEL_FAM6_XEON_PHI_KNL;
-
-       case INTEL_FAM6_BROADWELL_X:
-       case INTEL_FAM6_BROADWELL_D:    /* BDX-DE */
-               return INTEL_FAM6_BROADWELL_X;
-
-       case INTEL_FAM6_SKYLAKE_L:
-       case INTEL_FAM6_SKYLAKE:
-       case INTEL_FAM6_KABYLAKE_L:
-       case INTEL_FAM6_KABYLAKE:
-       case INTEL_FAM6_COMETLAKE_L:
-       case INTEL_FAM6_COMETLAKE:
-               return INTEL_FAM6_SKYLAKE_L;
-
-       case INTEL_FAM6_ICELAKE_L:
-       case INTEL_FAM6_ICELAKE_NNPI:
-       case INTEL_FAM6_TIGERLAKE_L:
-       case INTEL_FAM6_TIGERLAKE:
-       case INTEL_FAM6_ROCKETLAKE:
-       case INTEL_FAM6_LAKEFIELD:
-       case INTEL_FAM6_ALDERLAKE:
-       case INTEL_FAM6_ALDERLAKE_L:
-       case INTEL_FAM6_ATOM_GRACEMONT:
-       case INTEL_FAM6_RAPTORLAKE:
-       case INTEL_FAM6_RAPTORLAKE_P:
-       case INTEL_FAM6_RAPTORLAKE_S:
-       case INTEL_FAM6_METEORLAKE:
-       case INTEL_FAM6_METEORLAKE_L:
-               return INTEL_FAM6_CANNONLAKE_L;
-
-       case INTEL_FAM6_ATOM_TREMONT_L:
-               return INTEL_FAM6_ATOM_TREMONT;
-
-       case INTEL_FAM6_ICELAKE_D:
-               return INTEL_FAM6_ICELAKE_X;
-
-       case INTEL_FAM6_EMERALDRAPIDS_X:
-               return INTEL_FAM6_SAPPHIRERAPIDS_X;
-       }
-       return model;
-}
-
 void print_dev_latency(void)
 {
        char *path = "/dev/cpu_dma_latency";
@@ -5510,6 +5523,101 @@ void linux_perf_init(void)
        BIC_PRESENT(BIC_IPC);
 }
 
+void probe_cstates(void)
+{
+       probe_cst_limit();
+
+       if (platform->supported_cstates & CC1)
+               BIC_PRESENT(BIC_CPU_c1);
+
+       if (platform->supported_cstates & CC3)
+               BIC_PRESENT(BIC_CPU_c3);
+
+       if (platform->supported_cstates & CC6)
+               BIC_PRESENT(BIC_CPU_c6);
+
+       if (platform->supported_cstates & CC7)
+               BIC_PRESENT(BIC_CPU_c7);
+
+       if (platform->supported_cstates & PC2 && (pkg_cstate_limit >= PCL__2))
+               BIC_PRESENT(BIC_Pkgpc2);
+
+       if (platform->supported_cstates & PC3 && (pkg_cstate_limit >= PCL__3))
+               BIC_PRESENT(BIC_Pkgpc3);
+
+       if (platform->supported_cstates & PC6 && (pkg_cstate_limit >= PCL__6))
+               BIC_PRESENT(BIC_Pkgpc6);
+
+       if (platform->supported_cstates & PC7 && (pkg_cstate_limit >= PCL__7))
+               BIC_PRESENT(BIC_Pkgpc7);
+
+       if (platform->supported_cstates & PC8 && (pkg_cstate_limit >= PCL__8))
+               BIC_PRESENT(BIC_Pkgpc8);
+
+       if (platform->supported_cstates & PC9 && (pkg_cstate_limit >= PCL__9))
+               BIC_PRESENT(BIC_Pkgpc9);
+
+       if (platform->supported_cstates & PC10 && (pkg_cstate_limit >= PCL_10))
+               BIC_PRESENT(BIC_Pkgpc10);
+
+       if (platform->has_msr_module_c6_res_ms)
+               BIC_PRESENT(BIC_Mod_c6);
+
+       if (platform->has_ext_cst_msrs) {
+               BIC_PRESENT(BIC_Totl_c0);
+               BIC_PRESENT(BIC_Any_c0);
+               BIC_PRESENT(BIC_GFX_c0);
+               BIC_PRESENT(BIC_CPUGFX);
+       }
+
+       if (quiet)
+               return;
+
+       dump_power_ctl();
+       dump_cst_cfg();
+       decode_c6_demotion_policy_msr();
+       print_dev_latency();
+       dump_sysfs_cstate_config();
+       print_irtl();
+}
+
+void probe_lpi(void)
+{
+       if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK))
+               BIC_PRESENT(BIC_CPU_LPI);
+       else
+               BIC_NOT_PRESENT(BIC_CPU_LPI);
+
+       if (!access(sys_lpi_file_sysfs, R_OK)) {
+               sys_lpi_file = sys_lpi_file_sysfs;
+               BIC_PRESENT(BIC_SYS_LPI);
+       } else if (!access(sys_lpi_file_debugfs, R_OK)) {
+               sys_lpi_file = sys_lpi_file_debugfs;
+               BIC_PRESENT(BIC_SYS_LPI);
+       } else {
+               sys_lpi_file_sysfs = NULL;
+               BIC_NOT_PRESENT(BIC_SYS_LPI);
+       }
+
+}
+
+void probe_pstates(void)
+{
+       probe_bclk();
+
+       if (quiet)
+               return;
+
+       dump_platform_info();
+       dump_turbo_ratio_info();
+       dump_sysfs_pstate_config();
+       decode_misc_pwr_mgmt_msr();
+
+       for_all_cpus(print_hwp, ODD_COUNTERS);
+       for_all_cpus(print_epb, ODD_COUNTERS);
+       for_all_cpus(print_perf_limit, ODD_COUNTERS);
+}
+
 void process_cpuid()
 {
        unsigned int eax, ebx, ecx, edx;
@@ -5569,10 +5677,8 @@ void process_cpuid()
                        edx_flags & (1 << 22) ? "ACPI-TM" : "-",
                        edx_flags & (1 << 28) ? "HT" : "-", edx_flags & (1 << 29) ? "TM" : "-");
        }
-       if (genuine_intel) {
-               model_orig = model;
-               model = intel_model_duplicates(model);
-       }
+
+       probe_platform_features(family, model);
 
        if (!(edx_flags & (1 << 5)))
                errx(1, "CPUID: no MSR");
@@ -5656,26 +5762,12 @@ void process_cpuid()
                __cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx);
 
                if (ebx_tsc != 0) {
-
                        if (!quiet && (ebx != 0))
                                fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
                                        eax_crystal, ebx_tsc, crystal_hz);
 
                        if (crystal_hz == 0)
-                               switch (model) {
-                               case INTEL_FAM6_SKYLAKE_L:      /* SKL */
-                                       crystal_hz = 24000000;  /* 24.0 MHz */
-                                       break;
-                               case INTEL_FAM6_ATOM_GOLDMONT_D:        /* DNV */
-                                       crystal_hz = 25000000;  /* 25.0 MHz */
-                                       break;
-                               case INTEL_FAM6_ATOM_GOLDMONT:  /* BXT */
-                               case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
-                                       crystal_hz = 19200000;  /* 19.2 MHz */
-                                       break;
-                               default:
-                                       crystal_hz = 0;
-                               }
+                               crystal_hz = platform->crystal_freq;
 
                        if (crystal_hz) {
                                tsc_hz = (unsigned long long)crystal_hz *ebx_tsc / eax_crystal;
@@ -5700,147 +5792,33 @@ void process_cpuid()
        }
 
        if (has_aperf)
-               aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model);
+               aperf_mperf_multiplier = platform->need_perf_multiplier ? 1024 : 1;
 
        BIC_PRESENT(BIC_IRQ);
        BIC_PRESENT(BIC_TSC_MHz);
+}
 
-       if (probe_nhm_msrs(family, model)) {
-               do_nhm_platform_info = 1;
-               BIC_PRESENT(BIC_CPU_c1);
-               BIC_PRESENT(BIC_CPU_c3);
-               BIC_PRESENT(BIC_CPU_c6);
-               BIC_PRESENT(BIC_SMI);
-       }
-       do_snb_cstates = has_snb_msrs(family, model);
-
-       if (do_snb_cstates)
-               BIC_PRESENT(BIC_CPU_c7);
-
-       do_irtl_snb = has_snb_msrs(family, model);
-       if (do_snb_cstates && (pkg_cstate_limit >= PCL__2))
-               BIC_PRESENT(BIC_Pkgpc2);
-       if (pkg_cstate_limit >= PCL__3)
-               BIC_PRESENT(BIC_Pkgpc3);
-       if (pkg_cstate_limit >= PCL__6)
-               BIC_PRESENT(BIC_Pkgpc6);
-       if (do_snb_cstates && (pkg_cstate_limit >= PCL__7))
-               BIC_PRESENT(BIC_Pkgpc7);
-       if (has_slv_msrs(family, model)) {
-               BIC_NOT_PRESENT(BIC_Pkgpc2);
-               BIC_NOT_PRESENT(BIC_Pkgpc3);
-               BIC_PRESENT(BIC_Pkgpc6);
-               BIC_NOT_PRESENT(BIC_Pkgpc7);
-               BIC_PRESENT(BIC_Mod_c6);
-               use_c1_residency_msr = 1;
-       }
-       if (is_jvl(family, model)) {
-               BIC_NOT_PRESENT(BIC_CPU_c3);
-               BIC_NOT_PRESENT(BIC_CPU_c7);
-               BIC_NOT_PRESENT(BIC_Pkgpc2);
-               BIC_NOT_PRESENT(BIC_Pkgpc3);
-               BIC_NOT_PRESENT(BIC_Pkgpc6);
-               BIC_NOT_PRESENT(BIC_Pkgpc7);
-       }
-       if (is_dnv(family, model)) {
-               BIC_PRESENT(BIC_CPU_c1);
-               BIC_NOT_PRESENT(BIC_CPU_c3);
-               BIC_NOT_PRESENT(BIC_Pkgpc3);
-               BIC_NOT_PRESENT(BIC_CPU_c7);
-               BIC_NOT_PRESENT(BIC_Pkgpc7);
-               use_c1_residency_msr = 1;
-       }
-       if (is_skx(family, model) || is_icx(family, model) || is_spr(family, model)) {
-               BIC_NOT_PRESENT(BIC_CPU_c3);
-               BIC_NOT_PRESENT(BIC_Pkgpc3);
-               BIC_NOT_PRESENT(BIC_CPU_c7);
-               BIC_NOT_PRESENT(BIC_Pkgpc7);
-       }
-       if (is_bdx(family, model)) {
-               BIC_NOT_PRESENT(BIC_CPU_c7);
-               BIC_NOT_PRESENT(BIC_Pkgpc7);
-       }
-       if (has_c8910_msrs(family, model)) {
-               if (pkg_cstate_limit >= PCL__8)
-                       BIC_PRESENT(BIC_Pkgpc8);
-               if (pkg_cstate_limit >= PCL__9)
-                       BIC_PRESENT(BIC_Pkgpc9);
-               if (pkg_cstate_limit >= PCL_10)
-                       BIC_PRESENT(BIC_Pkgpc10);
-       }
-       do_irtl_hsw = has_c8910_msrs(family, model);
-       if (has_skl_msrs(family, model)) {
-               BIC_PRESENT(BIC_Totl_c0);
-               BIC_PRESENT(BIC_Any_c0);
-               BIC_PRESENT(BIC_GFX_c0);
-               BIC_PRESENT(BIC_CPUGFX);
-       }
-       do_slm_cstates = is_slm(family, model);
-       do_knl_cstates = is_knl(family, model);
-
-       if (do_slm_cstates || do_knl_cstates || is_cnl(family, model) || is_ehl(family, model))
-               BIC_NOT_PRESENT(BIC_CPU_c3);
-
-       if (!quiet)
-               decode_misc_pwr_mgmt_msr();
-
-       if (!quiet && has_slv_msrs(family, model))
-               decode_c6_demotion_policy_msr();
-
-       rapl_probe(family, model);
-       perf_limit_reasons_probe(family, model);
-       automatic_cstate_conversion_probe(family, model);
-
-       check_tcc_offset(model_orig);
-
-       if (!quiet)
-               dump_cstate_pstate_config_info(family, model);
-       intel_uncore_frequency_probe();
-
-       if (!quiet)
-               print_dev_latency();
-       if (!quiet)
-               dump_sysfs_cstate_config();
-       if (!quiet)
-               dump_sysfs_pstate_config();
+void probe_pm_features(void)
+{
+       probe_pstates();
 
-       if (has_skl_msrs(family, model) || is_ehl(family, model))
-               calculate_tsc_tweak();
+       probe_cstates();
 
-       if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
-               BIC_PRESENT(BIC_GFX_rc6);
+       probe_lpi();
 
-       if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
-               BIC_PRESENT(BIC_GFXMHz);
+       probe_intel_uncore_frequency();
 
-       if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK))
-               BIC_PRESENT(BIC_GFXACTMHz);
+       probe_graphics();
 
-       if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK))
-               BIC_PRESENT(BIC_CPU_LPI);
-       else
-               BIC_NOT_PRESENT(BIC_CPU_LPI);
+       probe_rapl();
 
-       if (!access("/sys/devices/system/cpu/cpu0/thermal_throttle/core_throttle_count", R_OK))
-               BIC_PRESENT(BIC_CORE_THROT_CNT);
-       else
-               BIC_NOT_PRESENT(BIC_CORE_THROT_CNT);
+       probe_thermal();
 
-       if (!access(sys_lpi_file_sysfs, R_OK)) {
-               sys_lpi_file = sys_lpi_file_sysfs;
-               BIC_PRESENT(BIC_SYS_LPI);
-       } else if (!access(sys_lpi_file_debugfs, R_OK)) {
-               sys_lpi_file = sys_lpi_file_debugfs;
-               BIC_PRESENT(BIC_SYS_LPI);
-       } else {
-               sys_lpi_file_sysfs = NULL;
-               BIC_NOT_PRESENT(BIC_SYS_LPI);
-       }
+       if (platform->has_nhm_msrs)
+               BIC_PRESENT(BIC_SMI);
 
        if (!quiet)
                decode_misc_feature_control();
-
-       return;
 }
 
 /*
@@ -5855,7 +5833,7 @@ int dir_filter(const struct dirent *dirp)
                return 0;
 }
 
-void topology_probe()
+void topology_probe(bool startup)
 {
        int i;
        int max_core_id = 0;
@@ -5888,14 +5866,62 @@ void topology_probe()
        for_all_proc_cpus(mark_cpu_present);
 
        /*
-        * Validate that all cpus in cpu_subset are also in cpu_present_set
+        * Allocate and initialize cpu_effective_set
+        */
+       cpu_effective_set = CPU_ALLOC((topo.max_cpu_num + 1));
+       if (cpu_effective_set == NULL)
+               err(3, "CPU_ALLOC");
+       cpu_effective_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
+       CPU_ZERO_S(cpu_effective_setsize, cpu_effective_set);
+       update_effective_set(startup);
+
+       /*
+        * Allocate and initialize cpu_allowed_set
+        */
+       cpu_allowed_set = CPU_ALLOC((topo.max_cpu_num + 1));
+       if (cpu_allowed_set == NULL)
+               err(3, "CPU_ALLOC");
+       cpu_allowed_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
+       CPU_ZERO_S(cpu_allowed_setsize, cpu_allowed_set);
+
+       /*
+        * Validate and update cpu_allowed_set.
+        *
+        * Make sure all cpus in cpu_subset are also in cpu_present_set during startup.
+        * Give a warning when cpus in cpu_subset become unavailable at runtime.
+        * Give a warning when cpus are not effective because of cgroup setting.
+        *
+        * cpu_allowed_set is the intersection of cpu_present_set/cpu_effective_set/cpu_subset.
         */
        for (i = 0; i < CPU_SUBSET_MAXCPUS; ++i) {
-               if (CPU_ISSET_S(i, cpu_subset_size, cpu_subset))
-                       if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set))
-                               err(1, "cpu%d not present", i);
+               if (cpu_subset && !CPU_ISSET_S(i, cpu_subset_size, cpu_subset))
+                       continue;
+
+               if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set)) {
+                       if (cpu_subset) {
+                               /* cpus in cpu_subset must be in cpu_present_set during startup */
+                               if (startup)
+                                       err(1, "cpu%d not present", i);
+                               else
+                                       fprintf(stderr, "cpu%d not present\n", i);
+                       }
+                       continue;
+               }
+
+               if (CPU_COUNT_S(cpu_effective_setsize, cpu_effective_set)) {
+                       if (!CPU_ISSET_S(i, cpu_effective_setsize, cpu_effective_set)) {
+                               fprintf(stderr, "cpu%d not effective\n", i);
+                               continue;
+                       }
+               }
+
+               CPU_SET_S(i, cpu_allowed_setsize, cpu_allowed_set);
        }
 
+       if (!CPU_COUNT_S(cpu_allowed_setsize, cpu_allowed_set))
+               err(-ENODEV, "No valid cpus found");
+       sched_setaffinity(0, cpu_allowed_setsize, cpu_allowed_set);
+
        /*
         * Allocate and initialize cpu_affinity_set
         */
@@ -6009,15 +6035,19 @@ void allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_
        if (*c == NULL)
                goto error;
 
-       for (i = 0; i < num_cores; i++)
+       for (i = 0; i < num_cores; i++) {
                (*c)[i].core_id = -1;
+               (*c)[i].base_cpu = -1;
+       }
 
        *p = calloc(topo.num_packages, sizeof(struct pkg_data));
        if (*p == NULL)
                goto error;
 
-       for (i = 0; i < topo.num_packages; i++)
+       for (i = 0; i < topo.num_packages; i++) {
                (*p)[i].package_id = i;
+               (*p)[i].base_cpu = -1;
+       }
 
        return;
 error:
@@ -6050,10 +6080,11 @@ void init_counter(struct thread_data *thread_base, struct core_data *core_base,
        p = GET_PKG(pkg_base, pkg_id);
 
        t->cpu_id = cpu_id;
-       if (thread_id == 0) {
-               t->flags |= CPU_IS_FIRST_THREAD_IN_CORE;
-               if (cpu_is_first_core_in_package(cpu_id))
-                       t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE;
+       if (!cpu_is_not_allowed(cpu_id)) {
+               if (c->base_cpu < 0)
+                       c->base_cpu = t->cpu_id;
+               if (p->base_cpu < 0)
+                       p->base_cpu = t->cpu_id;
        }
 
        c->core_id = core_id;
@@ -6093,59 +6124,64 @@ void allocate_irq_buffers(void)
                err(-1, "calloc %d", topo.max_cpu_num + 1);
 }
 
-void setup_all_buffers(void)
+int update_topo(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+       topo.allowed_cpus++;
+       if ((int)t->cpu_id == c->base_cpu)
+               topo.allowed_cores++;
+       if ((int)t->cpu_id == p->base_cpu)
+               topo.allowed_packages++;
+
+       return 0;
+}
+
+void topology_update(void)
+{
+       topo.allowed_cpus = 0;
+       topo.allowed_cores = 0;
+       topo.allowed_packages = 0;
+       for_all_cpus(update_topo, ODD_COUNTERS);
+}
+void setup_all_buffers(bool startup)
 {
-       topology_probe();
+       topology_probe(startup);
        allocate_irq_buffers();
        allocate_fd_percpu();
        allocate_counters(&thread_even, &core_even, &package_even);
        allocate_counters(&thread_odd, &core_odd, &package_odd);
        allocate_output_buffer();
        for_all_proc_cpus(initialize_counters);
+       topology_update();
 }
 
 void set_base_cpu(void)
 {
-       base_cpu = sched_getcpu();
-       if (base_cpu < 0)
-               err(-ENODEV, "No valid cpus found");
+       int i;
 
-       if (debug > 1)
-               fprintf(outf, "base_cpu = %d\n", base_cpu);
+       for (i = 0; i < topo.max_cpu_num + 1; ++i) {
+               if (cpu_is_not_allowed(i))
+                       continue;
+               base_cpu = i;
+               if (debug > 1)
+                       fprintf(outf, "base_cpu = %d\n", base_cpu);
+               return;
+       }
+       err(-ENODEV, "No valid cpus found");
 }
 
 void turbostat_init()
 {
-       setup_all_buffers();
+       setup_all_buffers(true);
        set_base_cpu();
        check_dev_msr();
        check_permissions();
        process_cpuid();
+       probe_pm_features();
        linux_perf_init();
 
-       if (!quiet)
-               for_all_cpus(print_hwp, ODD_COUNTERS);
-
-       if (!quiet)
-               for_all_cpus(print_epb, ODD_COUNTERS);
-
-       if (!quiet)
-               for_all_cpus(print_perf_limit, ODD_COUNTERS);
-
-       if (!quiet)
-               for_all_cpus(print_rapl, ODD_COUNTERS);
-
-       for_all_cpus(set_temperature_target, ODD_COUNTERS);
-
        for_all_cpus(get_cpu_type, ODD_COUNTERS);
        for_all_cpus(get_cpu_type, EVEN_COUNTERS);
 
-       if (!quiet)
-               for_all_cpus(print_thermal, ODD_COUNTERS);
-
-       if (!quiet && do_irtl_snb)
-               print_irtl();
-
        if (DO_BIC(BIC_IPC))
                (void)get_instr_count_fd(base_cpu);
 }
@@ -6160,8 +6196,6 @@ int fork_it(char **argv)
        first_counter_read = 0;
        if (status)
                exit(status);
-       /* clear affinity side-effect of get_counters() */
-       sched_setaffinity(0, cpu_present_setsize, cpu_present_set);
        gettimeofday(&tv_even, (struct timezone *)NULL);
 
        child_pid = fork();
@@ -6225,7 +6259,7 @@ int get_and_dump_counters(void)
 
 void print_version()
 {
-       fprintf(outf, "turbostat version 2023.03.17 - Len Brown <lenb@kernel.org>\n");
+       fprintf(outf, "turbostat version 2023.11.07 - Len Brown <lenb@kernel.org>\n");
 }
 
 #define COMMAND_LINE_SIZE 2048
@@ -6508,9 +6542,6 @@ void probe_sysfs(void)
  */
 void parse_cpu_command(char *optarg)
 {
-       unsigned int start, end;
-       char *next;
-
        if (!strcmp(optarg, "core")) {
                if (cpu_subset)
                        goto error;
@@ -6533,52 +6564,8 @@ void parse_cpu_command(char *optarg)
 
        CPU_ZERO_S(cpu_subset_size, cpu_subset);
 
-       next = optarg;
-
-       while (next && *next) {
-
-               if (*next == '-')       /* no negative cpu numbers */
-                       goto error;
-
-               start = strtoul(next, &next, 10);
-
-               if (start >= CPU_SUBSET_MAXCPUS)
-                       goto error;
-               CPU_SET_S(start, cpu_subset_size, cpu_subset);
-
-               if (*next == '\0')
-                       break;
-
-               if (*next == ',') {
-                       next += 1;
-                       continue;
-               }
-
-               if (*next == '-') {
-                       next += 1;      /* start range */
-               } else if (*next == '.') {
-                       next += 1;
-                       if (*next == '.')
-                               next += 1;      /* start range */
-                       else
-                               goto error;
-               }
-
-               end = strtoul(next, &next, 10);
-               if (end <= start)
-                       goto error;
-
-               while (++start <= end) {
-                       if (start >= CPU_SUBSET_MAXCPUS)
-                               goto error;
-                       CPU_SET_S(start, cpu_subset_size, cpu_subset);
-               }
-
-               if (*next == ',')
-                       next += 1;
-               else if (*next != '\0')
-                       goto error;
-       }
+       if (parse_cpu_str(optarg, cpu_subset, cpu_subset_size))
+               goto error;
 
        return;
 
@@ -6719,6 +6706,19 @@ void cmdline(int argc, char **argv)
 
 int main(int argc, char **argv)
 {
+       int fd, ret;
+
+       fd = open("/sys/fs/cgroup/cgroup.procs", O_WRONLY);
+       if (fd < 0)
+               goto skip_cgroup_setting;
+
+       ret = write(fd, "0\n", 2);
+       if (ret == -1)
+               perror("Can't update cgroup\n");
+
+       close(fd);
+
+skip_cgroup_setting:
        outf = stderr;
        cmdline(argc, argv);
 
index b86cb10..587b946 100644 (file)
@@ -85,7 +85,7 @@ int main(int argc, char **argv)
         */
        ret = open("/proc/sys/abi/sme_default_vector_length", O_RDONLY, 0);
        if (ret >= 0) {
-               ksft_test_result(fork_test(), "fork_test");
+               ksft_test_result(fork_test(), "fork_test\n");
 
        } else {
                ksft_print_msg("SME not supported\n");
index 6ee22c3..518f143 100644 (file)
@@ -24,6 +24,7 @@
 
 #include "test_progs.h"
 #include "network_helpers.h"
+#include "netlink_helpers.h"
 #include "test_tc_neigh_fib.skel.h"
 #include "test_tc_neigh.skel.h"
 #include "test_tc_peer.skel.h"
@@ -110,11 +111,17 @@ static void netns_setup_namespaces_nofail(const char *verb)
        }
 }
 
+enum dev_mode {
+       MODE_VETH,
+       MODE_NETKIT,
+};
+
 struct netns_setup_result {
-       int ifindex_veth_src;
-       int ifindex_veth_src_fwd;
-       int ifindex_veth_dst;
-       int ifindex_veth_dst_fwd;
+       enum dev_mode dev_mode;
+       int ifindex_src;
+       int ifindex_src_fwd;
+       int ifindex_dst;
+       int ifindex_dst_fwd;
 };
 
 static int get_ifaddr(const char *name, char *ifaddr)
@@ -137,58 +144,110 @@ static int get_ifaddr(const char *name, char *ifaddr)
        return 0;
 }
 
+static int create_netkit(int mode, char *prim, char *peer)
+{
+       struct rtattr *linkinfo, *data, *peer_info;
+       struct rtnl_handle rth = { .fd = -1 };
+       const char *type = "netkit";
+       struct {
+               struct nlmsghdr n;
+               struct ifinfomsg i;
+               char buf[1024];
+       } req = {};
+       int err;
+
+       err = rtnl_open(&rth, 0);
+       if (!ASSERT_OK(err, "open_rtnetlink"))
+               return err;
+
+       memset(&req, 0, sizeof(req));
+       req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+       req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
+       req.n.nlmsg_type = RTM_NEWLINK;
+       req.i.ifi_family = AF_UNSPEC;
+
+       addattr_l(&req.n, sizeof(req), IFLA_IFNAME, prim, strlen(prim));
+       linkinfo = addattr_nest(&req.n, sizeof(req), IFLA_LINKINFO);
+       addattr_l(&req.n, sizeof(req), IFLA_INFO_KIND, type, strlen(type));
+       data = addattr_nest(&req.n, sizeof(req), IFLA_INFO_DATA);
+       addattr32(&req.n, sizeof(req), IFLA_NETKIT_MODE, mode);
+       peer_info = addattr_nest(&req.n, sizeof(req), IFLA_NETKIT_PEER_INFO);
+       req.n.nlmsg_len += sizeof(struct ifinfomsg);
+       addattr_l(&req.n, sizeof(req), IFLA_IFNAME, peer, strlen(peer));
+       addattr_nest_end(&req.n, peer_info);
+       addattr_nest_end(&req.n, data);
+       addattr_nest_end(&req.n, linkinfo);
+
+       err = rtnl_talk(&rth, &req.n, NULL);
+       ASSERT_OK(err, "talk_rtnetlink");
+       rtnl_close(&rth);
+       return err;
+}
+
 static int netns_setup_links_and_routes(struct netns_setup_result *result)
 {
        struct nstoken *nstoken = NULL;
-       char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {};
-
-       SYS(fail, "ip link add veth_src type veth peer name veth_src_fwd");
-       SYS(fail, "ip link add veth_dst type veth peer name veth_dst_fwd");
+       char src_fwd_addr[IFADDR_STR_LEN+1] = {};
+       int err;
 
-       SYS(fail, "ip link set veth_dst_fwd address " MAC_DST_FWD);
-       SYS(fail, "ip link set veth_dst address " MAC_DST);
+       if (result->dev_mode == MODE_VETH) {
+               SYS(fail, "ip link add src type veth peer name src_fwd");
+               SYS(fail, "ip link add dst type veth peer name dst_fwd");
+
+               SYS(fail, "ip link set dst_fwd address " MAC_DST_FWD);
+               SYS(fail, "ip link set dst address " MAC_DST);
+       } else if (result->dev_mode == MODE_NETKIT) {
+               err = create_netkit(NETKIT_L3, "src", "src_fwd");
+               if (!ASSERT_OK(err, "create_ifindex_src"))
+                       goto fail;
+               err = create_netkit(NETKIT_L3, "dst", "dst_fwd");
+               if (!ASSERT_OK(err, "create_ifindex_dst"))
+                       goto fail;
+       }
 
-       if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr))
+       if (get_ifaddr("src_fwd", src_fwd_addr))
                goto fail;
 
-       result->ifindex_veth_src = if_nametoindex("veth_src");
-       if (!ASSERT_GT(result->ifindex_veth_src, 0, "ifindex_veth_src"))
+       result->ifindex_src = if_nametoindex("src");
+       if (!ASSERT_GT(result->ifindex_src, 0, "ifindex_src"))
                goto fail;
 
-       result->ifindex_veth_src_fwd = if_nametoindex("veth_src_fwd");
-       if (!ASSERT_GT(result->ifindex_veth_src_fwd, 0, "ifindex_veth_src_fwd"))
+       result->ifindex_src_fwd = if_nametoindex("src_fwd");
+       if (!ASSERT_GT(result->ifindex_src_fwd, 0, "ifindex_src_fwd"))
                goto fail;
 
-       result->ifindex_veth_dst = if_nametoindex("veth_dst");
-       if (!ASSERT_GT(result->ifindex_veth_dst, 0, "ifindex_veth_dst"))
+       result->ifindex_dst = if_nametoindex("dst");
+       if (!ASSERT_GT(result->ifindex_dst, 0, "ifindex_dst"))
                goto fail;
 
-       result->ifindex_veth_dst_fwd = if_nametoindex("veth_dst_fwd");
-       if (!ASSERT_GT(result->ifindex_veth_dst_fwd, 0, "ifindex_veth_dst_fwd"))
+       result->ifindex_dst_fwd = if_nametoindex("dst_fwd");
+       if (!ASSERT_GT(result->ifindex_dst_fwd, 0, "ifindex_dst_fwd"))
                goto fail;
 
-       SYS(fail, "ip link set veth_src netns " NS_SRC);
-       SYS(fail, "ip link set veth_src_fwd netns " NS_FWD);
-       SYS(fail, "ip link set veth_dst_fwd netns " NS_FWD);
-       SYS(fail, "ip link set veth_dst netns " NS_DST);
+       SYS(fail, "ip link set src netns " NS_SRC);
+       SYS(fail, "ip link set src_fwd netns " NS_FWD);
+       SYS(fail, "ip link set dst_fwd netns " NS_FWD);
+       SYS(fail, "ip link set dst netns " NS_DST);
 
        /** setup in 'src' namespace */
        nstoken = open_netns(NS_SRC);
        if (!ASSERT_OK_PTR(nstoken, "setns src"))
                goto fail;
 
-       SYS(fail, "ip addr add " IP4_SRC "/32 dev veth_src");
-       SYS(fail, "ip addr add " IP6_SRC "/128 dev veth_src nodad");
-       SYS(fail, "ip link set dev veth_src up");
+       SYS(fail, "ip addr add " IP4_SRC "/32 dev src");
+       SYS(fail, "ip addr add " IP6_SRC "/128 dev src nodad");
+       SYS(fail, "ip link set dev src up");
 
-       SYS(fail, "ip route add " IP4_DST "/32 dev veth_src scope global");
-       SYS(fail, "ip route add " IP4_NET "/16 dev veth_src scope global");
-       SYS(fail, "ip route add " IP6_DST "/128 dev veth_src scope global");
+       SYS(fail, "ip route add " IP4_DST "/32 dev src scope global");
+       SYS(fail, "ip route add " IP4_NET "/16 dev src scope global");
+       SYS(fail, "ip route add " IP6_DST "/128 dev src scope global");
 
-       SYS(fail, "ip neigh add " IP4_DST " dev veth_src lladdr %s",
-           veth_src_fwd_addr);
-       SYS(fail, "ip neigh add " IP6_DST " dev veth_src lladdr %s",
-           veth_src_fwd_addr);
+       if (result->dev_mode == MODE_VETH) {
+               SYS(fail, "ip neigh add " IP4_DST " dev src lladdr %s",
+                   src_fwd_addr);
+               SYS(fail, "ip neigh add " IP6_DST " dev src lladdr %s",
+                   src_fwd_addr);
+       }
 
        close_netns(nstoken);
 
@@ -201,15 +260,15 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
         * needs v4 one in order to start ARP probing. IP4_NET route is added
         * to the endpoints so that the ARP processing will reply.
         */
-       SYS(fail, "ip addr add " IP4_SLL "/32 dev veth_src_fwd");
-       SYS(fail, "ip addr add " IP4_DLL "/32 dev veth_dst_fwd");
-       SYS(fail, "ip link set dev veth_src_fwd up");
-       SYS(fail, "ip link set dev veth_dst_fwd up");
+       SYS(fail, "ip addr add " IP4_SLL "/32 dev src_fwd");
+       SYS(fail, "ip addr add " IP4_DLL "/32 dev dst_fwd");
+       SYS(fail, "ip link set dev src_fwd up");
+       SYS(fail, "ip link set dev dst_fwd up");
 
-       SYS(fail, "ip route add " IP4_SRC "/32 dev veth_src_fwd scope global");
-       SYS(fail, "ip route add " IP6_SRC "/128 dev veth_src_fwd scope global");
-       SYS(fail, "ip route add " IP4_DST "/32 dev veth_dst_fwd scope global");
-       SYS(fail, "ip route add " IP6_DST "/128 dev veth_dst_fwd scope global");
+       SYS(fail, "ip route add " IP4_SRC "/32 dev src_fwd scope global");
+       SYS(fail, "ip route add " IP6_SRC "/128 dev src_fwd scope global");
+       SYS(fail, "ip route add " IP4_DST "/32 dev dst_fwd scope global");
+       SYS(fail, "ip route add " IP6_DST "/128 dev dst_fwd scope global");
 
        close_netns(nstoken);
 
@@ -218,16 +277,18 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
        if (!ASSERT_OK_PTR(nstoken, "setns dst"))
                goto fail;
 
-       SYS(fail, "ip addr add " IP4_DST "/32 dev veth_dst");
-       SYS(fail, "ip addr add " IP6_DST "/128 dev veth_dst nodad");
-       SYS(fail, "ip link set dev veth_dst up");
+       SYS(fail, "ip addr add " IP4_DST "/32 dev dst");
+       SYS(fail, "ip addr add " IP6_DST "/128 dev dst nodad");
+       SYS(fail, "ip link set dev dst up");
 
-       SYS(fail, "ip route add " IP4_SRC "/32 dev veth_dst scope global");
-       SYS(fail, "ip route add " IP4_NET "/16 dev veth_dst scope global");
-       SYS(fail, "ip route add " IP6_SRC "/128 dev veth_dst scope global");
+       SYS(fail, "ip route add " IP4_SRC "/32 dev dst scope global");
+       SYS(fail, "ip route add " IP4_NET "/16 dev dst scope global");
+       SYS(fail, "ip route add " IP6_SRC "/128 dev dst scope global");
 
-       SYS(fail, "ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD);
-       SYS(fail, "ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+       if (result->dev_mode == MODE_VETH) {
+               SYS(fail, "ip neigh add " IP4_SRC " dev dst lladdr " MAC_DST_FWD);
+               SYS(fail, "ip neigh add " IP6_SRC " dev dst lladdr " MAC_DST_FWD);
+       }
 
        close_netns(nstoken);
 
@@ -293,23 +354,23 @@ static int netns_load_bpf(const struct bpf_program *src_prog,
                          const struct bpf_program *chk_prog,
                          const struct netns_setup_result *setup_result)
 {
-       LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src_fwd);
-       LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd);
+       LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd);
+       LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
        int err;
 
-       /* tc qdisc add dev veth_src_fwd clsact */
-       QDISC_CLSACT_CREATE(&qdisc_veth_src_fwd, setup_result->ifindex_veth_src_fwd);
-       /* tc filter add dev veth_src_fwd ingress bpf da src_prog */
-       XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS, src_prog, 0);
-       /* tc filter add dev veth_src_fwd egress bpf da chk_prog */
-       XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS, chk_prog, 0);
+       /* tc qdisc add dev src_fwd clsact */
+       QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd);
+       /* tc filter add dev src_fwd ingress bpf da src_prog */
+       XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS, src_prog, 0);
+       /* tc filter add dev src_fwd egress bpf da chk_prog */
+       XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS, chk_prog, 0);
 
-       /* tc qdisc add dev veth_dst_fwd clsact */
-       QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd);
-       /* tc filter add dev veth_dst_fwd ingress bpf da dst_prog */
-       XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, dst_prog, 0);
-       /* tc filter add dev veth_dst_fwd egress bpf da chk_prog */
-       XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, chk_prog, 0);
+       /* tc qdisc add dev dst_fwd clsact */
+       QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
+       /* tc filter add dev dst_fwd ingress bpf da dst_prog */
+       XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, dst_prog, 0);
+       /* tc filter add dev dst_fwd egress bpf da chk_prog */
+       XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, chk_prog, 0);
 
        return 0;
 fail:
@@ -539,10 +600,10 @@ done:
 static int netns_load_dtime_bpf(struct test_tc_dtime *skel,
                                const struct netns_setup_result *setup_result)
 {
-       LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src_fwd);
-       LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd);
-       LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src);
-       LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst);
+       LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd);
+       LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
+       LIBBPF_OPTS(bpf_tc_hook, qdisc_src);
+       LIBBPF_OPTS(bpf_tc_hook, qdisc_dst);
        struct nstoken *nstoken;
        int err;
 
@@ -550,58 +611,58 @@ static int netns_load_dtime_bpf(struct test_tc_dtime *skel,
        nstoken = open_netns(NS_SRC);
        if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
                return -1;
-       /* tc qdisc add dev veth_src clsact */
-       QDISC_CLSACT_CREATE(&qdisc_veth_src, setup_result->ifindex_veth_src);
-       /* tc filter add dev veth_src ingress bpf da ingress_host */
-       XGRESS_FILTER_ADD(&qdisc_veth_src, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
-       /* tc filter add dev veth_src egress bpf da egress_host */
-       XGRESS_FILTER_ADD(&qdisc_veth_src, BPF_TC_EGRESS, skel->progs.egress_host, 0);
+       /* tc qdisc add dev src clsact */
+       QDISC_CLSACT_CREATE(&qdisc_src, setup_result->ifindex_src);
+       /* tc filter add dev src ingress bpf da ingress_host */
+       XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
+       /* tc filter add dev src egress bpf da egress_host */
+       XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_EGRESS, skel->progs.egress_host, 0);
        close_netns(nstoken);
 
        /* setup ns_dst tc progs */
        nstoken = open_netns(NS_DST);
        if (!ASSERT_OK_PTR(nstoken, "setns " NS_DST))
                return -1;
-       /* tc qdisc add dev veth_dst clsact */
-       QDISC_CLSACT_CREATE(&qdisc_veth_dst, setup_result->ifindex_veth_dst);
-       /* tc filter add dev veth_dst ingress bpf da ingress_host */
-       XGRESS_FILTER_ADD(&qdisc_veth_dst, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
-       /* tc filter add dev veth_dst egress bpf da egress_host */
-       XGRESS_FILTER_ADD(&qdisc_veth_dst, BPF_TC_EGRESS, skel->progs.egress_host, 0);
+       /* tc qdisc add dev dst clsact */
+       QDISC_CLSACT_CREATE(&qdisc_dst, setup_result->ifindex_dst);
+       /* tc filter add dev dst ingress bpf da ingress_host */
+       XGRESS_FILTER_ADD(&qdisc_dst, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
+       /* tc filter add dev dst egress bpf da egress_host */
+       XGRESS_FILTER_ADD(&qdisc_dst, BPF_TC_EGRESS, skel->progs.egress_host, 0);
        close_netns(nstoken);
 
        /* setup ns_fwd tc progs */
        nstoken = open_netns(NS_FWD);
        if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
                return -1;
-       /* tc qdisc add dev veth_dst_fwd clsact */
-       QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd);
-       /* tc filter add dev veth_dst_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
-       XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS,
+       /* tc qdisc add dev dst_fwd clsact */
+       QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
+       /* tc filter add dev dst_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
+       XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS,
                          skel->progs.ingress_fwdns_prio100, 100);
-       /* tc filter add dev veth_dst_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
-       XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS,
+       /* tc filter add dev dst_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
+       XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS,
                          skel->progs.ingress_fwdns_prio101, 101);
-       /* tc filter add dev veth_dst_fwd egress prio 100 bpf da egress_fwdns_prio100 */
-       XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS,
+       /* tc filter add dev dst_fwd egress prio 100 bpf da egress_fwdns_prio100 */
+       XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS,
                          skel->progs.egress_fwdns_prio100, 100);
-       /* tc filter add dev veth_dst_fwd egress prio 101 bpf da egress_fwdns_prio101 */
-       XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS,
+       /* tc filter add dev dst_fwd egress prio 101 bpf da egress_fwdns_prio101 */
+       XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS,
                          skel->progs.egress_fwdns_prio101, 101);
 
-       /* tc qdisc add dev veth_src_fwd clsact */
-       QDISC_CLSACT_CREATE(&qdisc_veth_src_fwd, setup_result->ifindex_veth_src_fwd);
-       /* tc filter add dev veth_src_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
-       XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS,
+       /* tc qdisc add dev src_fwd clsact */
+       QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd);
+       /* tc filter add dev src_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
+       XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS,
                          skel->progs.ingress_fwdns_prio100, 100);
-       /* tc filter add dev veth_src_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
-       XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS,
+       /* tc filter add dev src_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
+       XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS,
                          skel->progs.ingress_fwdns_prio101, 101);
-       /* tc filter add dev veth_src_fwd egress prio 100 bpf da egress_fwdns_prio100 */
-       XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS,
+       /* tc filter add dev src_fwd egress prio 100 bpf da egress_fwdns_prio100 */
+       XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS,
                          skel->progs.egress_fwdns_prio100, 100);
-       /* tc filter add dev veth_src_fwd egress prio 101 bpf da egress_fwdns_prio101 */
-       XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS,
+       /* tc filter add dev src_fwd egress prio 101 bpf da egress_fwdns_prio101 */
+       XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS,
                          skel->progs.egress_fwdns_prio101, 101);
        close_netns(nstoken);
        return 0;
@@ -777,8 +838,8 @@ static void test_tc_redirect_dtime(struct netns_setup_result *setup_result)
        if (!ASSERT_OK_PTR(skel, "test_tc_dtime__open"))
                return;
 
-       skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
-       skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
+       skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
+       skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
 
        err = test_tc_dtime__load(skel);
        if (!ASSERT_OK(err, "test_tc_dtime__load"))
@@ -868,8 +929,8 @@ static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
        if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open"))
                goto done;
 
-       skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
-       skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
+       skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
+       skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
 
        err = test_tc_neigh__load(skel);
        if (!ASSERT_OK(err, "test_tc_neigh__load"))
@@ -904,8 +965,8 @@ static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
        if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
                goto done;
 
-       skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
-       skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
+       skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
+       skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
 
        err = test_tc_peer__load(skel);
        if (!ASSERT_OK(err, "test_tc_peer__load"))
@@ -996,7 +1057,7 @@ static int tun_relay_loop(int src_fd, int target_fd)
 static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
 {
        LIBBPF_OPTS(bpf_tc_hook, qdisc_tun_fwd);
-       LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd);
+       LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
        struct test_tc_peer *skel = NULL;
        struct nstoken *nstoken = NULL;
        int err;
@@ -1045,7 +1106,7 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
                goto fail;
 
        skel->rodata->IFINDEX_SRC = ifindex;
-       skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
+       skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
 
        err = test_tc_peer__load(skel);
        if (!ASSERT_OK(err, "test_tc_peer__load"))
@@ -1053,19 +1114,19 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
 
        /* Load "tc_src_l3" to the tun_fwd interface to redirect packets
         * towards dst, and "tc_dst" to redirect packets
-        * and "tc_chk" on veth_dst_fwd to drop non-redirected packets.
+        * and "tc_chk" on dst_fwd to drop non-redirected packets.
         */
        /* tc qdisc add dev tun_fwd clsact */
        QDISC_CLSACT_CREATE(&qdisc_tun_fwd, ifindex);
        /* tc filter add dev tun_fwd ingress bpf da tc_src_l3 */
        XGRESS_FILTER_ADD(&qdisc_tun_fwd, BPF_TC_INGRESS, skel->progs.tc_src_l3, 0);
 
-       /* tc qdisc add dev veth_dst_fwd clsact */
-       QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd);
-       /* tc filter add dev veth_dst_fwd ingress bpf da tc_dst_l3 */
-       XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, skel->progs.tc_dst_l3, 0);
-       /* tc filter add dev veth_dst_fwd egress bpf da tc_chk */
-       XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, skel->progs.tc_chk, 0);
+       /* tc qdisc add dev dst_fwd clsact */
+       QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
+       /* tc filter add dev dst_fwd ingress bpf da tc_dst_l3 */
+       XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, skel->progs.tc_dst_l3, 0);
+       /* tc filter add dev dst_fwd egress bpf da tc_chk */
+       XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, skel->progs.tc_chk, 0);
 
        /* Setup route and neigh tables */
        SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24");
@@ -1074,17 +1135,17 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
        SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad");
        SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad");
 
-       SYS(fail, "ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global");
+       SYS(fail, "ip -netns " NS_SRC " route del " IP4_DST "/32 dev src scope global");
        SYS(fail, "ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD
            " dev tun_src scope global");
-       SYS(fail, "ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global");
-       SYS(fail, "ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global");
+       SYS(fail, "ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev dst scope global");
+       SYS(fail, "ip -netns " NS_SRC " route del " IP6_DST "/128 dev src scope global");
        SYS(fail, "ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD
            " dev tun_src scope global");
-       SYS(fail, "ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global");
+       SYS(fail, "ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev dst scope global");
 
-       SYS(fail, "ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
-       SYS(fail, "ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+       SYS(fail, "ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev dst lladdr " MAC_DST_FWD);
+       SYS(fail, "ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev dst lladdr " MAC_DST_FWD);
 
        if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
                goto fail;
@@ -1106,9 +1167,9 @@ fail:
                close_netns(nstoken);
 }
 
-#define RUN_TEST(name)                                                                      \
+#define RUN_TEST(name, mode)                                                                \
        ({                                                                                  \
-               struct netns_setup_result setup_result;                                     \
+               struct netns_setup_result setup_result = { .dev_mode = mode, };             \
                if (test__start_subtest(#name))                                             \
                        if (ASSERT_OK(netns_setup_namespaces("add"), "setup namespaces")) { \
                                if (ASSERT_OK(netns_setup_links_and_routes(&setup_result),  \
@@ -1122,11 +1183,13 @@ static void *test_tc_redirect_run_tests(void *arg)
 {
        netns_setup_namespaces_nofail("delete");
 
-       RUN_TEST(tc_redirect_peer);
-       RUN_TEST(tc_redirect_peer_l3);
-       RUN_TEST(tc_redirect_neigh);
-       RUN_TEST(tc_redirect_neigh_fib);
-       RUN_TEST(tc_redirect_dtime);
+       RUN_TEST(tc_redirect_peer, MODE_VETH);
+       RUN_TEST(tc_redirect_peer, MODE_NETKIT);
+       RUN_TEST(tc_redirect_peer_l3, MODE_VETH);
+       RUN_TEST(tc_redirect_peer_l3, MODE_NETKIT);
+       RUN_TEST(tc_redirect_neigh, MODE_VETH);
+       RUN_TEST(tc_redirect_neigh_fib, MODE_VETH);
+       RUN_TEST(tc_redirect_dtime, MODE_VETH);
        return NULL;
 }
 
index e5c61aa..5cfa7a6 100644 (file)
@@ -31,6 +31,7 @@
 #include "verifier_helper_restricted.skel.h"
 #include "verifier_helper_value_access.skel.h"
 #include "verifier_int_ptr.skel.h"
+#include "verifier_iterating_callbacks.skel.h"
 #include "verifier_jeq_infer_not_null.skel.h"
 #include "verifier_ld_ind.skel.h"
 #include "verifier_ldsx.skel.h"
@@ -139,6 +140,7 @@ void test_verifier_helper_packet_access(void) { RUN(verifier_helper_packet_acces
 void test_verifier_helper_restricted(void)    { RUN(verifier_helper_restricted); }
 void test_verifier_helper_value_access(void)  { RUN(verifier_helper_value_access); }
 void test_verifier_int_ptr(void)              { RUN(verifier_int_ptr); }
+void test_verifier_iterating_callbacks(void)  { RUN(verifier_iterating_callbacks); }
 void test_verifier_jeq_infer_not_null(void)   { RUN(verifier_jeq_infer_not_null); }
 void test_verifier_ld_ind(void)               { RUN(verifier_ld_ind); }
 void test_verifier_ldsx(void)                  { RUN(verifier_ldsx); }
index 4ce76eb..d461746 100644 (file)
@@ -15,13 +15,16 @@ static int empty_callback(__u32 index, void *data)
        return 0;
 }
 
+static int outer_loop(__u32 index, void *data)
+{
+       bpf_loop(nr_loops, empty_callback, NULL, 0);
+       __sync_add_and_fetch(&hits, nr_loops);
+       return 0;
+}
+
 SEC("fentry/" SYS_PREFIX "sys_getpgid")
 int benchmark(void *ctx)
 {
-       for (int i = 0; i < 1000; i++) {
-               bpf_loop(nr_loops, empty_callback, NULL, 0);
-
-               __sync_add_and_fetch(&hits, nr_loops);
-       }
+       bpf_loop(1000, outer_loop, NULL, 0);
        return 0;
 }
index 76d661b..56c764d 100644 (file)
@@ -33,6 +33,7 @@ int underflow_prog(void *ctx)
        if (!p)
                return 0;
        bpf_for_each_map_elem(&array_map, cb1, &p, 0);
+       bpf_kfunc_call_test_release(p);
        return 0;
 }
 
index 4c39e92..8c0ef27 100644 (file)
@@ -171,6 +171,7 @@ int reject_with_rbtree_add_throw(void *ctx)
                return 0;
        bpf_spin_lock(&lock);
        bpf_rbtree_add(&rbtree, &f->node, rbless);
+       bpf_spin_unlock(&lock);
        return 0;
 }
 
@@ -214,6 +215,7 @@ int reject_with_cb_reference(void *ctx)
        if (!f)
                return 0;
        bpf_loop(5, subprog_cb_ref, NULL, 0);
+       bpf_obj_drop(f);
        return 0;
 }
 
index e02cfd3..40df2cc 100644 (file)
@@ -24,9 +24,11 @@ struct task_struct {};
 #define STACK_TABLE_EPOCH_SHIFT 20
 #define STROBE_MAX_STR_LEN 1
 #define STROBE_MAX_CFGS 32
+#define READ_MAP_VAR_PAYLOAD_CAP                                       \
+       ((1 + STROBE_MAX_MAP_ENTRIES * 2) * STROBE_MAX_STR_LEN)
 #define STROBE_MAX_PAYLOAD                                             \
        (STROBE_MAX_STRS * STROBE_MAX_STR_LEN +                         \
-       STROBE_MAX_MAPS * (1 + STROBE_MAX_MAP_ENTRIES * 2) * STROBE_MAX_STR_LEN)
+        STROBE_MAX_MAPS * READ_MAP_VAR_PAYLOAD_CAP)
 
 struct strobe_value_header {
        /*
@@ -355,7 +357,7 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg,
                                             size_t idx, void *tls_base,
                                             struct strobe_value_generic *value,
                                             struct strobemeta_payload *data,
-                                            void *payload)
+                                            size_t off)
 {
        void *location;
        uint64_t len;
@@ -366,7 +368,7 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg,
                return 0;
 
        bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
-       len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, value->ptr);
+       len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, value->ptr);
        /*
         * if bpf_probe_read_user_str returns error (<0), due to casting to
         * unsinged int, it will become big number, so next check is
@@ -378,14 +380,14 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg,
                return 0;
 
        data->str_lens[idx] = len;
-       return len;
+       return off + len;
 }
 
-static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
-                                         size_t idx, void *tls_base,
-                                         struct strobe_value_generic *value,
-                                         struct strobemeta_payload *data,
-                                         void *payload)
+static __always_inline uint64_t read_map_var(struct strobemeta_cfg *cfg,
+                                            size_t idx, void *tls_base,
+                                            struct strobe_value_generic *value,
+                                            struct strobemeta_payload *data,
+                                            size_t off)
 {
        struct strobe_map_descr* descr = &data->map_descrs[idx];
        struct strobe_map_raw map;
@@ -397,11 +399,11 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
 
        location = calc_location(&cfg->map_locs[idx], tls_base);
        if (!location)
-               return payload;
+               return off;
 
        bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
        if (bpf_probe_read_user(&map, sizeof(struct strobe_map_raw), value->ptr))
-               return payload;
+               return off;
 
        descr->id = map.id;
        descr->cnt = map.cnt;
@@ -410,10 +412,10 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
                data->req_meta_valid = 1;
        }
 
-       len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, map.tag);
+       len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, map.tag);
        if (len <= STROBE_MAX_STR_LEN) {
                descr->tag_len = len;
-               payload += len;
+               off += len;
        }
 
 #ifdef NO_UNROLL
@@ -426,22 +428,22 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
                        break;
 
                descr->key_lens[i] = 0;
-               len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN,
+               len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN,
                                              map.entries[i].key);
                if (len <= STROBE_MAX_STR_LEN) {
                        descr->key_lens[i] = len;
-                       payload += len;
+                       off += len;
                }
                descr->val_lens[i] = 0;
-               len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN,
+               len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN,
                                              map.entries[i].val);
                if (len <= STROBE_MAX_STR_LEN) {
                        descr->val_lens[i] = len;
-                       payload += len;
+                       off += len;
                }
        }
 
-       return payload;
+       return off;
 }
 
 #ifdef USE_BPF_LOOP
@@ -455,14 +457,20 @@ struct read_var_ctx {
        struct strobemeta_payload *data;
        void *tls_base;
        struct strobemeta_cfg *cfg;
-       void *payload;
+       size_t payload_off;
        /* value gets mutated */
        struct strobe_value_generic *value;
        enum read_type type;
 };
 
-static int read_var_callback(__u32 index, struct read_var_ctx *ctx)
+static int read_var_callback(__u64 index, struct read_var_ctx *ctx)
 {
+       /* lose precision info for ctx->payload_off, verifier won't track
+        * double xor, barrier_var() is needed to force clang to keep both xors.
+        */
+       ctx->payload_off ^= index;
+       barrier_var(ctx->payload_off);
+       ctx->payload_off ^= index;
        switch (ctx->type) {
        case READ_INT_VAR:
                if (index >= STROBE_MAX_INTS)
@@ -472,14 +480,18 @@ static int read_var_callback(__u32 index, struct read_var_ctx *ctx)
        case READ_MAP_VAR:
                if (index >= STROBE_MAX_MAPS)
                        return 1;
-               ctx->payload = read_map_var(ctx->cfg, index, ctx->tls_base,
-                                           ctx->value, ctx->data, ctx->payload);
+               if (ctx->payload_off > sizeof(ctx->data->payload) - READ_MAP_VAR_PAYLOAD_CAP)
+                       return 1;
+               ctx->payload_off = read_map_var(ctx->cfg, index, ctx->tls_base,
+                                               ctx->value, ctx->data, ctx->payload_off);
                break;
        case READ_STR_VAR:
                if (index >= STROBE_MAX_STRS)
                        return 1;
-               ctx->payload += read_str_var(ctx->cfg, index, ctx->tls_base,
-                                            ctx->value, ctx->data, ctx->payload);
+               if (ctx->payload_off > sizeof(ctx->data->payload) - STROBE_MAX_STR_LEN)
+                       return 1;
+               ctx->payload_off = read_str_var(ctx->cfg, index, ctx->tls_base,
+                                               ctx->value, ctx->data, ctx->payload_off);
                break;
        }
        return 0;
@@ -501,7 +513,8 @@ static void *read_strobe_meta(struct task_struct *task,
        pid_t pid = bpf_get_current_pid_tgid() >> 32;
        struct strobe_value_generic value = {0};
        struct strobemeta_cfg *cfg;
-       void *tls_base, *payload;
+       size_t payload_off;
+       void *tls_base;
 
        cfg = bpf_map_lookup_elem(&strobemeta_cfgs, &pid);
        if (!cfg)
@@ -509,7 +522,7 @@ static void *read_strobe_meta(struct task_struct *task,
 
        data->int_vals_set_mask = 0;
        data->req_meta_valid = 0;
-       payload = data->payload;
+       payload_off = 0;
        /*
         * we don't have struct task_struct definition, it should be:
         * tls_base = (void *)task->thread.fsbase;
@@ -522,7 +535,7 @@ static void *read_strobe_meta(struct task_struct *task,
                .tls_base = tls_base,
                .value = &value,
                .data = data,
-               .payload = payload,
+               .payload_off = 0,
        };
        int err;
 
@@ -540,6 +553,11 @@ static void *read_strobe_meta(struct task_struct *task,
        err = bpf_loop(STROBE_MAX_MAPS, read_var_callback, &ctx, 0);
        if (err != STROBE_MAX_MAPS)
                return NULL;
+
+       payload_off = ctx.payload_off;
+       /* this should not really happen, here only to satisfy verifier */
+       if (payload_off > sizeof(data->payload))
+               payload_off = sizeof(data->payload);
 #else
 #ifdef NO_UNROLL
 #pragma clang loop unroll(disable)
@@ -555,7 +573,7 @@ static void *read_strobe_meta(struct task_struct *task,
 #pragma unroll
 #endif /* NO_UNROLL */
        for (int i = 0; i < STROBE_MAX_STRS; ++i) {
-               payload += read_str_var(cfg, i, tls_base, &value, data, payload);
+               payload_off = read_str_var(cfg, i, tls_base, &value, data, payload_off);
        }
 #ifdef NO_UNROLL
 #pragma clang loop unroll(disable)
@@ -563,7 +581,7 @@ static void *read_strobe_meta(struct task_struct *task,
 #pragma unroll
 #endif /* NO_UNROLL */
        for (int i = 0; i < STROBE_MAX_MAPS; ++i) {
-               payload = read_map_var(cfg, i, tls_base, &value, data, payload);
+               payload_off = read_map_var(cfg, i, tls_base, &value, data, payload_off);
        }
 #endif /* USE_BPF_LOOP */
 
@@ -571,7 +589,7 @@ static void *read_strobe_meta(struct task_struct *task,
         * return pointer right after end of payload, so it's possible to
         * calculate exact amount of useful data that needs to be sent
         */
-       return payload;
+       return &data->payload[payload_off];
 }
 
 SEC("raw_tracepoint/kfree_skb")
index df7697b..c1f55e1 100644 (file)
@@ -97,4 +97,66 @@ l0_%=:       r2 = r0;                                        \
 "      ::: __clobber_all);
 }
 
+SEC("socket")
+__description("conditional loop (2)")
+__success
+__failure_unpriv __msg_unpriv("back-edge from insn 10 to 11")
+__naked void conditional_loop2(void)
+{
+       asm volatile ("                                 \
+       r9 = 2 ll;                                      \
+       r3 = 0x20 ll;                                   \
+       r4 = 0x35 ll;                                   \
+       r8 = r4;                                        \
+       goto l1_%=;                                     \
+l0_%=: r9 -= r3;                                       \
+       r9 -= r4;                                       \
+       r9 -= r8;                                       \
+l1_%=: r8 += r4;                                       \
+       if r8 < 0x64 goto l0_%=;                        \
+       r0 = r9;                                        \
+       exit;                                           \
+"      ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unconditional loop after conditional jump")
+__failure __msg("infinite loop detected")
+__failure_unpriv __msg_unpriv("back-edge from insn 3 to 2")
+__naked void uncond_loop_after_cond_jmp(void)
+{
+       asm volatile ("                                 \
+       r0 = 0;                                         \
+       if r0 > 0 goto l1_%=;                           \
+l0_%=: r0 = 1;                                         \
+       goto l0_%=;                                     \
+l1_%=: exit;                                           \
+"      ::: __clobber_all);
+}
+
+
+__naked __noinline __used
+static unsigned long never_ending_subprog()
+{
+       asm volatile ("                                 \
+       r0 = r1;                                        \
+       goto -1;                                        \
+"      ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unconditional loop after conditional jump")
+/* infinite loop is detected *after* check_cfg() */
+__failure __msg("infinite loop detected")
+__naked void uncond_loop_in_subprog_after_cond_jmp(void)
+{
+       asm volatile ("                                 \
+       r0 = 0;                                         \
+       if r0 > 0 goto l1_%=;                           \
+l0_%=: r0 += 1;                                        \
+       call never_ending_subprog;                      \
+l1_%=: exit;                                           \
+"      ::: __clobber_all);
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
new file mode 100644 (file)
index 0000000..5905e03
--- /dev/null
@@ -0,0 +1,242 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __uint(max_entries, 8);
+       __type(key, __u32);
+       __type(value, __u64);
+} map SEC(".maps");
+
+struct {
+       __uint(type, BPF_MAP_TYPE_USER_RINGBUF);
+       __uint(max_entries, 8);
+} ringbuf SEC(".maps");
+
+struct vm_area_struct;
+struct bpf_map;
+
+struct buf_context {
+       char *buf;
+};
+
+struct num_context {
+       __u64 i;
+       __u64 j;
+};
+
+__u8 choice_arr[2] = { 0, 1 };
+
+static int unsafe_on_2nd_iter_cb(__u32 idx, struct buf_context *ctx)
+{
+       if (idx == 0) {
+               ctx->buf = (char *)(0xDEAD);
+               return 0;
+       }
+
+       if (bpf_probe_read_user(ctx->buf, 8, (void *)(0xBADC0FFEE)))
+               return 1;
+
+       return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("R1 type=scalar expected=fp")
+int unsafe_on_2nd_iter(void *unused)
+{
+       char buf[4];
+       struct buf_context loop_ctx = { .buf = buf };
+
+       bpf_loop(100, unsafe_on_2nd_iter_cb, &loop_ctx, 0);
+       return 0;
+}
+
+static int unsafe_on_zero_iter_cb(__u32 idx, struct num_context *ctx)
+{
+       ctx->i = 0;
+       return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid access to map value, value_size=2 off=32 size=1")
+int unsafe_on_zero_iter(void *unused)
+{
+       struct num_context loop_ctx = { .i = 32 };
+
+       bpf_loop(100, unsafe_on_zero_iter_cb, &loop_ctx, 0);
+       return choice_arr[loop_ctx.i];
+}
+
+static int widening_cb(__u32 idx, struct num_context *ctx)
+{
+       ++ctx->i;
+       return 0;
+}
+
+SEC("?raw_tp")
+__success
+int widening(void *unused)
+{
+       struct num_context loop_ctx = { .i = 0, .j = 1 };
+
+       bpf_loop(100, widening_cb, &loop_ctx, 0);
+       /* loop_ctx.j is not changed during callback iteration,
+        * verifier should not apply widening to it.
+        */
+       return choice_arr[loop_ctx.j];
+}
+
+static int loop_detection_cb(__u32 idx, struct num_context *ctx)
+{
+       for (;;) {}
+       return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("infinite loop detected")
+int loop_detection(void *unused)
+{
+       struct num_context loop_ctx = { .i = 0 };
+
+       bpf_loop(100, loop_detection_cb, &loop_ctx, 0);
+       return 0;
+}
+
+static __always_inline __u64 oob_state_machine(struct num_context *ctx)
+{
+       switch (ctx->i) {
+       case 0:
+               ctx->i = 1;
+               break;
+       case 1:
+               ctx->i = 32;
+               break;
+       }
+       return 0;
+}
+
+static __u64 for_each_map_elem_cb(struct bpf_map *map, __u32 *key, __u64 *val, void *data)
+{
+       return oob_state_machine(data);
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid access to map value, value_size=2 off=32 size=1")
+int unsafe_for_each_map_elem(void *unused)
+{
+       struct num_context loop_ctx = { .i = 0 };
+
+       bpf_for_each_map_elem(&map, for_each_map_elem_cb, &loop_ctx, 0);
+       return choice_arr[loop_ctx.i];
+}
+
+static __u64 ringbuf_drain_cb(struct bpf_dynptr *dynptr, void *data)
+{
+       return oob_state_machine(data);
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid access to map value, value_size=2 off=32 size=1")
+int unsafe_ringbuf_drain(void *unused)
+{
+       struct num_context loop_ctx = { .i = 0 };
+
+       bpf_user_ringbuf_drain(&ringbuf, ringbuf_drain_cb, &loop_ctx, 0);
+       return choice_arr[loop_ctx.i];
+}
+
+static __u64 find_vma_cb(struct task_struct *task, struct vm_area_struct *vma, void *data)
+{
+       return oob_state_machine(data);
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid access to map value, value_size=2 off=32 size=1")
+int unsafe_find_vma(void *unused)
+{
+       struct task_struct *task = bpf_get_current_task_btf();
+       struct num_context loop_ctx = { .i = 0 };
+
+       bpf_find_vma(task, 0, find_vma_cb, &loop_ctx, 0);
+       return choice_arr[loop_ctx.i];
+}
+
+static int iter_limit_cb(__u32 idx, struct num_context *ctx)
+{
+       ctx->i++;
+       return 0;
+}
+
+SEC("?raw_tp")
+__success
+int bpf_loop_iter_limit_ok(void *unused)
+{
+       struct num_context ctx = { .i = 0 };
+
+       bpf_loop(1, iter_limit_cb, &ctx, 0);
+       return choice_arr[ctx.i];
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid access to map value, value_size=2 off=2 size=1")
+int bpf_loop_iter_limit_overflow(void *unused)
+{
+       struct num_context ctx = { .i = 0 };
+
+       bpf_loop(2, iter_limit_cb, &ctx, 0);
+       return choice_arr[ctx.i];
+}
+
+static int iter_limit_level2a_cb(__u32 idx, struct num_context *ctx)
+{
+       ctx->i += 100;
+       return 0;
+}
+
+static int iter_limit_level2b_cb(__u32 idx, struct num_context *ctx)
+{
+       ctx->i += 10;
+       return 0;
+}
+
+static int iter_limit_level1_cb(__u32 idx, struct num_context *ctx)
+{
+       ctx->i += 1;
+       bpf_loop(1, iter_limit_level2a_cb, ctx, 0);
+       bpf_loop(1, iter_limit_level2b_cb, ctx, 0);
+       return 0;
+}
+
+/* Check that a path visiting every callback function once has been
+ * reached by the verifier. Variables 'ctx{1,2}.i' below serve as flags,
+ * with each decimal digit corresponding to a callback visit marker.
+ */
+SEC("socket")
+__success __retval(111111)
+int bpf_loop_iter_limit_nested(void *unused)
+{
+       struct num_context ctx1 = { .i = 0 };
+       struct num_context ctx2 = { .i = 0 };
+       __u64 a, b, c;
+
+       bpf_loop(1, iter_limit_level1_cb, &ctx1, 0);
+       bpf_loop(1, iter_limit_level1_cb, &ctx2, 0);
+       a = ctx1.i;
+       b = ctx2.i;
+       /* Force 'ctx1.i' and 'ctx2.i' precise. */
+       c = choice_arr[(a + b) % 2];
+       /* This makes 'c' zero, but neither clang nor verifier know it. */
+       c /= 10;
+       /* Make sure that verifier does not visit 'impossible' states:
+        * enumerate all possible callback visit masks.
+        */
+       if (a != 0 && a != 1 && a != 11 && a != 101 && a != 111 &&
+           b != 0 && b != 1 && b != 11 && b != 101 && b != 111)
+               asm volatile ("r0 /= 0;" ::: "r0");
+       return 1000 * a + b + c;
+}
+
+char _license[] SEC("license") = "GPL";
index 5bc86af..71735db 100644 (file)
@@ -75,9 +75,10 @@ l0_%=:       r0 += 1;                                        \
 "      ::: __clobber_all);
 }
 
-SEC("tracepoint")
+SEC("socket")
 __description("bounded loop, start in the middle")
-__failure __msg("back-edge")
+__success
+__failure_unpriv __msg_unpriv("back-edge")
 __naked void loop_start_in_the_middle(void)
 {
        asm volatile ("                                 \
@@ -136,7 +137,9 @@ l0_%=:      exit;                                           \
 
 SEC("tracepoint")
 __description("bounded recursion")
-__failure __msg("back-edge")
+__failure
+/* verifier limitation in detecting max stack depth */
+__msg("the call stack of 8 frames is too deep !")
 __naked void bounded_recursion(void)
 {
        asm volatile ("                                 \
index 193c0f8..6b564d4 100644 (file)
@@ -91,3 +91,43 @@ __naked int bpf_end_bswap(void)
 }
 
 #endif /* v4 instruction */
+
+SEC("?raw_tp")
+__success __log_level(2)
+/*
+ * Without the bug fix there will be no history between "last_idx 3 first_idx 3"
+ * and "parent state regs=" lines. "R0_w=6" parts are here to help anchor
+ * expected log messages to the one specific mark_chain_precision operation.
+ *
+ * This is quite fragile: if verifier checkpointing heuristic changes, this
+ * might need adjusting.
+ */
+__msg("2: (07) r0 += 1                       ; R0_w=6")
+__msg("3: (35) if r0 >= 0xa goto pc+1")
+__msg("mark_precise: frame0: last_idx 3 first_idx 3 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r0 stack= before 2: (07) r0 += 1")
+__msg("mark_precise: frame0: regs=r0 stack= before 1: (07) r0 += 1")
+__msg("mark_precise: frame0: regs=r0 stack= before 4: (05) goto pc-4")
+__msg("mark_precise: frame0: regs=r0 stack= before 3: (35) if r0 >= 0xa goto pc+1")
+__msg("mark_precise: frame0: parent state regs= stack=:  R0_rw=P4")
+__msg("3: R0_w=6")
+__naked int state_loop_first_last_equal(void)
+{
+       asm volatile (
+               "r0 = 0;"
+       "l0_%=:"
+               "r0 += 1;"
+               "r0 += 1;"
+               /* every few iterations we'll have a checkpoint here with
+                * first_idx == last_idx, potentially confusing precision
+                * backtracking logic
+                */
+               "if r0 >= 10 goto l1_%=;"       /* checkpoint + mark_precise */
+               "goto l0_%=;"
+       "l1_%=:"
+               "exit;"
+               ::: __clobber_common
+       );
+}
+
+char _license[] SEC("license") = "GPL";
index db6b314..f61d623 100644 (file)
@@ -119,15 +119,41 @@ __naked int global_subprog_result_precise(void)
 
 SEC("?raw_tp")
 __success __log_level(2)
+/* First simulated path does not include callback body,
+ * r1 and r4 are always precise for bpf_loop() calls.
+ */
+__msg("9: (85) call bpf_loop#181")
+__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1")
+__msg("mark_precise: frame0: parent state regs=r4 stack=:")
+__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9")
+__msg("mark_precise: frame0: regs=r4 stack= before 8: (b7) r4 = 0")
+__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1")
+__msg("mark_precise: frame0: parent state regs=r1 stack=:")
+__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9")
+__msg("mark_precise: frame0: regs=r1 stack= before 8: (b7) r4 = 0")
+__msg("mark_precise: frame0: regs=r1 stack= before 7: (b7) r3 = 0")
+__msg("mark_precise: frame0: regs=r1 stack= before 6: (bf) r2 = r8")
+__msg("mark_precise: frame0: regs=r1 stack= before 5: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3")
+/* r6 precision propagation */
 __msg("14: (0f) r1 += r6")
-__msg("mark_precise: frame0: last_idx 14 first_idx 10")
+__msg("mark_precise: frame0: last_idx 14 first_idx 9")
 __msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7")
 __msg("mark_precise: frame0: regs=r6 stack= before 12: (27) r6 *= 4")
 __msg("mark_precise: frame0: regs=r6 stack= before 11: (25) if r6 > 0x3 goto pc+4")
 __msg("mark_precise: frame0: regs=r6 stack= before 10: (bf) r6 = r0")
-__msg("mark_precise: frame0: parent state regs=r0 stack=:")
-__msg("mark_precise: frame0: last_idx 18 first_idx 0")
-__msg("mark_precise: frame0: regs=r0 stack= before 18: (95) exit")
+__msg("mark_precise: frame0: regs=r0 stack= before 9: (85) call bpf_loop")
+/* State entering callback body popped from states stack */
+__msg("from 9 to 17: frame1:")
+__msg("17: frame1: R1=scalar() R2=0 R10=fp0 cb")
+__msg("17: (b7) r0 = 0")
+__msg("18: (95) exit")
+__msg("returning from callee:")
+__msg("to caller at 9:")
+__msg("frame 0: propagating r1,r4")
+__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r1,r4 stack= before 18: (95) exit")
+__msg("from 18 to 9: safe")
 __naked int callback_result_precise(void)
 {
        asm volatile (
@@ -233,20 +259,36 @@ __naked int parent_callee_saved_reg_precise_global(void)
 
 SEC("?raw_tp")
 __success __log_level(2)
+/* First simulated path does not include callback body */
 __msg("12: (0f) r1 += r6")
-__msg("mark_precise: frame0: last_idx 12 first_idx 10")
+__msg("mark_precise: frame0: last_idx 12 first_idx 9")
 __msg("mark_precise: frame0: regs=r6 stack= before 11: (bf) r1 = r7")
 __msg("mark_precise: frame0: regs=r6 stack= before 10: (27) r6 *= 4")
+__msg("mark_precise: frame0: regs=r6 stack= before 9: (85) call bpf_loop")
 __msg("mark_precise: frame0: parent state regs=r6 stack=:")
-__msg("mark_precise: frame0: last_idx 16 first_idx 0")
-__msg("mark_precise: frame0: regs=r6 stack= before 16: (95) exit")
-__msg("mark_precise: frame1: regs= stack= before 15: (b7) r0 = 0")
-__msg("mark_precise: frame1: regs= stack= before 9: (85) call bpf_loop#181")
+__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9")
 __msg("mark_precise: frame0: regs=r6 stack= before 8: (b7) r4 = 0")
 __msg("mark_precise: frame0: regs=r6 stack= before 7: (b7) r3 = 0")
 __msg("mark_precise: frame0: regs=r6 stack= before 6: (bf) r2 = r8")
 __msg("mark_precise: frame0: regs=r6 stack= before 5: (b7) r1 = 1")
 __msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3")
+/* State entering callback body popped from states stack */
+__msg("from 9 to 15: frame1:")
+__msg("15: frame1: R1=scalar() R2=0 R10=fp0 cb")
+__msg("15: (b7) r0 = 0")
+__msg("16: (95) exit")
+__msg("returning from callee:")
+__msg("to caller at 9:")
+/* r1, r4 are always precise for bpf_loop(),
+ * r6 was marked before backtracking to callback body.
+ */
+__msg("frame 0: propagating r1,r4,r6")
+__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r1,r4,r6 stack= before 16: (95) exit")
+__msg("mark_precise: frame1: regs= stack= before 15: (b7) r0 = 0")
+__msg("mark_precise: frame1: regs= stack= before 9: (85) call bpf_loop")
+__msg("mark_precise: frame0: parent state regs= stack=:")
+__msg("from 16 to 9: safe")
 __naked int parent_callee_saved_reg_precise_with_callback(void)
 {
        asm volatile (
@@ -373,22 +415,38 @@ __naked int parent_stack_slot_precise_global(void)
 
 SEC("?raw_tp")
 __success __log_level(2)
+/* First simulated path does not include callback body */
 __msg("14: (0f) r1 += r6")
-__msg("mark_precise: frame0: last_idx 14 first_idx 11")
+__msg("mark_precise: frame0: last_idx 14 first_idx 10")
 __msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7")
 __msg("mark_precise: frame0: regs=r6 stack= before 12: (27) r6 *= 4")
 __msg("mark_precise: frame0: regs=r6 stack= before 11: (79) r6 = *(u64 *)(r10 -8)")
+__msg("mark_precise: frame0: regs= stack=-8 before 10: (85) call bpf_loop")
 __msg("mark_precise: frame0: parent state regs= stack=-8:")
-__msg("mark_precise: frame0: last_idx 18 first_idx 0")
-__msg("mark_precise: frame0: regs= stack=-8 before 18: (95) exit")
-__msg("mark_precise: frame1: regs= stack= before 17: (b7) r0 = 0")
-__msg("mark_precise: frame1: regs= stack= before 10: (85) call bpf_loop#181")
+__msg("mark_precise: frame0: last_idx 9 first_idx 0 subseq_idx 10")
 __msg("mark_precise: frame0: regs= stack=-8 before 9: (b7) r4 = 0")
 __msg("mark_precise: frame0: regs= stack=-8 before 8: (b7) r3 = 0")
 __msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r2 = r8")
 __msg("mark_precise: frame0: regs= stack=-8 before 6: (bf) r1 = r6")
 __msg("mark_precise: frame0: regs= stack=-8 before 5: (7b) *(u64 *)(r10 -8) = r6")
 __msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3")
+/* State entering callback body popped from states stack */
+__msg("from 10 to 17: frame1:")
+__msg("17: frame1: R1=scalar() R2=0 R10=fp0 cb")
+__msg("17: (b7) r0 = 0")
+__msg("18: (95) exit")
+__msg("returning from callee:")
+__msg("to caller at 10:")
+/* r1, r4 are always precise for bpf_loop(),
+ * fp-8 was marked before backtracking to callback body.
+ */
+__msg("frame 0: propagating r1,r4,fp-8")
+__msg("mark_precise: frame0: last_idx 10 first_idx 10 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r1,r4 stack=-8 before 18: (95) exit")
+__msg("mark_precise: frame1: regs= stack= before 17: (b7) r0 = 0")
+__msg("mark_precise: frame1: regs= stack= before 10: (85) call bpf_loop#181")
+__msg("mark_precise: frame0: parent state regs= stack=:")
+__msg("from 18 to 10: safe")
 __naked int parent_stack_slot_precise_with_callback(void)
 {
        asm volatile (
index e959336..80f6206 100644 (file)
@@ -53,6 +53,8 @@
 #define DEFAULT_TTL 64
 #define MAX_ALLOWED_PORTS 8
 
+#define MAX_PACKET_OFF 0xffff
+
 #define swap(a, b) \
        do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
 
@@ -183,63 +185,76 @@ static __always_inline __u32 tcp_clock_ms(void)
 }
 
 struct tcpopt_context {
-       __u8 *ptr;
-       __u8 *end;
+       void *data;
        void *data_end;
        __be32 *tsecr;
        __u8 wscale;
        bool option_timestamp;
        bool option_sack;
+       __u32 off;
 };
 
-static int tscookie_tcpopt_parse(struct tcpopt_context *ctx)
+static __always_inline u8 *next(struct tcpopt_context *ctx, __u32 sz)
 {
-       __u8 opcode, opsize;
+       __u64 off = ctx->off;
+       __u8 *data;
 
-       if (ctx->ptr >= ctx->end)
-               return 1;
-       if (ctx->ptr >= ctx->data_end)
-               return 1;
+       /* Verifier forbids access to packet when offset exceeds MAX_PACKET_OFF */
+       if (off > MAX_PACKET_OFF - sz)
+               return NULL;
 
-       opcode = ctx->ptr[0];
+       data = ctx->data + off;
+       barrier_var(data);
+       if (data + sz >= ctx->data_end)
+               return NULL;
 
-       if (opcode == TCPOPT_EOL)
-               return 1;
-       if (opcode == TCPOPT_NOP) {
-               ++ctx->ptr;
-               return 0;
-       }
+       ctx->off += sz;
+       return data;
+}
 
-       if (ctx->ptr + 1 >= ctx->end)
-               return 1;
-       if (ctx->ptr + 1 >= ctx->data_end)
+static int tscookie_tcpopt_parse(struct tcpopt_context *ctx)
+{
+       __u8 *opcode, *opsize, *wscale, *tsecr;
+       __u32 off = ctx->off;
+
+       opcode = next(ctx, 1);
+       if (!opcode)
                return 1;
-       opsize = ctx->ptr[1];
-       if (opsize < 2)
+
+       if (*opcode == TCPOPT_EOL)
                return 1;
+       if (*opcode == TCPOPT_NOP)
+               return 0;
 
-       if (ctx->ptr + opsize > ctx->end)
+       opsize = next(ctx, 1);
+       if (!opsize || *opsize < 2)
                return 1;
 
-       switch (opcode) {
+       switch (*opcode) {
        case TCPOPT_WINDOW:
-               if (opsize == TCPOLEN_WINDOW && ctx->ptr + TCPOLEN_WINDOW <= ctx->data_end)
-                       ctx->wscale = ctx->ptr[2] < TCP_MAX_WSCALE ? ctx->ptr[2] : TCP_MAX_WSCALE;
+               wscale = next(ctx, 1);
+               if (!wscale)
+                       return 1;
+               if (*opsize == TCPOLEN_WINDOW)
+                       ctx->wscale = *wscale < TCP_MAX_WSCALE ? *wscale : TCP_MAX_WSCALE;
                break;
        case TCPOPT_TIMESTAMP:
-               if (opsize == TCPOLEN_TIMESTAMP && ctx->ptr + TCPOLEN_TIMESTAMP <= ctx->data_end) {
+               tsecr = next(ctx, 4);
+               if (!tsecr)
+                       return 1;
+               if (*opsize == TCPOLEN_TIMESTAMP) {
                        ctx->option_timestamp = true;
                        /* Client's tsval becomes our tsecr. */
-                       *ctx->tsecr = get_unaligned((__be32 *)(ctx->ptr + 2));
+                       *ctx->tsecr = get_unaligned((__be32 *)tsecr);
                }
                break;
        case TCPOPT_SACK_PERM:
-               if (opsize == TCPOLEN_SACK_PERM)
+               if (*opsize == TCPOLEN_SACK_PERM)
                        ctx->option_sack = true;
                break;
        }
 
-       ctx->ptr += opsize;
+       ctx->off = off + *opsize;
 
        return 0;
 }
@@ -256,16 +271,21 @@ static int tscookie_tcpopt_parse_batch(__u32 index, void *context)
 
 static __always_inline bool tscookie_init(struct tcphdr *tcp_header,
                                          __u16 tcp_len, __be32 *tsval,
-                                         __be32 *tsecr, void *data_end)
+                                         __be32 *tsecr, void *data, void *data_end)
 {
        struct tcpopt_context loop_ctx = {
-               .ptr = (__u8 *)(tcp_header + 1),
-               .end = (__u8 *)tcp_header + tcp_len,
+               .data = data,
                .data_end = data_end,
                .tsecr = tsecr,
                .wscale = TS_OPT_WSCALE_MASK,
                .option_timestamp = false,
                .option_sack = false,
+               /* Note: currently the verifier tracks .off as an unbound
+                *       scalar. If the verifier at some point gets smarter and
+                *       computes a bounded value for this var, beware that it
+                *       might hinder bpf_loop() convergence validation.
+                */
+               .off = (__u8 *)(tcp_header + 1) - (__u8 *)data,
        };
        u32 cookie;
 
@@ -635,7 +655,7 @@ static __always_inline int syncookie_handle_syn(struct header_pointers *hdr,
        cookie = (__u32)value;
 
        if (tscookie_init((void *)hdr->tcp, hdr->tcp_len,
-                         &tsopt_buf[0], &tsopt_buf[1], data_end))
+                         &tsopt_buf[0], &tsopt_buf[1], data, data_end))
                tsopt = tsopt_buf;
 
        /* Check that there is enough space for a SYNACK. It also covers
index 1bdf2b4..3d5cd51 100644 (file)
        BPF_EXIT_INSN(),
        },
        .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-       .errstr = "back-edge from insn 0 to 0",
+       .errstr = "the call stack of 9 frames is too deep",
        .result = REJECT,
 },
 {
        BPF_EXIT_INSN(),
        },
        .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-       .errstr = "back-edge",
+       .errstr = "the call stack of 9 frames is too deep",
        .result = REJECT,
 },
 {
        BPF_EXIT_INSN(),
        },
        .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-       .errstr = "back-edge",
+       .errstr = "the call stack of 9 frames is too deep",
        .result = REJECT,
 },
 {
index f929790..78f19c2 100644 (file)
@@ -9,8 +9,8 @@
        BPF_MOV64_IMM(BPF_REG_0, 2),
        BPF_EXIT_INSN(),
        },
-       .errstr = "invalid BPF_LD_IMM insn",
-       .errstr_unpriv = "R1 pointer comparison",
+       .errstr = "jump into the middle of ldimm64 insn 1",
+       .errstr_unpriv = "jump into the middle of ldimm64 insn 1",
        .result = REJECT,
 },
 {
@@ -23,8 +23,8 @@
        BPF_LD_IMM64(BPF_REG_0, 1),
        BPF_EXIT_INSN(),
        },
-       .errstr = "invalid BPF_LD_IMM insn",
-       .errstr_unpriv = "R1 pointer comparison",
+       .errstr = "jump into the middle of ldimm64 insn 1",
+       .errstr_unpriv = "jump into the middle of ldimm64 insn 1",
        .result = REJECT,
 },
 {
index 591ca96..b604c57 100644 (file)
@@ -908,8 +908,9 @@ static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr)
        struct xdp_info *meta = data - sizeof(struct xdp_info);
 
        if (meta->count != pkt->pkt_nb) {
-               ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%d]\n",
-                              __func__, pkt->pkt_nb, meta->count);
+               ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%llu]\n",
+                              __func__, pkt->pkt_nb,
+                              (unsigned long long)meta->count);
                return false;
        }
 
@@ -926,11 +927,13 @@ static bool is_frag_valid(struct xsk_umem_info *umem, u64 addr, u32 len, u32 exp
 
        if (addr >= umem->num_frames * umem->frame_size ||
            addr + len > umem->num_frames * umem->frame_size) {
-               ksft_print_msg("Frag invalid addr: %llx len: %u\n", addr, len);
+               ksft_print_msg("Frag invalid addr: %llx len: %u\n",
+                              (unsigned long long)addr, len);
                return false;
        }
        if (!umem->unaligned_mode && addr % umem->frame_size + len > umem->frame_size) {
-               ksft_print_msg("Frag crosses frame boundary addr: %llx len: %u\n", addr, len);
+               ksft_print_msg("Frag crosses frame boundary addr: %llx len: %u\n",
+                              (unsigned long long)addr, len);
                return false;
        }
 
@@ -1029,7 +1032,8 @@ static int complete_pkts(struct xsk_socket_info *xsk, int batch_size)
                        u64 addr = *xsk_ring_cons__comp_addr(&xsk->umem->cq, idx + rcvd - 1);
 
                        ksft_print_msg("[%s] Too many packets completed\n", __func__);
-                       ksft_print_msg("Last completion address: %llx\n", addr);
+                       ksft_print_msg("Last completion address: %llx\n",
+                                      (unsigned long long)addr);
                        return TEST_FAILURE;
                }
 
@@ -1513,8 +1517,9 @@ static int validate_tx_invalid_descs(struct ifobject *ifobject)
        }
 
        if (stats.tx_invalid_descs != ifobject->xsk->pkt_stream->nb_pkts / 2) {
-               ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%u] expected [%u]\n",
-                              __func__, stats.tx_invalid_descs,
+               ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%llu] expected [%u]\n",
+                              __func__,
+                              (unsigned long long)stats.tx_invalid_descs,
                               ifobject->xsk->pkt_stream->nb_pkts);
                return TEST_FAILURE;
        }
index cc920c7..4ff10ea 100644 (file)
@@ -45,3 +45,4 @@ mdwe_test
 gup_longterm
 mkdirty
 va_high_addr_switch
+hugetlb_fault_after_madv
index 0161fb4..befab43 100644 (file)
@@ -94,19 +94,19 @@ int init_uffd(void)
 
        uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY);
        if (uffd == -1)
-               ksft_exit_fail_msg("uffd syscall failed\n");
+               return uffd;
 
        uffdio_api.api = UFFD_API;
        uffdio_api.features = UFFD_FEATURE_WP_UNPOPULATED | UFFD_FEATURE_WP_ASYNC |
                              UFFD_FEATURE_WP_HUGETLBFS_SHMEM;
        if (ioctl(uffd, UFFDIO_API, &uffdio_api))
-               ksft_exit_fail_msg("UFFDIO_API\n");
+               return -1;
 
        if (!(uffdio_api.api & UFFDIO_REGISTER_MODE_WP) ||
            !(uffdio_api.features & UFFD_FEATURE_WP_UNPOPULATED) ||
            !(uffdio_api.features & UFFD_FEATURE_WP_ASYNC) ||
            !(uffdio_api.features & UFFD_FEATURE_WP_HUGETLBFS_SHMEM))
-               ksft_exit_fail_msg("UFFDIO_API error %llu\n", uffdio_api.api);
+               return -1;
 
        return 0;
 }
@@ -1151,7 +1151,7 @@ int sanity_tests(void)
        /* 9. Memory mapped file */
        fd = open(__FILE__, O_RDONLY);
        if (fd < 0)
-               ksft_exit_fail_msg("%s Memory mapped file\n");
+               ksft_exit_fail_msg("%s Memory mapped file\n", __func__);
 
        ret = stat(__FILE__, &sbuf);
        if (ret < 0)
@@ -1159,7 +1159,7 @@ int sanity_tests(void)
 
        fmem = mmap(NULL, sbuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
        if (fmem == MAP_FAILED)
-               ksft_exit_fail_msg("error nomem %ld %s\n", errno, strerror(errno));
+               ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno));
 
        tmp_buf = malloc(sbuf.st_size);
        memcpy(tmp_buf, fmem, sbuf.st_size);
@@ -1189,7 +1189,7 @@ int sanity_tests(void)
 
        fmem = mmap(NULL, buf_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
        if (fmem == MAP_FAILED)
-               ksft_exit_fail_msg("error nomem %ld %s\n", errno, strerror(errno));
+               ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno));
 
        wp_init(fmem, buf_size);
        wp_addr_range(fmem, buf_size);
@@ -1479,6 +1479,10 @@ int main(void)
        struct stat sbuf;
 
        ksft_print_header();
+
+       if (init_uffd())
+               return ksft_exit_pass();
+
        ksft_set_plan(115);
 
        page_size = getpagesize();
@@ -1488,9 +1492,6 @@ int main(void)
        if (pagemap_fd < 0)
                return -EINVAL;
 
-       if (init_uffd())
-               ksft_exit_fail_msg("uffd init failed\n");
-
        /* 1. Sanity testing */
        sanity_tests_sd();
 
@@ -1595,7 +1596,7 @@ int main(void)
 
        fmem = mmap(NULL, sbuf.st_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
        if (fmem == MAP_FAILED)
-               ksft_exit_fail_msg("error nomem %ld %s\n", errno, strerror(errno));
+               ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno));
 
        wp_init(fmem, sbuf.st_size);
        wp_addr_range(fmem, sbuf.st_size);
@@ -1623,7 +1624,7 @@ int main(void)
 
        fmem = mmap(NULL, buf_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
        if (fmem == MAP_FAILED)
-               ksft_exit_fail_msg("error nomem %ld %s\n", errno, strerror(errno));
+               ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno));
 
        wp_init(fmem, buf_size);
        wp_addr_range(fmem, buf_size);
index cc16f6c..0075744 100755 (executable)
@@ -223,9 +223,12 @@ CATEGORY="hugetlb" run_test ./hugepage-mremap
 CATEGORY="hugetlb" run_test ./hugepage-vmemmap
 CATEGORY="hugetlb" run_test ./hugetlb-madvise
 
+nr_hugepages_tmp=$(cat /proc/sys/vm/nr_hugepages)
 # For this test, we need one and just one huge page
 echo 1 > /proc/sys/vm/nr_hugepages
 CATEGORY="hugetlb" run_test ./hugetlb_fault_after_madv
+# Restore the previous number of huge pages, since further tests rely on it
+echo "$nr_hugepages_tmp" > /proc/sys/vm/nr_hugepages
 
 if test_selected "hugetlb"; then
        echo "NOTE: These hugetlb tests provide minimal coverage.  Use"
index 75a2438..3c94f2f 100755 (executable)
@@ -3240,7 +3240,7 @@ fastclose_tests()
        if reset_check_counter "fastclose server test" "MPTcpExtMPFastcloseRx"; then
                test_linkfail=1024 fastclose=server \
                        run_tests $ns1 $ns2 10.0.1.1
-               chk_join_nr 0 0 0
+               chk_join_nr 0 0 0 0 0 0 1
                chk_fclose_nr 1 1 invert
                chk_rst_nr 1 1
        fi
index 5f2b3f6..38be970 100755 (executable)
@@ -859,7 +859,7 @@ kci_test_gretap()
 
 
        run_cmd ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
-       run_cmd ip -netns "$testns" link set dev $DEV_NS ups
+       run_cmd ip -netns "$testns" link set dev $DEV_NS up
        run_cmd ip -netns "$testns" link del "$DEV_NS"
 
        # test external mode
index 5b0e93f..01fa816 100644 (file)
@@ -353,11 +353,12 @@ static void test_stream_msg_peek_server(const struct test_opts *opts)
 }
 
 #define SOCK_BUF_SIZE (2 * 1024 * 1024)
-#define MAX_MSG_SIZE (32 * 1024)
+#define MAX_MSG_PAGES 4
 
 static void test_seqpacket_msg_bounds_client(const struct test_opts *opts)
 {
        unsigned long curr_hash;
+       size_t max_msg_size;
        int page_size;
        int msg_count;
        int fd;
@@ -373,7 +374,8 @@ static void test_seqpacket_msg_bounds_client(const struct test_opts *opts)
 
        curr_hash = 0;
        page_size = getpagesize();
-       msg_count = SOCK_BUF_SIZE / MAX_MSG_SIZE;
+       max_msg_size = MAX_MSG_PAGES * page_size;
+       msg_count = SOCK_BUF_SIZE / max_msg_size;
 
        for (int i = 0; i < msg_count; i++) {
                size_t buf_size;
@@ -383,7 +385,7 @@ static void test_seqpacket_msg_bounds_client(const struct test_opts *opts)
                /* Use "small" buffers and "big" buffers. */
                if (i & 1)
                        buf_size = page_size +
-                                       (rand() % (MAX_MSG_SIZE - page_size));
+                                       (rand() % (max_msg_size - page_size));
                else
                        buf_size = 1 + (rand() % page_size);
 
@@ -429,7 +431,6 @@ static void test_seqpacket_msg_bounds_server(const struct test_opts *opts)
        unsigned long remote_hash;
        unsigned long curr_hash;
        int fd;
-       char buf[MAX_MSG_SIZE];
        struct msghdr msg = {0};
        struct iovec iov = {0};
 
@@ -457,8 +458,13 @@ static void test_seqpacket_msg_bounds_server(const struct test_opts *opts)
        control_writeln("SRVREADY");
        /* Wait, until peer sends whole data. */
        control_expectln("SENDDONE");
-       iov.iov_base = buf;
-       iov.iov_len = sizeof(buf);
+       iov.iov_len = MAX_MSG_PAGES * getpagesize();
+       iov.iov_base = malloc(iov.iov_len);
+       if (!iov.iov_base) {
+               perror("malloc");
+               exit(EXIT_FAILURE);
+       }
+
        msg.msg_iov = &iov;
        msg.msg_iovlen = 1;
 
@@ -483,6 +489,7 @@ static void test_seqpacket_msg_bounds_server(const struct test_opts *opts)
                curr_hash += hash_djb2(msg.msg_iov[0].iov_base, recv_size);
        }
 
+       free(iov.iov_base);
        close(fd);
        remote_hash = control_readulong();