Protocol 2.15 (Kernel 5.5) Added the kernel_info and kernel_info.setup_type_max.
============= ============================================================
-.. note::
+ .. note::
The protocol version number should be changed only if the setup header
is changed. There is no need to update the version number if boot_params
or kernel_info are changed. Additionally, it is recommended to use
properties:
rx-internal-delay-ps:
description:
- RGMII Receive Clock Delay defined in pico seconds.This is used for
+ RGMII Receive Clock Delay defined in pico seconds. This is used for
controllers that have configurable RX internal delays. If this
property is present then the MAC applies the RX delay.
tx-internal-delay-ps:
description:
- RGMII Transmit Clock Delay defined in pico seconds.This is used for
+ RGMII Transmit Clock Delay defined in pico seconds. This is used for
controllers that have configurable TX internal delays. If this
property is present then the MAC applies the TX delay.
- qcom,sm8350-ufshc
- qcom,sm8450-ufshc
- qcom,sm8550-ufshc
+ - qcom,sm8650-ufshc
- const: qcom,ufshc
- const: jedec,ufs-2.0
- qcom,sm8350-ufshc
- qcom,sm8450-ufshc
- qcom,sm8550-ufshc
+ - qcom,sm8650-ufshc
then:
properties:
clocks:
F: scripts/get_maintainer.pl
GFS2 FILE SYSTEM
-M: Bob Peterson <rpeterso@redhat.com>
M: Andreas Gruenbacher <agruenba@redhat.com>
L: gfs2@lists.linux.dev
S: Supported
F: drivers/counter/ti-eqep.c
TI ETHERNET SWITCH DRIVER (CPSW)
-R: Grygorii Strashko <grygorii.strashko@ti.com>
+R: Siddharth Vadapalli <s-vadapalli@ti.com>
+R: Ravi Gunasekaran <r-gunasekaran@ti.com>
+R: Roger Quadros <rogerq@kernel.org>
L: linux-omap@vger.kernel.org
L: netdev@vger.kernel.org
S: Maintained
F: drivers/media/i2c/ds90*
F: include/media/i2c/ds90*
+TI ICSSG ETHERNET DRIVER (ICSSG)
+R: MD Danish Anwar <danishanwar@ti.com>
+R: Roger Quadros <rogerq@kernel.org>
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+L: netdev@vger.kernel.org
+S: Maintained
+F: Documentation/devicetree/bindings/net/ti,icss*.yaml
+F: drivers/net/ethernet/ti/icssg/*
+
TI J721E CSI2RX DRIVER
M: Jai Luthra <j-luthra@ti.com>
L: linux-media@vger.kernel.org
F: arch/x86/kernel/stacktrace.c
F: arch/x86/kernel/unwind_*.c
+X86 TRUST DOMAIN EXTENSIONS (TDX)
+M: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+R: Dave Hansen <dave.hansen@linux.intel.com>
+L: x86@kernel.org
+L: linux-coco@lists.linux.dev
+S: Supported
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/tdx
+F: arch/x86/boot/compressed/tdx*
+F: arch/x86/coco/tdx/
+F: arch/x86/include/asm/shared/tdx.h
+F: arch/x86/include/asm/tdx.h
+F: arch/x86/virt/vmx/tdx/
+F: drivers/virt/coco/tdx-guest
+
X86 VDSO
M: Andy Lutomirski <luto@kernel.org>
L: linux-kernel@vger.kernel.org
P: Documentation/filesystems/xfs-maintainer-entry-profile.rst
F: Documentation/ABI/testing/sysfs-fs-xfs
F: Documentation/admin-guide/xfs.rst
-F: Documentation/filesystems/xfs-delayed-logging-design.rst
-F: Documentation/filesystems/xfs-self-describing-metadata.rst
+F: Documentation/filesystems/xfs-*
F: fs/xfs/
F: include/uapi/linux/dqblk_xfs.h
F: include/uapi/linux/fsmap.h
default 8
config ARCH_MMAP_RND_BITS_MAX
- default 24 if 64BIT
- default 17
+ default 18 if 64BIT
+ default 13
config ARCH_MMAP_RND_COMPAT_BITS_MAX
- default 17
+ default 13
# unless you want to implement ACPI on PA-RISC ... ;-)
config PM
#define ELF_HWCAP 0
-/* Masks for stack and mmap randomization */
-#define BRK_RND_MASK (is_32bit_task() ? 0x07ffUL : 0x3ffffUL)
-#define MMAP_RND_MASK (is_32bit_task() ? 0x1fffUL : 0x3ffffUL)
-#define STACK_RND_MASK MMAP_RND_MASK
-
-struct mm_struct;
-extern unsigned long arch_randomize_brk(struct mm_struct *);
-#define arch_randomize_brk arch_randomize_brk
-
+#define STACK_RND_MASK 0x7ff /* 8MB of VA */
#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
struct linux_binprm;
#ifndef __ASSEMBLY__
+struct rlimit;
+unsigned long mmap_upper_limit(struct rlimit *rlim_stack);
unsigned long calc_max_stack_size(unsigned long stack_max);
/*
char cpu_name[60], *p;
/* strip PA path from CPU name to not confuse lscpu */
- strlcpy(cpu_name, per_cpu(cpu_data, 0).dev->name, sizeof(cpu_name));
+ strscpy(cpu_name, per_cpu(cpu_data, 0).dev->name, sizeof(cpu_name));
p = strrchr(cpu_name, '[');
if (p)
*(--p) = 0;
* indicating that "current" should be used instead of a passed-in
* value from the exec bprm as done with arch_pick_mmap_layout().
*/
-static unsigned long mmap_upper_limit(struct rlimit *rlim_stack)
+unsigned long mmap_upper_limit(struct rlimit *rlim_stack)
{
unsigned long stack_base;
#include <asm/x86_init.h>
#include <asm/cpufeature.h>
#include <asm/irq_vectors.h>
+#include <asm/xen/hypervisor.h>
+
+#include <xen/xen.h>
#ifdef CONFIG_ACPI_APEI
# include <asm/pgtable_types.h>
if (!cpu_has(c, X86_FEATURE_MWAIT) ||
boot_option_idle_override == IDLE_NOMWAIT)
*cap &= ~(ACPI_PROC_CAP_C_C1_FFH | ACPI_PROC_CAP_C_C2C3_FFH);
+
+ if (xen_initial_domain()) {
+ /*
+ * When Linux is running as Xen dom0, the hypervisor is the
+ * entity in charge of the processor power management, and so
+ * Xen needs to check the OS capabilities reported in the
+ * processor capabilities buffer matches what the hypervisor
+ * driver supports.
+ */
+ xen_sanitize_proc_cap_bits(cap);
+ }
}
static inline bool acpi_has_cpu_in_madt(void)
enum xen_lazy_mode xen_get_lazy_mode(void);
+#if defined(CONFIG_XEN_DOM0) && defined(CONFIG_ACPI)
+void xen_sanitize_proc_cap_bits(uint32_t *buf);
+#else
+static inline void xen_sanitize_proc_cap_bits(uint32_t *buf)
+{
+ BUG();
+}
+#endif
+
#endif /* _ASM_X86_XEN_HYPERVISOR_H */
#ifdef CONFIG_X86_LOCAL_APIC
static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
+static bool has_lapic_cpus __initdata;
static bool acpi_support_online_capable;
#endif
if (!acpi_is_processor_usable(processor->lapic_flags))
return 0;
+ /*
+ * According to https://uefi.org/specs/ACPI/6.5/05_ACPI_Software_Programming_Model.html#processor-local-x2apic-structure
+ * when MADT provides both valid LAPIC and x2APIC entries, the APIC ID
+ * in x2APIC must be equal or greater than 0xff.
+ */
+ if (has_lapic_cpus && apic_id < 0xff)
+ return 0;
+
/*
* We need to register disabled CPU as well to permit
* counting disabled CPUs. This allows us to size
static int __init acpi_parse_madt_lapic_entries(void)
{
- int count;
- int x2count = 0;
- int ret;
- struct acpi_subtable_proc madt_proc[2];
+ int count, x2count = 0;
if (!boot_cpu_has(X86_FEATURE_APIC))
return -ENODEV;
acpi_parse_sapic, MAX_LOCAL_APIC);
if (!count) {
- memset(madt_proc, 0, sizeof(madt_proc));
- madt_proc[0].id = ACPI_MADT_TYPE_LOCAL_APIC;
- madt_proc[0].handler = acpi_parse_lapic;
- madt_proc[1].id = ACPI_MADT_TYPE_LOCAL_X2APIC;
- madt_proc[1].handler = acpi_parse_x2apic;
- ret = acpi_table_parse_entries_array(ACPI_SIG_MADT,
- sizeof(struct acpi_table_madt),
- madt_proc, ARRAY_SIZE(madt_proc), MAX_LOCAL_APIC);
- if (ret < 0) {
- pr_err("Error parsing LAPIC/X2APIC entries\n");
- return ret;
- }
-
- count = madt_proc[0].count;
- x2count = madt_proc[1].count;
+ count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC,
+ acpi_parse_lapic, MAX_LOCAL_APIC);
+ has_lapic_cpus = count > 0;
+ x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC,
+ acpi_parse_x2apic, MAX_LOCAL_APIC);
}
if (!count && !x2count) {
pr_err("No LAPIC entries present\n");
frame = get_sigframe(ksig, regs, sizeof(struct rt_sigframe), &fp);
uc_flags = frame_uc_flags(regs);
- if (setup_signal_shadow_stack(ksig))
- return -EFAULT;
-
if (!user_access_begin(frame, sizeof(*frame)))
return -EFAULT;
return -EFAULT;
}
+ if (setup_signal_shadow_stack(ksig))
+ return -EFAULT;
+
/* Set up registers for signal handler */
regs->di = ksig->sig;
/* In case the signal handler was declared without prototypes */
};
struct request *rq;
- if (unlikely(bio_queue_enter(bio)))
- return NULL;
-
if (blk_mq_attempt_bio_merge(q, bio, nsegs))
- goto queue_exit;
+ return NULL;
rq_qos_throttle(q, bio);
rq_qos_cleanup(q, bio);
if (bio->bi_opf & REQ_NOWAIT)
bio_wouldblock_error(bio);
-queue_exit:
- blk_queue_exit(q);
return NULL;
}
-static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
- struct blk_plug *plug, struct bio **bio, unsigned int nsegs)
+/* return true if this @rq can be used for @bio */
+static bool blk_mq_can_use_cached_rq(struct request *rq, struct blk_plug *plug,
+ struct bio *bio)
{
- struct request *rq;
- enum hctx_type type, hctx_type;
+ enum hctx_type type = blk_mq_get_hctx_type(bio->bi_opf);
+ enum hctx_type hctx_type = rq->mq_hctx->type;
- if (!plug)
- return NULL;
- rq = rq_list_peek(&plug->cached_rq);
- if (!rq || rq->q != q)
- return NULL;
+ WARN_ON_ONCE(rq_list_peek(&plug->cached_rq) != rq);
- if (blk_mq_attempt_bio_merge(q, *bio, nsegs)) {
- *bio = NULL;
- return NULL;
- }
-
- type = blk_mq_get_hctx_type((*bio)->bi_opf);
- hctx_type = rq->mq_hctx->type;
if (type != hctx_type &&
!(type == HCTX_TYPE_READ && hctx_type == HCTX_TYPE_DEFAULT))
- return NULL;
- if (op_is_flush(rq->cmd_flags) != op_is_flush((*bio)->bi_opf))
- return NULL;
+ return false;
+ if (op_is_flush(rq->cmd_flags) != op_is_flush(bio->bi_opf))
+ return false;
/*
* If any qos ->throttle() end up blocking, we will have flushed the
* before we throttle.
*/
plug->cached_rq = rq_list_next(rq);
- rq_qos_throttle(q, *bio);
+ rq_qos_throttle(rq->q, bio);
blk_mq_rq_time_init(rq, 0);
- rq->cmd_flags = (*bio)->bi_opf;
+ rq->cmd_flags = bio->bi_opf;
INIT_LIST_HEAD(&rq->queuelist);
- return rq;
+ return true;
}
static void bio_set_ioprio(struct bio *bio)
struct blk_plug *plug = blk_mq_plug(bio);
const int is_sync = op_is_sync(bio->bi_opf);
struct blk_mq_hw_ctx *hctx;
- struct request *rq;
+ struct request *rq = NULL;
unsigned int nr_segs = 1;
blk_status_t ret;
return;
}
- if (!bio_integrity_prep(bio))
- return;
-
bio_set_ioprio(bio);
- rq = blk_mq_get_cached_request(q, plug, &bio, nr_segs);
- if (!rq) {
- if (!bio)
+ if (plug) {
+ rq = rq_list_peek(&plug->cached_rq);
+ if (rq && rq->q != q)
+ rq = NULL;
+ }
+ if (rq) {
+ if (!bio_integrity_prep(bio))
return;
- rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
- if (unlikely(!rq))
+ if (blk_mq_attempt_bio_merge(q, bio, nr_segs))
return;
+ if (blk_mq_can_use_cached_rq(rq, plug, bio))
+ goto done;
+ percpu_ref_get(&q->q_usage_counter);
+ } else {
+ if (unlikely(bio_queue_enter(bio)))
+ return;
+ if (!bio_integrity_prep(bio))
+ goto fail;
+ }
+
+ rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
+ if (unlikely(!rq)) {
+fail:
+ blk_queue_exit(q);
+ return;
}
+done:
trace_block_getrq(bio);
rq_qos_track(q, rq, bio);
{
int ret;
- ivpu_dbg(vdev, RPM, "rpm_get_if_active count %d\n",
- atomic_read(&vdev->drm.dev->power.usage_count));
-
ret = pm_runtime_get_if_active(vdev->drm.dev, false);
drm_WARN_ON(&vdev->drm, ret < 0);
config FW_CFG_SYSFS
tristate "QEMU fw_cfg device support in sysfs"
- depends on SYSFS && (ARM || ARM64 || PARISC || PPC_PMAC || SPARC || X86)
+ depends on SYSFS && (ARM || ARM64 || PARISC || PPC_PMAC || RISCV || SPARC || X86)
depends on HAS_IOPORT_MAP
default n
help
/* arch-specific ctrl & data register offsets are not available in ACPI, DT */
#if !(defined(FW_CFG_CTRL_OFF) && defined(FW_CFG_DATA_OFF))
-# if (defined(CONFIG_ARM) || defined(CONFIG_ARM64))
+# if (defined(CONFIG_ARM) || defined(CONFIG_ARM64) || defined(CONFIG_RISCV))
# define FW_CFG_CTRL_OFF 0x08
# define FW_CFG_DATA_OFF 0x00
# define FW_CFG_DMA_OFF 0x10
extern int amdgpu_seamless;
extern int amdgpu_user_partt_mode;
+extern int amdgpu_agp;
#define AMDGPU_VM_MAX_NUM_CTX 4096
#define AMDGPU_SG_THRESHOLD (256*1024*1024)
}
for (i = 0; i < p->nchunks; i++) {
- struct drm_amdgpu_cs_chunk __user **chunk_ptr = NULL;
+ struct drm_amdgpu_cs_chunk __user *chunk_ptr = NULL;
struct drm_amdgpu_cs_chunk user_chunk;
uint32_t __user *cdata;
int amdgpu_umsch_mm;
int amdgpu_seamless = -1; /* auto */
uint amdgpu_debug_mask;
+int amdgpu_agp = -1; /* auto */
static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work);
MODULE_PARM_DESC(debug_mask, "debug options for amdgpu, disabled by default");
module_param_named(debug_mask, amdgpu_debug_mask, uint, 0444);
+/**
+ * DOC: agp (int)
+ * Enable the AGP aperture. This provides an aperture in the GPU's internal
+ * address space for direct access to system memory. Note that these accesses
+ * are non-snooped, so they are only used for access to uncached memory.
+ */
+MODULE_PARM_DESC(agp, "AGP (-1 = auto (default), 0 = disable, 1 = enable)");
+module_param_named(agp, amdgpu_agp, int, 0444);
+
/* These devices are not supported by amdgpu.
* They are supported by the mach64, r128, radeon drivers
*/
topology->nodes[i].num_links = (requires_reflection && topology->nodes[i].num_links) ?
topology->nodes[i].num_links : node_num_links;
}
+ /* popluate the connected port num info if supported and available */
+ if (ta_port_num_support && topology->nodes[i].num_links) {
+ memcpy(topology->nodes[i].port_num, link_extend_info_output->nodes[i].port_num,
+ sizeof(struct xgmi_connected_port_num) * TA_XGMI__MAX_PORT_NUM);
+ }
/* reflect the topology information for bi-directionality */
if (requires_reflection && topology->nodes[i].num_hops)
uint8_t is_sharing_enabled;
enum ta_xgmi_assigned_sdma_engine sdma_engine;
uint8_t num_links;
+ struct xgmi_connected_port_num port_num[TA_XGMI__MAX_PORT_NUM];
};
struct psp_xgmi_topology_info {
}
if (block_obj->hw_ops->query_ras_error_count)
- block_obj->hw_ops->query_ras_error_count(adev, &err_data);
+ block_obj->hw_ops->query_ras_error_count(adev, err_data);
if ((info->head.block == AMDGPU_RAS_BLOCK__SDMA) ||
(info->head.block == AMDGPU_RAS_BLOCK__GFX) ||
* amdgpu_uvd_entity_init - init entity
*
* @adev: amdgpu_device pointer
+ * @ring: amdgpu_ring pointer to check
*
* Initialize the entity used for handle management in the kernel driver.
*/
* amdgpu_vce_entity_init - init entity
*
* @adev: amdgpu_device pointer
+ * @ring: amdgpu_ring pointer to check
*
* Initialize the entity used for handle management in the kernel driver.
*/
amdgpu_gmc_set_agp_default(adev, mc);
amdgpu_gmc_vram_location(adev, &adev->gmc, base);
amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT);
- if (!amdgpu_sriov_vf(adev))
+ if (!amdgpu_sriov_vf(adev) && (amdgpu_agp == 1))
amdgpu_gmc_agp_location(adev, mc);
/* base offset of vram pages */
amdgpu_gmc_set_agp_default(adev, mc);
amdgpu_gmc_vram_location(adev, &adev->gmc, base);
amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_HIGH);
- if (!amdgpu_sriov_vf(adev) ||
- (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(11, 5, 0)))
+ if (!amdgpu_sriov_vf(adev) &&
+ (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(11, 5, 0)) &&
+ (amdgpu_agp == 1))
amdgpu_gmc_agp_location(adev, mc);
/* base offset of vram pages */
} else {
amdgpu_gmc_vram_location(adev, mc, base);
amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT);
- if (!amdgpu_sriov_vf(adev))
+ if (!amdgpu_sriov_vf(adev) && (amdgpu_agp == 1))
amdgpu_gmc_agp_location(adev, mc);
}
/* base offset of vram pages */
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3))
amdgpu_gmc_sysfs_fini(adev);
- adev->gmc.num_mem_partitions = 0;
- kfree(adev->gmc.mem_partitions);
amdgpu_gmc_ras_fini(adev);
amdgpu_gem_force_release(adev);
amdgpu_bo_free_kernel(&adev->gmc.pdb0_bo, NULL, &adev->gmc.ptr_pdb0);
amdgpu_bo_fini(adev);
+ adev->gmc.num_mem_partitions = 0;
+ kfree(adev->gmc.mem_partitions);
+
return 0;
}
uint64_t value;
int i;
+ if (amdgpu_sriov_vf(adev))
+ return;
+
inst_mask = adev->aid_mask;
for_each_inst(i, inst_mask) {
/* Program the AGP BAR */
WREG32_SOC15(MMHUB, i, regMC_VM_AGP_TOP,
adev->gmc.agp_end >> 24);
- if (amdgpu_sriov_vf(adev))
- return;
-
/* Program the system aperture low logical page number. */
WREG32_SOC15(MMHUB, i, regMC_VM_SYSTEM_APERTURE_LOW_ADDR,
min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
struct dmub_srv_create_params create_params;
struct dmub_srv_region_params region_params;
struct dmub_srv_region_info region_info;
- struct dmub_srv_fb_params fb_params;
+ struct dmub_srv_memory_params memory_params;
struct dmub_srv_fb_info *fb_info;
struct dmub_srv *dmub_srv;
const struct dmcub_firmware_header_v1_0 *hdr;
adev->dm.dmub_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes) +
PSP_HEADER_BYTES;
+ region_params.is_mailbox_in_inbox = false;
status = dmub_srv_calc_region_info(dmub_srv, ®ion_params,
®ion_info);
return r;
/* Rebase the regions on the framebuffer address. */
- memset(&fb_params, 0, sizeof(fb_params));
- fb_params.cpu_addr = adev->dm.dmub_bo_cpu_addr;
- fb_params.gpu_addr = adev->dm.dmub_bo_gpu_addr;
- fb_params.region_info = ®ion_info;
+ memset(&memory_params, 0, sizeof(memory_params));
+ memory_params.cpu_fb_addr = adev->dm.dmub_bo_cpu_addr;
+ memory_params.gpu_fb_addr = adev->dm.dmub_bo_gpu_addr;
+ memory_params.region_info = ®ion_info;
adev->dm.dmub_fb_info =
kzalloc(sizeof(*adev->dm.dmub_fb_info), GFP_KERNEL);
return -ENOMEM;
}
- status = dmub_srv_calc_fb_info(dmub_srv, &fb_params, fb_info);
+ status = dmub_srv_calc_mem_info(dmub_srv, &memory_params, fb_info);
if (status != DMUB_STATUS_OK) {
DRM_ERROR("Error calculating DMUB FB info: %d\n", status);
return -EINVAL;
int i;
int result = -EIO;
+ if (!ddc_service->ddc_pin || !ddc_service->ddc_pin->hw_info.hw_supported)
+ return result;
+
cmd.payloads = kcalloc(num, sizeof(struct i2c_payload), GFP_KERNEL);
if (!cmd.payloads)
struct drm_plane *other;
struct drm_plane_state *old_other_state, *new_other_state;
struct drm_crtc_state *new_crtc_state;
+ struct amdgpu_device *adev = drm_to_adev(plane->dev);
int i;
/*
- * TODO: Remove this hack once the checks below are sufficient
- * enough to determine when we need to reset all the planes on
- * the stream.
+ * TODO: Remove this hack for all asics once it proves that the
+ * fast updates works fine on DCN3.2+.
*/
- if (state->allow_modeset)
+ if (adev->ip_versions[DCE_HWIP][0] < IP_VERSION(3, 2, 0) && state->allow_modeset)
return true;
/* Exit early if we know that we're adding or removing the plane. */
struct amdgpu_dm_connector *aconnector = link->priv;
- if (!aconnector) {
- drm_dbg_dp(aconnector->base.dev,
- "Failed to find connector for link!\n");
+ if (!aconnector)
return false;
- }
return drm_dp_dpcd_read(&aconnector->dm_dp_aux.aux, address, data,
size) == size;
unsigned int upper_link_bw_in_kbps = 0, down_link_bw_in_kbps = 0;
unsigned int max_compressed_bw_in_kbps = 0;
struct dc_dsc_bw_range bw_range = {0};
- struct drm_dp_mst_topology_mgr *mst_mgr;
+ uint16_t full_pbn = aconnector->mst_output_port->full_pbn;
/*
- * check if the mode could be supported if DSC pass-through is supported
- * AND check if there enough bandwidth available to support the mode
- * with DSC enabled.
+ * Consider the case with the depth of the mst topology tree is equal or less than 2
+ * A. When dsc bitstream can be transmitted along the entire path
+ * 1. dsc is possible between source and branch/leaf device (common dsc params is possible), AND
+ * 2. dsc passthrough supported at MST branch, or
+ * 3. dsc decoding supported at leaf MST device
+ * Use maximum dsc compression as bw constraint
+ * B. When dsc bitstream cannot be transmitted along the entire path
+ * Use native bw as bw constraint
*/
if (is_dsc_common_config_possible(stream, &bw_range) &&
- aconnector->mst_output_port->passthrough_aux) {
- mst_mgr = aconnector->mst_output_port->mgr;
- mutex_lock(&mst_mgr->lock);
-
+ (aconnector->mst_output_port->passthrough_aux ||
+ aconnector->dsc_aux == &aconnector->mst_output_port->aux)) {
cur_link_settings = stream->link->verified_link_cap;
upper_link_bw_in_kbps = dc_link_bandwidth_kbps(aconnector->dc_link,
- &cur_link_settings
- );
- down_link_bw_in_kbps = kbps_from_pbn(aconnector->mst_output_port->full_pbn);
+ &cur_link_settings);
+ down_link_bw_in_kbps = kbps_from_pbn(full_pbn);
/* pick the bottleneck */
end_to_end_bw_in_kbps = min(upper_link_bw_in_kbps,
down_link_bw_in_kbps);
- mutex_unlock(&mst_mgr->lock);
-
/*
* use the maximum dsc compression bandwidth as the required
* bandwidth for the mode
/* check if mode could be supported within full_pbn */
bpp = convert_dc_color_depth_into_bpc(stream->timing.display_color_depth) * 3;
pbn = drm_dp_calc_pbn_mode(stream->timing.pix_clk_100hz / 10, bpp, false);
-
- if (pbn > aconnector->mst_output_port->full_pbn)
+ if (pbn > full_pbn)
return DC_FAIL_BANDWIDTH_VALIDATE;
}
if (dc->config.disable_ips == DMUB_IPS_ENABLE ||
dc->config.disable_ips == DMUB_IPS_DISABLE_DYNAMIC) {
- val |= DMUB_IPS1_ALLOW_MASK;
- val |= DMUB_IPS2_ALLOW_MASK;
- } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS1) {
val = val & ~DMUB_IPS1_ALLOW_MASK;
val = val & ~DMUB_IPS2_ALLOW_MASK;
- } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS2) {
- val |= DMUB_IPS1_ALLOW_MASK;
- val = val & ~DMUB_IPS2_ALLOW_MASK;
- } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS2_Z10) {
+ } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS1) {
val |= DMUB_IPS1_ALLOW_MASK;
val |= DMUB_IPS2_ALLOW_MASK;
+ } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS2) {
+ val = val & ~DMUB_IPS1_ALLOW_MASK;
+ val |= DMUB_IPS2_ALLOW_MASK;
+ } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS2_Z10) {
+ val = val & ~DMUB_IPS1_ALLOW_MASK;
+ val = val & ~DMUB_IPS2_ALLOW_MASK;
}
if (!allow_idle) {
- val = val & ~DMUB_IPS1_ALLOW_MASK;
- val = val & ~DMUB_IPS2_ALLOW_MASK;
+ val |= DMUB_IPS1_ALLOW_MASK;
+ val |= DMUB_IPS2_ALLOW_MASK;
}
dcn35_smu_write_ips_scratch(clk_mgr, val);
struct pipe_ctx *otg_master = resource_get_otg_master_for_stream(&context->res_ctx,
context->streams[i]);
- if (otg_master->stream->test_pattern.type != DP_TEST_PATTERN_VIDEO_MODE)
+ if (otg_master && otg_master->stream->test_pattern.type != DP_TEST_PATTERN_VIDEO_MODE)
resource_build_test_pattern_params(&context->res_ctx, otg_master);
}
}
if (dc->hwss.get_idle_state)
idle_state = dc->hwss.get_idle_state(dc);
- if ((idle_state & DMUB_IPS1_ALLOW_MASK) ||
- (idle_state & DMUB_IPS2_ALLOW_MASK))
+ if (!(idle_state & DMUB_IPS1_ALLOW_MASK) ||
+ !(idle_state & DMUB_IPS2_ALLOW_MASK))
return true;
return false;
sec_next = sec_pipe->next_odm_pipe;
sec_prev = sec_pipe->prev_odm_pipe;
+ if (pri_pipe == NULL)
+ return false;
+
*sec_pipe = *pri_pipe;
sec_pipe->top_pipe = sec_top;
allow_state = dc->hwss.get_idle_state(dc);
dc->hwss.set_idle_state(dc, false);
- if (allow_state & DMUB_IPS2_ALLOW_MASK) {
+ if (!(allow_state & DMUB_IPS2_ALLOW_MASK)) {
// Wait for evaluation time
udelay(dc->debug.ips2_eval_delay_us);
commit_state = dc->hwss.get_idle_state(dc);
- if (commit_state & DMUB_IPS2_COMMIT_MASK) {
+ if (!(commit_state & DMUB_IPS2_COMMIT_MASK)) {
// Tell PMFW to exit low power state
dc->clk_mgr->funcs->exit_low_power_state(dc->clk_mgr);
for (i = 0; i < max_num_polls; ++i) {
commit_state = dc->hwss.get_idle_state(dc);
- if (!(commit_state & DMUB_IPS2_COMMIT_MASK))
+ if (commit_state & DMUB_IPS2_COMMIT_MASK)
break;
udelay(1);
}
dc_dmub_srv_notify_idle(dc, false);
- if (allow_state & DMUB_IPS1_ALLOW_MASK) {
+ if (!(allow_state & DMUB_IPS1_ALLOW_MASK)) {
for (i = 0; i < max_num_polls; ++i) {
commit_state = dc->hwss.get_idle_state(dc);
- if (!(commit_state & DMUB_IPS1_COMMIT_MASK))
+ if (commit_state & DMUB_IPS1_COMMIT_MASK)
break;
udelay(1);
unsigned int disable_fams;
unsigned int skip_avmute;
unsigned int mst_start_top_delay;
+ unsigned int remove_sink_ext_caps;
};
struct dc_edid_caps {
/* invalid mode ! */
ASSERT_CRITICAL(false);
}
-
- REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_CLK_EN, 1);
- REG_UPDATE(DIG_FE_EN_CNTL, DIG_FE_ENABLE, 1);
- } else {
- REG_UPDATE(DIG_FE_EN_CNTL, DIG_FE_ENABLE, 0);
- REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_CLK_EN, 0);
}
}
struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, 0);
+ REG_UPDATE(DIG_FE_EN_CNTL, DIG_FE_ENABLE, 0);
+ REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_CLK_EN, 0);
}
static void enc35_enable_fifo(struct stream_encoder *enc)
struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_READ_START_LEVEL, 0x7);
+ REG_UPDATE(DIG_FE_CLK_CNTL, DIG_FE_CLK_EN, 1);
+ REG_UPDATE(DIG_FE_EN_CNTL, DIG_FE_ENABLE, 1);
enc35_reset_fifo(enc, true);
enc35_reset_fifo(enc, false);
if (sink->edid_caps.panel_patch.skip_scdc_overwrite)
link->ctx->dc->debug.hdmi20_disable = true;
+ if (sink->edid_caps.panel_patch.remove_sink_ext_caps)
+ link->dpcd_sink_ext_caps.raw = 0;
+
if (dc_is_hdmi_signal(link->connector_signal))
read_scdc_caps(link->ddc, link->local_sink);
uint32_t vbios_size;
const uint8_t *fw_inst_const;
const uint8_t *fw_bss_data;
+ bool is_mailbox_in_inbox;
};
/**
*/
struct dmub_srv_region_info {
uint32_t fb_size;
+ uint32_t inbox_size;
uint8_t num_regions;
struct dmub_region regions[DMUB_WINDOW_TOTAL];
};
/**
- * struct dmub_srv_fb_params - parameters used for driver fb setup
+ * struct dmub_srv_memory_params - parameters used for driver fb setup
* @region_info: region info calculated by dmub service
- * @cpu_addr: base cpu address for the framebuffer
- * @gpu_addr: base gpu virtual address for the framebuffer
+ * @cpu_fb_addr: base cpu address for the framebuffer
+ * @cpu_inbox_addr: base cpu address for the gart
+ * @gpu_fb_addr: base gpu virtual address for the framebuffer
+ * @gpu_inbox_addr: base gpu virtual address for the gart
*/
-struct dmub_srv_fb_params {
+struct dmub_srv_memory_params {
const struct dmub_srv_region_info *region_info;
- void *cpu_addr;
- uint64_t gpu_addr;
+ void *cpu_fb_addr;
+ void *cpu_inbox_addr;
+ uint64_t gpu_fb_addr;
+ uint64_t gpu_inbox_addr;
};
/**
* DMUB_STATUS_OK - success
* DMUB_STATUS_INVALID - unspecified error
*/
-enum dmub_status dmub_srv_calc_fb_info(struct dmub_srv *dmub,
- const struct dmub_srv_fb_params *params,
+enum dmub_status dmub_srv_calc_mem_info(struct dmub_srv *dmub,
+ const struct dmub_srv_memory_params *params,
struct dmub_srv_fb_info *out);
/**
uint32_t fw_state_size = DMUB_FW_STATE_SIZE;
uint32_t trace_buffer_size = DMUB_TRACE_BUFFER_SIZE;
uint32_t scratch_mem_size = DMUB_SCRATCH_MEM_SIZE;
-
+ uint32_t previous_top = 0;
if (!dmub->sw_init)
return DMUB_STATUS_INVALID;
bios->base = dmub_align(stack->top, 256);
bios->top = bios->base + params->vbios_size;
- mail->base = dmub_align(bios->top, 256);
- mail->top = mail->base + DMUB_MAILBOX_SIZE;
+ if (params->is_mailbox_in_inbox) {
+ mail->base = 0;
+ mail->top = mail->base + DMUB_MAILBOX_SIZE;
+ previous_top = bios->top;
+ } else {
+ mail->base = dmub_align(bios->top, 256);
+ mail->top = mail->base + DMUB_MAILBOX_SIZE;
+ previous_top = mail->top;
+ }
fw_info = dmub_get_fw_meta_info(params);
dmub->fw_version = fw_info->fw_version;
}
- trace_buff->base = dmub_align(mail->top, 256);
+ trace_buff->base = dmub_align(previous_top, 256);
trace_buff->top = trace_buff->base + dmub_align(trace_buffer_size, 64);
fw_state->base = dmub_align(trace_buff->top, 256);
out->fb_size = dmub_align(scratch_mem->top, 4096);
+ if (params->is_mailbox_in_inbox)
+ out->inbox_size = dmub_align(mail->top, 4096);
+
return DMUB_STATUS_OK;
}
-enum dmub_status dmub_srv_calc_fb_info(struct dmub_srv *dmub,
- const struct dmub_srv_fb_params *params,
+enum dmub_status dmub_srv_calc_mem_info(struct dmub_srv *dmub,
+ const struct dmub_srv_memory_params *params,
struct dmub_srv_fb_info *out)
{
uint8_t *cpu_base;
if (params->region_info->num_regions != DMUB_NUM_WINDOWS)
return DMUB_STATUS_INVALID;
- cpu_base = (uint8_t *)params->cpu_addr;
- gpu_base = params->gpu_addr;
+ cpu_base = (uint8_t *)params->cpu_fb_addr;
+ gpu_base = params->gpu_fb_addr;
for (i = 0; i < DMUB_NUM_WINDOWS; ++i) {
const struct dmub_region *reg =
out->fb[i].cpu_addr = cpu_base + reg->base;
out->fb[i].gpu_addr = gpu_base + reg->base;
+
+ if (i == DMUB_WINDOW_4_MAILBOX && params->cpu_inbox_addr != 0) {
+ out->fb[i].cpu_addr = (uint8_t *)params->cpu_inbox_addr + reg->base;
+ out->fb[i].gpu_addr = params->gpu_inbox_addr + reg->base;
+ }
+
out->fb[i].size = reg->top - reg->base;
}
return DMUB_STATUS_INVALID;
if (dmub->hw_funcs.get_inbox1_rptr && dmub->hw_funcs.get_inbox1_wptr) {
- dmub->inbox1_rb.rptr = dmub->hw_funcs.get_inbox1_rptr(dmub);
- dmub->inbox1_rb.wrpt = dmub->hw_funcs.get_inbox1_wptr(dmub);
- dmub->inbox1_last_wptr = dmub->inbox1_rb.wrpt;
+ uint32_t rptr = dmub->hw_funcs.get_inbox1_rptr(dmub);
+ uint32_t wptr = dmub->hw_funcs.get_inbox1_wptr(dmub);
+
+ if (rptr > dmub->inbox1_rb.capacity || wptr > dmub->inbox1_rb.capacity) {
+ return DMUB_STATUS_HW_FAILURE;
+ } else {
+ dmub->inbox1_rb.rptr = rptr;
+ dmub->inbox1_rb.wrpt = wptr;
+ dmub->inbox1_last_wptr = dmub->inbox1_rb.wrpt;
+ }
}
return DMUB_STATUS_OK;
if (!dmub->hw_init)
return DMUB_STATUS_INVALID;
+ if (dmub->inbox1_rb.rptr > dmub->inbox1_rb.capacity ||
+ dmub->inbox1_rb.wrpt > dmub->inbox1_rb.capacity) {
+ return DMUB_STATUS_HW_FAILURE;
+ }
+
if (dmub_rb_push_front(&dmub->inbox1_rb, cmd))
return DMUB_STATUS_OK;
VOLTAGE_GUARDBAND_COUNT
} GFX_GUARDBAND_e;
-#define SMU_METRICS_TABLE_VERSION 0x8
+#define SMU_METRICS_TABLE_VERSION 0x9
typedef struct __attribute__((packed, aligned(4))) {
uint32_t AccumulationCounter;
//XGMI Data tranfser size
uint64_t XgmiReadDataSizeAcc[8];//in KByte
uint64_t XgmiWriteDataSizeAcc[8];//in KByte
+
+ //PCIE BW Data and error count
+ uint32_t PcieBandwidth[4];
+ uint32_t PCIeL0ToRecoveryCountAcc; // The Pcie counter itself is accumulated
+ uint32_t PCIenReplayAAcc; // The Pcie counter itself is accumulated
+ uint32_t PCIenReplayARolloverCountAcc; // The Pcie counter itself is accumulated
+ uint32_t PCIeNAKSentCountAcc; // The Pcie counter itself is accumulated
+ uint32_t PCIeNAKReceivedCountAcc; // The Pcie counter itself is accumulated
} MetricsTable_t;
#define SMU_VF_METRICS_TABLE_VERSION 0x3
static int smu_v13_0_6_notify_unload(struct smu_context *smu)
{
- if (smu->smc_fw_version <= 0x553500)
+ if (amdgpu_in_reset(smu->adev))
return 0;
dev_dbg(smu->adev->dev, "Notify PMFW about driver unload");
smu_v13_0_6_get_current_pcie_link_speed(smu);
gpu_metrics->pcie_bandwidth_acc =
SMUQ10_ROUND(metrics->PcieBandwidthAcc[0]);
+ gpu_metrics->pcie_bandwidth_inst =
+ SMUQ10_ROUND(metrics->PcieBandwidth[0]);
+ gpu_metrics->pcie_l0_to_recov_count_acc =
+ metrics->PCIeL0ToRecoveryCountAcc;
+ gpu_metrics->pcie_replay_count_acc =
+ metrics->PCIenReplayAAcc;
+ gpu_metrics->pcie_replay_rover_count_acc =
+ metrics->PCIenReplayARolloverCountAcc;
}
gpu_metrics->system_clock_counter = ktime_get_boottime_ns();
certifi==2023.7.22
charset-normalizer==3.2.0
idna==3.4
-pip==23.2.1
+pip==23.3
python-gitlab==3.15.0
requests==2.31.0
requests-toolbelt==1.0.0
ruamel.yaml.clib==0.2.7
setuptools==68.0.0
tenacity==8.2.3
-urllib3==2.0.4
-wheel==0.41.1
\ No newline at end of file
+urllib3==2.0.7
+wheel==0.41.1
DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "IdeaPad Duet 3 10IGL5"),
},
.driver_data = (void *)&lcd1200x1920_rightside_up,
+ }, { /* Lenovo Legion Go 8APU1 */
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Legion Go 8APU1"),
+ },
+ .driver_data = (void *)&lcd1600x2560_leftside_up,
}, { /* Lenovo Yoga Book X90F / X90L */
.matches = {
DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Intel Corporation"),
int index_nr;
spinlock_t refs_lock;
- spinlock_t list_lock;
+ rwlock_t list_lock;
int *refs;
struct list_head ntfy;
int types_nr, int index_nr, struct nvkm_event *event)
{
spin_lock_init(&event->refs_lock);
- spin_lock_init(&event->list_lock);
+ rwlock_init(&event->list_lock);
return __nvkm_event_init(func, subdev, types_nr, index_nr, event);
}
if (nouveau_modeset != 2) {
ret = nvif_disp_ctor(&drm->client.device, "kmsDisp", 0, &disp->disp);
+ /* no display hw */
+ if (ret == -ENODEV) {
+ ret = 0;
+ goto disp_create_err;
+ }
if (!ret && (disp->disp.outp_mask || drm->vbios.dcb.entries)) {
nouveau_display_create_properties(dev);
static void
nvkm_event_ntfy_remove(struct nvkm_event_ntfy *ntfy)
{
- spin_lock_irq(&ntfy->event->list_lock);
+ write_lock_irq(&ntfy->event->list_lock);
list_del_init(&ntfy->head);
- spin_unlock_irq(&ntfy->event->list_lock);
+ write_unlock_irq(&ntfy->event->list_lock);
}
static void
nvkm_event_ntfy_insert(struct nvkm_event_ntfy *ntfy)
{
- spin_lock_irq(&ntfy->event->list_lock);
+ write_lock_irq(&ntfy->event->list_lock);
list_add_tail(&ntfy->head, &ntfy->event->ntfy);
- spin_unlock_irq(&ntfy->event->list_lock);
+ write_unlock_irq(&ntfy->event->list_lock);
}
static void
return;
nvkm_trace(event->subdev, "event: ntfy %08x on %d\n", bits, id);
- spin_lock_irqsave(&event->list_lock, flags);
+ read_lock_irqsave(&event->list_lock, flags);
list_for_each_entry_safe(ntfy, ntmp, &event->ntfy, head) {
if (ntfy->id == id && ntfy->bits & bits) {
}
}
- spin_unlock_irqrestore(&event->list_lock, flags);
+ read_unlock_irqrestore(&event->list_lock, flags);
}
void
struct nvfw_gsp_rpc *rpc;
rpc = r535_gsp_cmdq_get(gsp, ALIGN(sizeof(*rpc) + argc, sizeof(u64)));
- if (!rpc)
- return NULL;
+ if (IS_ERR(rpc))
+ return ERR_CAST(rpc);
rpc->header_version = 0x03000000;
rpc->signature = ('C' << 24) | ('P' << 16) | ('R' << 8) | 'V';
r535_gsp_acpi_mux_id(acpi_handle handle, u32 id, MUX_METHOD_DATA_ELEMENT *mode,
MUX_METHOD_DATA_ELEMENT *part)
{
- acpi_handle iter = NULL, handle_mux;
+ acpi_handle iter = NULL, handle_mux = NULL;
acpi_status status;
unsigned long long value;
{
struct dw_i2c_dev *dev = context;
- *val = readl_relaxed(dev->base + reg);
+ *val = readl(dev->base + reg);
return 0;
}
{
struct dw_i2c_dev *dev = context;
- writel_relaxed(val, dev->base + reg);
+ writel(val, dev->base + reg);
return 0;
}
{
struct dw_i2c_dev *dev = context;
- *val = swab32(readl_relaxed(dev->base + reg));
+ *val = swab32(readl(dev->base + reg));
return 0;
}
{
struct dw_i2c_dev *dev = context;
- writel_relaxed(swab32(val), dev->base + reg);
+ writel(swab32(val), dev->base + reg);
return 0;
}
{
struct dw_i2c_dev *dev = context;
- *val = readw_relaxed(dev->base + reg) |
- (readw_relaxed(dev->base + reg + 2) << 16);
+ *val = readw(dev->base + reg) |
+ (readw(dev->base + reg + 2) << 16);
return 0;
}
{
struct dw_i2c_dev *dev = context;
- writew_relaxed(val, dev->base + reg);
- writew_relaxed(val >> 16, dev->base + reg + 2);
+ writew(val, dev->base + reg);
+ writew(val >> 16, dev->base + reg + 2);
return 0;
}
return ocores_init(dev, i2c);
}
-static DEFINE_SIMPLE_DEV_PM_OPS(ocores_i2c_pm,
- ocores_i2c_suspend, ocores_i2c_resume);
+static DEFINE_NOIRQ_DEV_PM_OPS(ocores_i2c_pm,
+ ocores_i2c_suspend, ocores_i2c_resume);
static struct platform_driver ocores_i2c_driver = {
.probe = ocores_i2c_probe,
u32 hs_mask;
struct i2c_bus_recovery_info recovery;
+ struct pinctrl *pinctrl;
+ struct pinctrl_state *pinctrl_default;
+ struct pinctrl_state *pinctrl_recovery;
};
#define _IBMR(i2c) ((i2c)->reg_ibmr)
*/
gpiod_set_value(i2c->recovery.scl_gpiod, ibmr & IBMR_SCLS);
gpiod_set_value(i2c->recovery.sda_gpiod, ibmr & IBMR_SDAS);
+
+ WARN_ON(pinctrl_select_state(i2c->pinctrl, i2c->pinctrl_recovery));
}
static void i2c_pxa_unprepare_recovery(struct i2c_adapter *adap)
{
struct pxa_i2c *i2c = adap->algo_data;
- struct i2c_bus_recovery_info *bri = adap->bus_recovery_info;
u32 isr;
/*
i2c_pxa_do_reset(i2c);
}
- WARN_ON(pinctrl_select_state(bri->pinctrl, bri->pins_default));
+ WARN_ON(pinctrl_select_state(i2c->pinctrl, i2c->pinctrl_default));
dev_dbg(&i2c->adap.dev, "recovery: IBMR 0x%08x ISR 0x%08x\n",
readl(_IBMR(i2c)), readl(_ISR(i2c)));
if (IS_ENABLED(CONFIG_I2C_PXA_SLAVE))
return 0;
- bri->pinctrl = devm_pinctrl_get(dev);
- if (PTR_ERR(bri->pinctrl) == -ENODEV) {
- bri->pinctrl = NULL;
+ i2c->pinctrl = devm_pinctrl_get(dev);
+ if (PTR_ERR(i2c->pinctrl) == -ENODEV)
+ i2c->pinctrl = NULL;
+ if (IS_ERR(i2c->pinctrl))
+ return PTR_ERR(i2c->pinctrl);
+
+ if (!i2c->pinctrl)
+ return 0;
+
+ i2c->pinctrl_default = pinctrl_lookup_state(i2c->pinctrl,
+ PINCTRL_STATE_DEFAULT);
+ i2c->pinctrl_recovery = pinctrl_lookup_state(i2c->pinctrl, "recovery");
+
+ if (IS_ERR(i2c->pinctrl_default) || IS_ERR(i2c->pinctrl_recovery)) {
+ dev_info(dev, "missing pinmux recovery information: %ld %ld\n",
+ PTR_ERR(i2c->pinctrl_default),
+ PTR_ERR(i2c->pinctrl_recovery));
+ return 0;
+ }
+
+ /*
+ * Claiming GPIOs can influence the pinmux state, and may glitch the
+ * I2C bus. Do this carefully.
+ */
+ bri->scl_gpiod = devm_gpiod_get(dev, "scl", GPIOD_OUT_HIGH_OPEN_DRAIN);
+ if (bri->scl_gpiod == ERR_PTR(-EPROBE_DEFER))
+ return -EPROBE_DEFER;
+ if (IS_ERR(bri->scl_gpiod)) {
+ dev_info(dev, "missing scl gpio recovery information: %pe\n",
+ bri->scl_gpiod);
+ return 0;
+ }
+
+ /*
+ * We have SCL. Pull SCL low and wait a bit so that SDA glitches
+ * have no effect.
+ */
+ gpiod_direction_output(bri->scl_gpiod, 0);
+ udelay(10);
+ bri->sda_gpiod = devm_gpiod_get(dev, "sda", GPIOD_OUT_HIGH_OPEN_DRAIN);
+
+ /* Wait a bit in case of a SDA glitch, and then release SCL. */
+ udelay(10);
+ gpiod_direction_output(bri->scl_gpiod, 1);
+
+ if (bri->sda_gpiod == ERR_PTR(-EPROBE_DEFER))
+ return -EPROBE_DEFER;
+
+ if (IS_ERR(bri->sda_gpiod)) {
+ dev_info(dev, "missing sda gpio recovery information: %pe\n",
+ bri->sda_gpiod);
return 0;
}
- if (IS_ERR(bri->pinctrl))
- return PTR_ERR(bri->pinctrl);
bri->prepare_recovery = i2c_pxa_prepare_recovery;
bri->unprepare_recovery = i2c_pxa_unprepare_recovery;
+ bri->recover_bus = i2c_generic_scl_recovery;
i2c->adap.bus_recovery_info = bri;
- return 0;
+ /*
+ * Claiming GPIOs can change the pinmux state, which confuses the
+ * pinctrl since pinctrl's idea of the current setting is unaffected
+ * by the pinmux change caused by claiming the GPIO. Work around that
+ * by switching pinctrl to the GPIO state here. We do it this way to
+ * avoid glitching the I2C bus.
+ */
+ pinctrl_select_state(i2c->pinctrl, i2c->pinctrl_recovery);
+
+ return pinctrl_select_state(i2c->pinctrl, i2c->pinctrl_default);
}
static int i2c_pxa_probe(struct platform_device *dev)
break;
}
+ if (!shr)
+ gic_flush_dcache_to_poc(base, PAGE_ORDER_TO_SIZE(order));
+
its_write_baser(its, baser, val);
tmp = baser->val;
- if (its->flags & ITS_FLAGS_FORCE_NON_SHAREABLE)
- tmp &= ~GITS_BASER_SHAREABILITY_MASK;
-
if ((val ^ tmp) & GITS_BASER_SHAREABILITY_MASK) {
/*
* Shareability didn't stick. Just use
* non-cacheable as well.
*/
shr = tmp & GITS_BASER_SHAREABILITY_MASK;
- if (!shr) {
+ if (!shr)
cache = GITS_BASER_nC;
- gic_flush_dcache_to_poc(base, PAGE_ORDER_TO_SIZE(order));
- }
+
goto retry_baser;
}
/* erratum 24313: ignore memory access type */
cache = GITS_BASER_nCnB;
+ if (its->flags & ITS_FLAGS_FORCE_NON_SHAREABLE) {
+ cache = GITS_BASER_nC;
+ shr = 0;
+ }
+
for (i = 0; i < GITS_BASER_NR_REGS; i++) {
struct its_baser *baser = its->tables + i;
u64 val = its_read_baser(its, baser);
typedef enum evict_result (*le_predicate)(struct lru_entry *le, void *context);
-static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *context)
+static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *context, bool no_sleep)
{
unsigned long tested = 0;
struct list_head *h = lru->cursor;
h = h->next;
- cond_resched();
+ if (!no_sleep)
+ cond_resched();
}
return NULL;
*/
struct buffer_tree {
- struct rw_semaphore lock;
+ union {
+ struct rw_semaphore lock;
+ rwlock_t spinlock;
+ } u;
struct rb_root root;
} ____cacheline_aligned_in_smp;
* on the locks.
*/
unsigned int num_locks;
+ bool no_sleep;
struct buffer_tree trees[];
};
+static DEFINE_STATIC_KEY_FALSE(no_sleep_enabled);
+
static inline unsigned int cache_index(sector_t block, unsigned int num_locks)
{
return dm_hash_locks_index(block, num_locks);
static inline void cache_read_lock(struct dm_buffer_cache *bc, sector_t block)
{
- down_read(&bc->trees[cache_index(block, bc->num_locks)].lock);
+ if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
+ read_lock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
+ else
+ down_read(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
}
static inline void cache_read_unlock(struct dm_buffer_cache *bc, sector_t block)
{
- up_read(&bc->trees[cache_index(block, bc->num_locks)].lock);
+ if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
+ read_unlock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
+ else
+ up_read(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
}
static inline void cache_write_lock(struct dm_buffer_cache *bc, sector_t block)
{
- down_write(&bc->trees[cache_index(block, bc->num_locks)].lock);
+ if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
+ write_lock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
+ else
+ down_write(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
}
static inline void cache_write_unlock(struct dm_buffer_cache *bc, sector_t block)
{
- up_write(&bc->trees[cache_index(block, bc->num_locks)].lock);
+ if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
+ write_unlock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
+ else
+ up_write(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
}
/*
static void __lh_lock(struct lock_history *lh, unsigned int index)
{
- if (lh->write)
- down_write(&lh->cache->trees[index].lock);
- else
- down_read(&lh->cache->trees[index].lock);
+ if (lh->write) {
+ if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
+ write_lock_bh(&lh->cache->trees[index].u.spinlock);
+ else
+ down_write(&lh->cache->trees[index].u.lock);
+ } else {
+ if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
+ read_lock_bh(&lh->cache->trees[index].u.spinlock);
+ else
+ down_read(&lh->cache->trees[index].u.lock);
+ }
}
static void __lh_unlock(struct lock_history *lh, unsigned int index)
{
- if (lh->write)
- up_write(&lh->cache->trees[index].lock);
- else
- up_read(&lh->cache->trees[index].lock);
+ if (lh->write) {
+ if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
+ write_unlock_bh(&lh->cache->trees[index].u.spinlock);
+ else
+ up_write(&lh->cache->trees[index].u.lock);
+ } else {
+ if (static_branch_unlikely(&no_sleep_enabled) && lh->cache->no_sleep)
+ read_unlock_bh(&lh->cache->trees[index].u.spinlock);
+ else
+ up_read(&lh->cache->trees[index].u.lock);
+ }
}
/*
return le_to_buffer(le);
}
-static void cache_init(struct dm_buffer_cache *bc, unsigned int num_locks)
+static void cache_init(struct dm_buffer_cache *bc, unsigned int num_locks, bool no_sleep)
{
unsigned int i;
bc->num_locks = num_locks;
+ bc->no_sleep = no_sleep;
for (i = 0; i < bc->num_locks; i++) {
- init_rwsem(&bc->trees[i].lock);
+ if (no_sleep)
+ rwlock_init(&bc->trees[i].u.spinlock);
+ else
+ init_rwsem(&bc->trees[i].u.lock);
bc->trees[i].root = RB_ROOT;
}
struct lru_entry *le;
struct dm_buffer *b;
- le = lru_evict(&bc->lru[list_mode], __evict_pred, &w);
+ le = lru_evict(&bc->lru[list_mode], __evict_pred, &w, bc->no_sleep);
if (!le)
return NULL;
struct evict_wrapper w = {.lh = lh, .pred = pred, .context = context};
while (true) {
- le = lru_evict(&bc->lru[old_mode], __evict_pred, &w);
+ le = lru_evict(&bc->lru[old_mode], __evict_pred, &w, bc->no_sleep);
if (!le)
break;
{
unsigned int i;
+ BUG_ON(bc->no_sleep);
for (i = 0; i < bc->num_locks; i++) {
- down_write(&bc->trees[i].lock);
+ down_write(&bc->trees[i].u.lock);
__remove_range(bc, &bc->trees[i].root, begin, end, pred, release);
- up_write(&bc->trees[i].lock);
+ up_write(&bc->trees[i].u.lock);
}
}
struct dm_buffer_cache cache; /* must be last member */
};
-static DEFINE_STATIC_KEY_FALSE(no_sleep_enabled);
-
/*----------------------------------------------------------------*/
#define dm_bufio_in_request() (!!current->bio_list)
if (need_submit)
submit_io(b, REQ_OP_READ, read_endio);
- wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
+ if (nf != NF_GET) /* we already tested this condition above */
+ wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
if (b->read_error) {
int error = blk_status_to_errno(b->read_error);
r = -ENOMEM;
goto bad_client;
}
- cache_init(&c->cache, num_locks);
+ cache_init(&c->cache, num_locks, (flags & DM_BUFIO_CLIENT_NO_SLEEP) != 0);
c->bdev = bdev;
c->block_size = block_size;
unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
gfp_t gfp_mask = GFP_NOWAIT | __GFP_HIGHMEM;
unsigned int remaining_size;
- unsigned int order = MAX_ORDER - 1;
+ unsigned int order = MAX_ORDER;
retry:
if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM))
struct work_struct flush_expired_bios;
struct list_head delayed_bios;
struct task_struct *worker;
- atomic_t may_delay;
+ bool may_delay;
struct delay_class read;
struct delay_class write;
return !!dc->worker;
}
-static void flush_delayed_bios_fast(struct delay_c *dc, bool flush_all)
-{
- struct dm_delay_info *delayed, *next;
-
- mutex_lock(&delayed_bios_lock);
- list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
- if (flush_all || time_after_eq(jiffies, delayed->expires)) {
- struct bio *bio = dm_bio_from_per_bio_data(delayed,
- sizeof(struct dm_delay_info));
- list_del(&delayed->list);
- dm_submit_bio_remap(bio, NULL);
- delayed->class->ops--;
- }
- }
- mutex_unlock(&delayed_bios_lock);
-}
-
-static int flush_worker_fn(void *data)
-{
- struct delay_c *dc = data;
-
- while (1) {
- flush_delayed_bios_fast(dc, false);
- if (unlikely(list_empty(&dc->delayed_bios))) {
- set_current_state(TASK_INTERRUPTIBLE);
- schedule();
- } else
- cond_resched();
- }
-
- return 0;
-}
-
static void flush_bios(struct bio *bio)
{
struct bio *n;
}
}
-static struct bio *flush_delayed_bios(struct delay_c *dc, bool flush_all)
+static void flush_delayed_bios(struct delay_c *dc, bool flush_all)
{
struct dm_delay_info *delayed, *next;
+ struct bio_list flush_bio_list;
unsigned long next_expires = 0;
- unsigned long start_timer = 0;
- struct bio_list flush_bios = { };
+ bool start_timer = false;
+ bio_list_init(&flush_bio_list);
mutex_lock(&delayed_bios_lock);
list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
+ cond_resched();
if (flush_all || time_after_eq(jiffies, delayed->expires)) {
struct bio *bio = dm_bio_from_per_bio_data(delayed,
sizeof(struct dm_delay_info));
list_del(&delayed->list);
- bio_list_add(&flush_bios, bio);
+ bio_list_add(&flush_bio_list, bio);
delayed->class->ops--;
continue;
}
- if (!start_timer) {
- start_timer = 1;
- next_expires = delayed->expires;
- } else
- next_expires = min(next_expires, delayed->expires);
+ if (!delay_is_fast(dc)) {
+ if (!start_timer) {
+ start_timer = true;
+ next_expires = delayed->expires;
+ } else {
+ next_expires = min(next_expires, delayed->expires);
+ }
+ }
}
mutex_unlock(&delayed_bios_lock);
if (start_timer)
queue_timeout(dc, next_expires);
- return bio_list_get(&flush_bios);
+ flush_bios(bio_list_get(&flush_bio_list));
+}
+
+static int flush_worker_fn(void *data)
+{
+ struct delay_c *dc = data;
+
+ while (!kthread_should_stop()) {
+ flush_delayed_bios(dc, false);
+ mutex_lock(&delayed_bios_lock);
+ if (unlikely(list_empty(&dc->delayed_bios))) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ mutex_unlock(&delayed_bios_lock);
+ schedule();
+ } else {
+ mutex_unlock(&delayed_bios_lock);
+ cond_resched();
+ }
+ }
+
+ return 0;
}
static void flush_expired_bios(struct work_struct *work)
struct delay_c *dc;
dc = container_of(work, struct delay_c, flush_expired_bios);
- if (delay_is_fast(dc))
- flush_delayed_bios_fast(dc, false);
- else
- flush_bios(flush_delayed_bios(dc, false));
+ flush_delayed_bios(dc, false);
}
static void delay_dtr(struct dm_target *ti)
if (dc->worker)
kthread_stop(dc->worker);
- if (!delay_is_fast(dc))
- mutex_destroy(&dc->timer_lock);
+ mutex_destroy(&dc->timer_lock);
kfree(dc);
}
ti->private = dc;
INIT_LIST_HEAD(&dc->delayed_bios);
- atomic_set(&dc->may_delay, 1);
+ mutex_init(&dc->timer_lock);
+ dc->may_delay = true;
dc->argc = argc;
ret = delay_class_ctr(ti, &dc->read, argv);
"dm-delay-flush-worker");
if (IS_ERR(dc->worker)) {
ret = PTR_ERR(dc->worker);
+ dc->worker = NULL;
goto bad;
}
} else {
timer_setup(&dc->delay_timer, handle_delayed_timer, 0);
INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
- mutex_init(&dc->timer_lock);
dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0);
if (!dc->kdelayd_wq) {
ret = -EINVAL;
struct dm_delay_info *delayed;
unsigned long expires = 0;
- if (!c->delay || !atomic_read(&dc->may_delay))
+ if (!c->delay)
return DM_MAPIO_REMAPPED;
delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info));
delayed->expires = expires = jiffies + msecs_to_jiffies(c->delay);
mutex_lock(&delayed_bios_lock);
+ if (unlikely(!dc->may_delay)) {
+ mutex_unlock(&delayed_bios_lock);
+ return DM_MAPIO_REMAPPED;
+ }
c->ops++;
list_add_tail(&delayed->list, &dc->delayed_bios);
mutex_unlock(&delayed_bios_lock);
{
struct delay_c *dc = ti->private;
- atomic_set(&dc->may_delay, 0);
+ mutex_lock(&delayed_bios_lock);
+ dc->may_delay = false;
+ mutex_unlock(&delayed_bios_lock);
- if (delay_is_fast(dc))
- flush_delayed_bios_fast(dc, true);
- else {
+ if (!delay_is_fast(dc))
del_timer_sync(&dc->delay_timer);
- flush_bios(flush_delayed_bios(dc, true));
- }
+ flush_delayed_bios(dc, true);
}
static void delay_resume(struct dm_target *ti)
{
struct delay_c *dc = ti->private;
- atomic_set(&dc->may_delay, 1);
+ dc->may_delay = true;
}
static int delay_map(struct dm_target *ti, struct bio *bio)
{
if (unlikely(verity_hash(v, verity_io_hash_req(v, io),
data, 1 << v->data_dev_block_bits,
- verity_io_real_digest(v, io))))
+ verity_io_real_digest(v, io), true)))
return 0;
return memcmp(verity_io_real_digest(v, io), want_digest,
/* Always re-validate the corrected block against the expected hash */
r = verity_hash(v, verity_io_hash_req(v, io), fio->output,
1 << v->data_dev_block_bits,
- verity_io_real_digest(v, io));
+ verity_io_real_digest(v, io), true);
if (unlikely(r < 0))
return r;
* Wrapper for crypto_ahash_init, which handles verity salting.
*/
static int verity_hash_init(struct dm_verity *v, struct ahash_request *req,
- struct crypto_wait *wait)
+ struct crypto_wait *wait, bool may_sleep)
{
int r;
ahash_request_set_tfm(req, v->tfm);
- ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP |
- CRYPTO_TFM_REQ_MAY_BACKLOG,
- crypto_req_done, (void *)wait);
+ ahash_request_set_callback(req,
+ may_sleep ? CRYPTO_TFM_REQ_MAY_SLEEP | CRYPTO_TFM_REQ_MAY_BACKLOG : 0,
+ crypto_req_done, (void *)wait);
crypto_init_wait(wait);
r = crypto_wait_req(crypto_ahash_init(req), wait);
if (unlikely(r < 0)) {
- DMERR("crypto_ahash_init failed: %d", r);
+ if (r != -ENOMEM)
+ DMERR("crypto_ahash_init failed: %d", r);
return r;
}
}
int verity_hash(struct dm_verity *v, struct ahash_request *req,
- const u8 *data, size_t len, u8 *digest)
+ const u8 *data, size_t len, u8 *digest, bool may_sleep)
{
int r;
struct crypto_wait wait;
- r = verity_hash_init(v, req, &wait);
+ r = verity_hash_init(v, req, &wait, may_sleep);
if (unlikely(r < 0))
goto out;
r = verity_hash(v, verity_io_hash_req(v, io),
data, 1 << v->hash_dev_block_bits,
- verity_io_real_digest(v, io));
+ verity_io_real_digest(v, io), !io->in_tasklet);
if (unlikely(r < 0))
goto release_ret_r;
continue;
}
- r = verity_hash_init(v, req, &wait);
+ r = verity_hash_init(v, req, &wait, !io->in_tasklet);
if (unlikely(r < 0))
return r;
io->in_tasklet = true;
err = verity_verify_io(io);
- if (err == -EAGAIN) {
+ if (err == -EAGAIN || err == -ENOMEM) {
/* fallback to retrying with work-queue */
INIT_WORK(&io->work, verity_work);
queue_work(io->v->verify_wq, &io->work);
goto out;
r = verity_hash(v, req, zero_data, 1 << v->data_dev_block_bits,
- v->zero_digest);
+ v->zero_digest, true);
out:
kfree(req);
u8 *data, size_t len));
extern int verity_hash(struct dm_verity *v, struct ahash_request *req,
- const u8 *data, size_t len, u8 *digest);
+ const u8 *data, size_t len, u8 *digest, bool may_sleep);
extern int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
sector_t block, u8 *digest, bool *is_zero);
static void bond_setup_by_slave(struct net_device *bond_dev,
struct net_device *slave_dev)
{
+ bool was_up = !!(bond_dev->flags & IFF_UP);
+
+ dev_close(bond_dev);
+
bond_dev->header_ops = slave_dev->header_ops;
bond_dev->type = slave_dev->type;
bond_dev->flags &= ~(IFF_BROADCAST | IFF_MULTICAST);
bond_dev->flags |= (IFF_POINTOPOINT | IFF_NOARP);
}
+ if (was_up)
+ dev_open(bond_dev, NULL);
}
/* On bonding slaves other than the currently active slave, suppress
}
queue_work(pdsc->wq, &qcq->work);
- pds_core_intr_mask(&pdsc->intr_ctrl[irq], PDS_CORE_INTR_MASK_CLEAR);
+ pds_core_intr_mask(&pdsc->intr_ctrl[qcq->intx], PDS_CORE_INTR_MASK_CLEAR);
return IRQ_HANDLED;
}
#define PDSC_DRV_DESCRIPTION "AMD/Pensando Core Driver"
#define PDSC_WATCHDOG_SECS 5
-#define PDSC_QUEUE_NAME_MAX_SZ 32
+#define PDSC_QUEUE_NAME_MAX_SZ 16
#define PDSC_ADMINQ_MIN_LENGTH 16 /* must be a power of two */
#define PDSC_NOTIFYQ_LENGTH 64 /* must be a power of two */
#define PDSC_TEARDOWN_RECOVERY false
struct pds_core_drv_identity drv = {};
size_t sz;
int err;
+ int n;
drv.drv_type = cpu_to_le32(PDS_DRIVER_LINUX);
- snprintf(drv.driver_ver_str, sizeof(drv.driver_ver_str),
- "%s %s", PDS_CORE_DRV_NAME, utsname()->release);
+ /* Catching the return quiets a Wformat-truncation complaint */
+ n = snprintf(drv.driver_ver_str, sizeof(drv.driver_ver_str),
+ "%s %s", PDS_CORE_DRV_NAME, utsname()->release);
+ if (n > sizeof(drv.driver_ver_str))
+ dev_dbg(pdsc->dev, "release name truncated, don't care\n");
/* Next let's get some info about the device
* We use the devcmd_lock at this level in order to
struct pds_core_fw_list_info fw_list;
struct pdsc *pdsc = devlink_priv(dl);
union pds_core_dev_comp comp;
- char buf[16];
+ char buf[32];
int listlen;
int err;
int i;
desc_idx, *post_ptr);
drop_it_no_recycle:
/* Other statistics kept track of by card. */
- tp->rx_dropped++;
+ tnapi->rx_dropped++;
goto next_pkt;
}
segs = skb_gso_segment(skb, tp->dev->features &
~(NETIF_F_TSO | NETIF_F_TSO6));
- if (IS_ERR(segs) || !segs)
+ if (IS_ERR(segs) || !segs) {
+ tnapi->tx_dropped++;
goto tg3_tso_bug_end;
+ }
skb_list_walk_safe(segs, seg, next) {
skb_mark_not_on_list(seg);
drop:
dev_kfree_skb_any(skb);
drop_nofree:
- tp->tx_dropped++;
+ tnapi->tx_dropped++;
return NETDEV_TX_OK;
}
/* tp->lock is held. */
static int tg3_halt(struct tg3 *tp, int kind, bool silent)
{
- int err;
+ int err, i;
tg3_stop_fw(tp);
/* And make sure the next sample is new data */
memset(tp->hw_stats, 0, sizeof(struct tg3_hw_stats));
+
+ for (i = 0; i < TG3_IRQ_MAX_VECS; ++i) {
+ struct tg3_napi *tnapi = &tp->napi[i];
+
+ tnapi->rx_dropped = 0;
+ tnapi->tx_dropped = 0;
+ }
}
return err;
{
struct rtnl_link_stats64 *old_stats = &tp->net_stats_prev;
struct tg3_hw_stats *hw_stats = tp->hw_stats;
+ unsigned long rx_dropped;
+ unsigned long tx_dropped;
+ int i;
stats->rx_packets = old_stats->rx_packets +
get_stat64(&hw_stats->rx_ucast_packets) +
stats->rx_missed_errors = old_stats->rx_missed_errors +
get_stat64(&hw_stats->rx_discards);
- stats->rx_dropped = tp->rx_dropped;
- stats->tx_dropped = tp->tx_dropped;
+ /* Aggregate per-queue counters. The per-queue counters are updated
+ * by a single writer, race-free. The result computed by this loop
+ * might not be 100% accurate (counters can be updated in the middle of
+ * the loop) but the next tg3_get_nstats() will recompute the current
+ * value so it is acceptable.
+ *
+ * Note that these counters wrap around at 4G on 32bit machines.
+ */
+ rx_dropped = (unsigned long)(old_stats->rx_dropped);
+ tx_dropped = (unsigned long)(old_stats->tx_dropped);
+
+ for (i = 0; i < tp->irq_cnt; i++) {
+ struct tg3_napi *tnapi = &tp->napi[i];
+
+ rx_dropped += tnapi->rx_dropped;
+ tx_dropped += tnapi->tx_dropped;
+ }
+
+ stats->rx_dropped = rx_dropped;
+ stats->tx_dropped = tx_dropped;
}
static int tg3_get_regs_len(struct net_device *dev)
u16 *rx_rcb_prod_idx;
struct tg3_rx_prodring_set prodring;
struct tg3_rx_buffer_desc *rx_rcb;
+ unsigned long rx_dropped;
u32 tx_prod ____cacheline_aligned;
u32 tx_cons;
u32 prodmbox;
struct tg3_tx_buffer_desc *tx_ring;
struct tg3_tx_ring_info *tx_buffers;
+ unsigned long tx_dropped;
dma_addr_t status_mapping;
dma_addr_t rx_rcb_mapping;
/* begin "everything else" cacheline(s) section */
- unsigned long rx_dropped;
- unsigned long tx_dropped;
struct rtnl_link_stats64 net_stats_prev;
struct tg3_ethtool_stats estats_prev;
.val = CONFIG0_MAXLEN_1536,
},
{
- .max_l3_len = 1542,
- .val = CONFIG0_MAXLEN_1542,
+ .max_l3_len = 1548,
+ .val = CONFIG0_MAXLEN_1548,
},
{
.max_l3_len = 9212,
dma_addr_t mapping;
unsigned short mtu;
void *buffer;
+ int ret;
mtu = ETH_HLEN;
mtu += netdev->mtu;
word3 |= mtu;
}
- if (skb->ip_summed != CHECKSUM_NONE) {
+ if (skb->len >= ETH_FRAME_LEN) {
+ /* Hardware offloaded checksumming isn't working on frames
+ * bigger than 1514 bytes. A hypothesis about this is that the
+ * checksum buffer is only 1518 bytes, so when the frames get
+ * bigger they get truncated, or the last few bytes get
+ * overwritten by the FCS.
+ *
+ * Just use software checksumming and bypass on bigger frames.
+ */
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ ret = skb_checksum_help(skb);
+ if (ret)
+ return ret;
+ }
+ word1 |= TSS_BYPASS_BIT;
+ } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
int tcp = 0;
+ /* We do not switch off the checksumming on non TCP/UDP
+ * frames: as is shown from tests, the checksumming engine
+ * is smart enough to see that a frame is not actually TCP
+ * or UDP and then just pass it through without any changes
+ * to the frame.
+ */
if (skb->protocol == htons(ETH_P_IP)) {
word1 |= TSS_IP_CHKSUM_BIT;
tcp = ip_hdr(skb)->protocol == IPPROTO_TCP;
return 0;
}
-static netdev_features_t gmac_fix_features(struct net_device *netdev,
- netdev_features_t features)
-{
- if (netdev->mtu + ETH_HLEN + VLAN_HLEN > MTU_SIZE_BIT_MASK)
- features &= ~GMAC_OFFLOAD_FEATURES;
-
- return features;
-}
-
static int gmac_set_features(struct net_device *netdev,
netdev_features_t features)
{
.ndo_set_mac_address = gmac_set_mac_address,
.ndo_get_stats64 = gmac_get_stats64,
.ndo_change_mtu = gmac_change_mtu,
- .ndo_fix_features = gmac_fix_features,
.ndo_set_features = gmac_set_features,
};
netdev->hw_features = GMAC_OFFLOAD_FEATURES;
netdev->features |= GMAC_OFFLOAD_FEATURES | NETIF_F_GRO;
- /* We can handle jumbo frames up to 10236 bytes so, let's accept
- * payloads of 10236 bytes minus VLAN and ethernet header
+ /* We can receive jumbo frames up to 10236 bytes but only
+ * transmit 2047 bytes so, let's accept payloads of 2047
+ * bytes minus VLAN and ethernet header
*/
netdev->min_mtu = ETH_MIN_MTU;
- netdev->max_mtu = 10236 - VLAN_ETH_HLEN;
+ netdev->max_mtu = MTU_SIZE_BIT_MASK - VLAN_ETH_HLEN;
port->freeq_refill = 0;
netif_napi_add(netdev, &port->napi, gmac_napi_poll);
#define SOF_BIT 0x80000000
#define EOF_BIT 0x40000000
#define EOFIE_BIT BIT(29)
-#define MTU_SIZE_BIT_MASK 0x1fff
+#define MTU_SIZE_BIT_MASK 0x7ff /* Max MTU 2047 bytes */
/* GMAC Tx Descriptor */
struct gmac_txdesc {
#define CONFIG0_MAXLEN_1536 0
#define CONFIG0_MAXLEN_1518 1
#define CONFIG0_MAXLEN_1522 2
-#define CONFIG0_MAXLEN_1542 3
+#define CONFIG0_MAXLEN_1548 3
#define CONFIG0_MAXLEN_9k 4 /* 9212 */
#define CONFIG0_MAXLEN_10k 5 /* 10236 */
#define CONFIG0_MAXLEN_1518__6 6
if (block->tx) {
if (block->tx->q_num < priv->tx_cfg.num_queues)
reschedule |= gve_tx_poll(block, budget);
- else
+ else if (budget)
reschedule |= gve_xdp_poll(block, budget);
}
+ if (!budget)
+ return 0;
+
if (block->rx) {
work_done = gve_rx_poll(block, budget);
reschedule |= work_done == budget;
if (block->tx)
reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);
+ if (!budget)
+ return 0;
+
if (block->rx) {
work_done = gve_rx_poll_dqo(block, budget);
reschedule |= work_done == budget;
feat = block->napi.dev->features;
- /* If budget is 0, do all the work */
- if (budget == 0)
- budget = INT_MAX;
-
if (budget > 0)
work_done = gve_clean_rx_done(rx, budget, feat);
bool repoll;
u32 to_do;
- /* If budget is 0, do all the work */
- if (budget == 0)
- budget = INT_MAX;
-
/* Find out how much work there is to be done */
nic_done = gve_tx_load_event_counter(priv, tx);
to_do = min_t(u32, (nic_done - tx->done), budget);
}
sprintf(result[j++], "%d", i);
- sprintf(result[j++], "%s", dim_state_str[dim->state]);
+ sprintf(result[j++], "%s", dim->state < ARRAY_SIZE(dim_state_str) ?
+ dim_state_str[dim->state] : "unknown");
sprintf(result[j++], "%u", dim->profile_ix);
- sprintf(result[j++], "%s", dim_cqe_mode_str[dim->mode]);
+ sprintf(result[j++], "%s", dim->mode < ARRAY_SIZE(dim_cqe_mode_str) ?
+ dim_cqe_mode_str[dim->mode] : "unknown");
sprintf(result[j++], "%s",
- dim_tune_stat_str[dim->tune_state]);
+ dim->tune_state < ARRAY_SIZE(dim_tune_stat_str) ?
+ dim_tune_stat_str[dim->tune_state] : "unknown");
sprintf(result[j++], "%u", dim->steps_left);
sprintf(result[j++], "%u", dim->steps_right);
sprintf(result[j++], "%u", dim->tired);
struct hns3_nic_priv *priv = netdev_priv(netdev);
char format_mac_addr[HNAE3_FORMAT_MAC_ADDR_LEN];
struct hnae3_handle *h = priv->ae_handle;
- u8 mac_addr_temp[ETH_ALEN];
+ u8 mac_addr_temp[ETH_ALEN] = {0};
int ret = 0;
if (h->ae_algo->ops->get_mac_addr)
static void hclge_update_fec_stats(struct hclge_dev *hdev);
static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret,
int wait_cnt);
+static int hclge_update_port_info(struct hclge_dev *hdev);
static struct hnae3_ae_algo ae_algo;
if (state != hdev->hw.mac.link) {
hdev->hw.mac.link = state;
+ if (state == HCLGE_LINK_STATUS_UP)
+ hclge_update_port_info(hdev);
+
client->ops->link_status_change(handle, state);
hclge_config_mac_tnl_int(hdev, state);
if (rclient && rclient->ops->link_status_change)
struct hclge_vport_vlan_cfg *vlan, *tmp;
struct hclge_dev *hdev = vport->back;
- mutex_lock(&hdev->vport_lock);
-
list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) {
if (vlan->vlan_id == vlan_id) {
if (is_write_tbl && vlan->hd_tbl_status)
break;
}
}
-
- mutex_unlock(&hdev->vport_lock);
}
void hclge_rm_vport_all_vlan_table(struct hclge_vport *vport, bool is_del_list)
* handle mailbox. Just record the vlan id, and remove it after
* reset finished.
*/
+ mutex_lock(&hdev->vport_lock);
if ((test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) ||
test_bit(HCLGE_STATE_RST_FAIL, &hdev->state)) && is_kill) {
set_bit(vlan_id, vport->vlan_del_fail_bmap);
+ mutex_unlock(&hdev->vport_lock);
return -EBUSY;
+ } else if (!is_kill && test_bit(vlan_id, vport->vlan_del_fail_bmap)) {
+ clear_bit(vlan_id, vport->vlan_del_fail_bmap);
}
+ mutex_unlock(&hdev->vport_lock);
/* when port base vlan enabled, we use port base vlan as the vlan
* filter entry. In this case, we don't update vlan filter table
}
if (!ret) {
- if (!is_kill)
+ if (!is_kill) {
hclge_add_vport_vlan_table(vport, vlan_id,
writen_to_tbl);
- else if (is_kill && vlan_id != 0)
+ } else if (is_kill && vlan_id != 0) {
+ mutex_lock(&hdev->vport_lock);
hclge_rm_vport_vlan_table(vport, vlan_id, false);
+ mutex_unlock(&hdev->vport_lock);
+ }
} else if (is_kill) {
/* when remove hw vlan filter failed, record the vlan id,
* and try to remove it from hw later, to be consistence
* with stack
*/
+ mutex_lock(&hdev->vport_lock);
set_bit(vlan_id, vport->vlan_del_fail_bmap);
+ mutex_unlock(&hdev->vport_lock);
}
hclge_set_vport_vlan_fltr_change(vport);
int i, ret, sync_cnt = 0;
u16 vlan_id;
+ mutex_lock(&hdev->vport_lock);
/* start from vport 1 for PF is always alive */
for (i = 0; i < hdev->num_alloc_vport; i++) {
struct hclge_vport *vport = &hdev->vport[i];
ret = hclge_set_vlan_filter_hw(hdev, htons(ETH_P_8021Q),
vport->vport_id, vlan_id,
true);
- if (ret && ret != -EINVAL)
+ if (ret && ret != -EINVAL) {
+ mutex_unlock(&hdev->vport_lock);
return;
+ }
clear_bit(vlan_id, vport->vlan_del_fail_bmap);
hclge_rm_vport_vlan_table(vport, vlan_id, false);
hclge_set_vport_vlan_fltr_change(vport);
sync_cnt++;
- if (sync_cnt >= HCLGE_MAX_SYNC_COUNT)
+ if (sync_cnt >= HCLGE_MAX_SYNC_COUNT) {
+ mutex_unlock(&hdev->vport_lock);
return;
+ }
vlan_id = find_first_bit(vport->vlan_del_fail_bmap,
VLAN_N_VID);
}
}
+ mutex_unlock(&hdev->vport_lock);
hclge_sync_vlan_fltr_state(hdev);
}
goto err_msi_irq_uninit;
if (hdev->hw.mac.media_type == HNAE3_MEDIA_TYPE_COPPER) {
+ clear_bit(HNAE3_DEV_SUPPORT_FEC_B, ae_dev->caps);
if (hnae3_dev_phy_imp_supported(hdev))
ret = hclge_update_tp_port_info(hdev);
else
test_bit(HCLGEVF_STATE_RST_FAIL, &hdev->state)) && is_kill) {
set_bit(vlan_id, hdev->vlan_del_fail_bmap);
return -EBUSY;
+ } else if (!is_kill && test_bit(vlan_id, hdev->vlan_del_fail_bmap)) {
+ clear_bit(vlan_id, hdev->vlan_del_fail_bmap);
}
hclgevf_build_send_msg(&send_msg, HCLGE_MBX_SET_VLAN,
int ret, sync_cnt = 0;
u16 vlan_id;
+ if (bitmap_empty(hdev->vlan_del_fail_bmap, VLAN_N_VID))
+ return;
+
+ rtnl_lock();
vlan_id = find_first_bit(hdev->vlan_del_fail_bmap, VLAN_N_VID);
while (vlan_id != VLAN_N_VID) {
ret = hclgevf_set_vlan_filter(handle, htons(ETH_P_8021Q),
vlan_id, true);
if (ret)
- return;
+ break;
clear_bit(vlan_id, hdev->vlan_del_fail_bmap);
sync_cnt++;
if (sync_cnt >= HCLGEVF_MAX_SYNC_COUNT)
- return;
+ break;
vlan_id = find_first_bit(hdev->vlan_del_fail_bmap, VLAN_N_VID);
}
+ rtnl_unlock();
}
static int hclgevf_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable)
return HCLGEVF_VECTOR0_EVENT_OTHER;
}
+static void hclgevf_reset_timer(struct timer_list *t)
+{
+ struct hclgevf_dev *hdev = from_timer(hdev, t, reset_timer);
+
+ hclgevf_clear_event_cause(hdev, HCLGEVF_VECTOR0_EVENT_RST);
+ hclgevf_reset_task_schedule(hdev);
+}
+
static irqreturn_t hclgevf_misc_irq_handle(int irq, void *data)
{
+#define HCLGEVF_RESET_DELAY 5
+
enum hclgevf_evt_cause event_cause;
struct hclgevf_dev *hdev = data;
u32 clearval;
switch (event_cause) {
case HCLGEVF_VECTOR0_EVENT_RST:
- hclgevf_reset_task_schedule(hdev);
+ mod_timer(&hdev->reset_timer,
+ jiffies + msecs_to_jiffies(HCLGEVF_RESET_DELAY));
break;
case HCLGEVF_VECTOR0_EVENT_MBX:
hclgevf_mbx_handler(hdev);
HCLGEVF_DRIVER_NAME);
hclgevf_task_schedule(hdev, round_jiffies_relative(HZ));
+ timer_setup(&hdev->reset_timer, hclgevf_reset_timer, 0);
return 0;
enum hnae3_reset_type reset_level;
unsigned long reset_pending;
enum hnae3_reset_type reset_type;
+ struct timer_list reset_timer;
#define HCLGEVF_RESET_REQUESTED 0
#define HCLGEVF_RESET_PENDING 1
i++;
}
+ /* ensure additional_info will be seen after received_resp */
+ smp_rmb();
+
if (i >= HCLGEVF_MAX_TRY_TIMES) {
dev_err(&hdev->pdev->dev,
"VF could not get mbx(%u,%u) resp(=%d) from PF in %d tries\n",
resp->resp_status = hclgevf_resp_to_errno(resp_status);
memcpy(resp->additional_info, req->msg.resp_data,
HCLGE_MBX_MAX_RESP_DATA_SIZE * sizeof(u8));
+
+ /* ensure additional_info will be seen before setting received_resp */
+ smp_wmb();
+
if (match_id) {
/* If match_id is not zero, it means PF support match_id.
* if the match_id is right, VF get the right response, or
}
/**
- * ice_download_pkg
+ * ice_download_pkg_with_sig_seg
* @hw: pointer to the hardware structure
* @pkg_hdr: pointer to package header
*
* Handles the download of a complete package.
*/
static enum ice_ddp_state
-ice_download_pkg(struct ice_hw *hw, struct ice_pkg_hdr *pkg_hdr)
+ice_download_pkg_with_sig_seg(struct ice_hw *hw, struct ice_pkg_hdr *pkg_hdr)
{
enum ice_aq_err aq_err = hw->adminq.sq_last_status;
enum ice_ddp_state state = ICE_DDP_PKG_ERR;
state = ice_post_dwnld_pkg_actions(hw);
ice_release_global_cfg_lock(hw);
+
+ return state;
+}
+
+/**
+ * ice_dwnld_cfg_bufs
+ * @hw: pointer to the hardware structure
+ * @bufs: pointer to an array of buffers
+ * @count: the number of buffers in the array
+ *
+ * Obtains global config lock and downloads the package configuration buffers
+ * to the firmware.
+ */
+static enum ice_ddp_state
+ice_dwnld_cfg_bufs(struct ice_hw *hw, struct ice_buf *bufs, u32 count)
+{
+ enum ice_ddp_state state;
+ struct ice_buf_hdr *bh;
+ int status;
+
+ if (!bufs || !count)
+ return ICE_DDP_PKG_ERR;
+
+ /* If the first buffer's first section has its metadata bit set
+ * then there are no buffers to be downloaded, and the operation is
+ * considered a success.
+ */
+ bh = (struct ice_buf_hdr *)bufs;
+ if (le32_to_cpu(bh->section_entry[0].type) & ICE_METADATA_BUF)
+ return ICE_DDP_PKG_SUCCESS;
+
+ status = ice_acquire_global_cfg_lock(hw, ICE_RES_WRITE);
+ if (status) {
+ if (status == -EALREADY)
+ return ICE_DDP_PKG_ALREADY_LOADED;
+ return ice_map_aq_err_to_ddp_state(hw->adminq.sq_last_status);
+ }
+
+ state = ice_dwnld_cfg_bufs_no_lock(hw, bufs, 0, count, true);
+ if (!state)
+ state = ice_post_dwnld_pkg_actions(hw);
+
+ ice_release_global_cfg_lock(hw);
+
+ return state;
+}
+
+/**
+ * ice_download_pkg_without_sig_seg
+ * @hw: pointer to the hardware structure
+ * @ice_seg: pointer to the segment of the package to be downloaded
+ *
+ * Handles the download of a complete package without signature segment.
+ */
+static enum ice_ddp_state
+ice_download_pkg_without_sig_seg(struct ice_hw *hw, struct ice_seg *ice_seg)
+{
+ struct ice_buf_table *ice_buf_tbl;
+
+ ice_debug(hw, ICE_DBG_PKG, "Segment format version: %d.%d.%d.%d\n",
+ ice_seg->hdr.seg_format_ver.major,
+ ice_seg->hdr.seg_format_ver.minor,
+ ice_seg->hdr.seg_format_ver.update,
+ ice_seg->hdr.seg_format_ver.draft);
+
+ ice_debug(hw, ICE_DBG_PKG, "Seg: type 0x%X, size %d, name %s\n",
+ le32_to_cpu(ice_seg->hdr.seg_type),
+ le32_to_cpu(ice_seg->hdr.seg_size), ice_seg->hdr.seg_id);
+
+ ice_buf_tbl = ice_find_buf_table(ice_seg);
+
+ ice_debug(hw, ICE_DBG_PKG, "Seg buf count: %d\n",
+ le32_to_cpu(ice_buf_tbl->buf_count));
+
+ return ice_dwnld_cfg_bufs(hw, ice_buf_tbl->buf_array,
+ le32_to_cpu(ice_buf_tbl->buf_count));
+}
+
+/**
+ * ice_download_pkg
+ * @hw: pointer to the hardware structure
+ * @pkg_hdr: pointer to package header
+ * @ice_seg: pointer to the segment of the package to be downloaded
+ *
+ * Handles the download of a complete package.
+ */
+static enum ice_ddp_state
+ice_download_pkg(struct ice_hw *hw, struct ice_pkg_hdr *pkg_hdr,
+ struct ice_seg *ice_seg)
+{
+ enum ice_ddp_state state;
+
+ if (hw->pkg_has_signing_seg)
+ state = ice_download_pkg_with_sig_seg(hw, pkg_hdr);
+ else
+ state = ice_download_pkg_without_sig_seg(hw, ice_seg);
+
ice_post_pkg_dwnld_vlan_mode_cfg(hw);
return state;
/* initialize package hints and then download package */
ice_init_pkg_hints(hw, seg);
- state = ice_download_pkg(hw, pkg);
+ state = ice_download_pkg(hw, pkg, seg);
if (state == ICE_DDP_PKG_ALREADY_LOADED) {
ice_debug(hw, ICE_DBG_INIT,
"package previously loaded - no work.\n");
struct ice_pf *pf = d->pf;
int ret;
- if (prio > ICE_DPLL_PRIO_MAX) {
- NL_SET_ERR_MSG_FMT(extack, "prio out of supported range 0-%d",
- ICE_DPLL_PRIO_MAX);
- return -EINVAL;
- }
-
mutex_lock(&pf->dplls.lock);
ret = ice_dpll_hw_input_prio_set(pf, d, p, prio, extack);
mutex_unlock(&pf->dplls.lock);
}
d->pf = pf;
if (cgu) {
+ ice_dpll_update_state(pf, d, true);
ret = dpll_device_register(d->dpll, type, &ice_dpll_ops, d);
if (ret) {
dpll_device_put(d->dpll);
struct ice_dplls *d = &pf->dplls;
struct kthread_worker *kworker;
- ice_dpll_update_state(pf, &d->eec, true);
- ice_dpll_update_state(pf, &d->pps, true);
kthread_init_delayed_work(&d->work, ice_dpll_periodic_work);
kworker = kthread_create_worker(0, "ice-dplls-%s",
dev_name(ice_pf_to_dev(pf)));
int num_pins, i, ret = -EINVAL;
struct ice_hw *hw = &pf->hw;
struct ice_dpll_pin *pins;
+ unsigned long caps;
u8 freq_supp_num;
bool input;
}
for (i = 0; i < num_pins; i++) {
+ caps = 0;
pins[i].idx = i;
pins[i].prop.board_label = ice_cgu_get_pin_name(hw, i, input);
pins[i].prop.type = ice_cgu_get_pin_type(hw, i, input);
&dp->input_prio[i]);
if (ret)
return ret;
- pins[i].prop.capabilities |=
- DPLL_PIN_CAPABILITIES_PRIORITY_CAN_CHANGE;
+ caps |= (DPLL_PIN_CAPABILITIES_PRIORITY_CAN_CHANGE |
+ DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE);
pins[i].prop.phase_range.min =
pf->dplls.input_phase_adj_max;
pins[i].prop.phase_range.max =
pf->dplls.output_phase_adj_max;
pins[i].prop.phase_range.max =
-pf->dplls.output_phase_adj_max;
+ ret = ice_cgu_get_output_pin_state_caps(hw, i, &caps);
+ if (ret)
+ return ret;
}
- pins[i].prop.capabilities |=
- DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE;
+ pins[i].prop.capabilities = caps;
ret = ice_dpll_pin_state_update(pf, &pins[i], pin_type, NULL);
if (ret)
return ret;
#include "ice.h"
-#define ICE_DPLL_PRIO_MAX 0xF
#define ICE_DPLL_RCLK_NUM_MAX 4
/** ice_dpll_pin - store info about pins
return ret;
}
+
+/**
+ * ice_cgu_get_output_pin_state_caps - get output pin state capabilities
+ * @hw: pointer to the hw struct
+ * @pin_id: id of a pin
+ * @caps: capabilities to modify
+ *
+ * Return:
+ * * 0 - success, state capabilities were modified
+ * * negative - failure, capabilities were not modified
+ */
+int ice_cgu_get_output_pin_state_caps(struct ice_hw *hw, u8 pin_id,
+ unsigned long *caps)
+{
+ bool can_change = true;
+
+ switch (hw->device_id) {
+ case ICE_DEV_ID_E810C_SFP:
+ if (pin_id == ZL_OUT2 || pin_id == ZL_OUT3)
+ can_change = false;
+ break;
+ case ICE_DEV_ID_E810C_QSFP:
+ if (pin_id == ZL_OUT2 || pin_id == ZL_OUT3 || pin_id == ZL_OUT4)
+ can_change = false;
+ break;
+ case ICE_DEV_ID_E823L_10G_BASE_T:
+ case ICE_DEV_ID_E823L_1GBE:
+ case ICE_DEV_ID_E823L_BACKPLANE:
+ case ICE_DEV_ID_E823L_QSFP:
+ case ICE_DEV_ID_E823L_SFP:
+ case ICE_DEV_ID_E823C_10G_BASE_T:
+ case ICE_DEV_ID_E823C_BACKPLANE:
+ case ICE_DEV_ID_E823C_QSFP:
+ case ICE_DEV_ID_E823C_SFP:
+ case ICE_DEV_ID_E823C_SGMII:
+ if (hw->cgu_part_number ==
+ ICE_AQC_GET_LINK_TOPO_NODE_NR_ZL30632_80032 &&
+ pin_id == ZL_OUT2)
+ can_change = false;
+ else if (hw->cgu_part_number ==
+ ICE_AQC_GET_LINK_TOPO_NODE_NR_SI5383_5384 &&
+ pin_id == SI_OUT1)
+ can_change = false;
+ break;
+ default:
+ return -EINVAL;
+ }
+ if (can_change)
+ *caps |= DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE;
+ else
+ *caps &= ~DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE;
+
+ return 0;
+}
int ice_get_cgu_rclk_pin_info(struct ice_hw *hw, u8 *base_idx, u8 *pin_num);
void ice_ptp_init_phy_model(struct ice_hw *hw);
+int ice_cgu_get_output_pin_state_caps(struct ice_hw *hw, u8 pin_id,
+ unsigned long *caps);
#define PFTSYN_SEM_BYTES 4
u8 *data)
{
if (sset == ETH_SS_STATS) {
+ struct mvneta_port *pp = netdev_priv(netdev);
int i;
for (i = 0; i < ARRAY_SIZE(mvneta_statistics); i++)
memcpy(data + i * ETH_GSTRING_LEN,
mvneta_statistics[i].name, ETH_GSTRING_LEN);
- data += ETH_GSTRING_LEN * ARRAY_SIZE(mvneta_statistics);
- page_pool_ethtool_stats_get_strings(data);
+ if (!pp->bm_priv) {
+ data += ETH_GSTRING_LEN * ARRAY_SIZE(mvneta_statistics);
+ page_pool_ethtool_stats_get_strings(data);
+ }
}
}
struct page_pool_stats stats = {};
int i;
- for (i = 0; i < rxq_number; i++)
- page_pool_get_stats(pp->rxqs[i].page_pool, &stats);
+ for (i = 0; i < rxq_number; i++) {
+ if (pp->rxqs[i].page_pool)
+ page_pool_get_stats(pp->rxqs[i].page_pool, &stats);
+ }
page_pool_ethtool_stats_get(data, &stats);
}
for (i = 0; i < ARRAY_SIZE(mvneta_statistics); i++)
*data++ = pp->ethtool_stats[i];
- mvneta_ethtool_pp_stats(pp, data);
+ if (!pp->bm_priv)
+ mvneta_ethtool_pp_stats(pp, data);
}
static int mvneta_ethtool_get_sset_count(struct net_device *dev, int sset)
{
- if (sset == ETH_SS_STATS)
- return ARRAY_SIZE(mvneta_statistics) +
- page_pool_ethtool_stats_get_count();
+ if (sset == ETH_SS_STATS) {
+ int count = ARRAY_SIZE(mvneta_statistics);
+ struct mvneta_port *pp = netdev_priv(dev);
+
+ if (!pp->bm_priv)
+ count += page_pool_ethtool_stats_get_count();
+
+ return count;
+ }
return -EOPNOTSUPP;
}
static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
struct mlx5_cqe64 *cqe,
+ u8 *md_buff,
+ u8 *md_buff_sz,
int budget)
{
struct mlx5e_ptp_port_ts_cqe_list *pending_cqe_list = ptpsq->ts_cqe_pending_list;
mlx5e_ptpsq_mark_ts_cqes_undelivered(ptpsq, hwtstamp);
out:
napi_consume_skb(skb, budget);
- mlx5e_ptp_metadata_fifo_push(&ptpsq->metadata_freelist, metadata_id);
+ md_buff[*md_buff_sz++] = metadata_id;
if (unlikely(mlx5e_ptp_metadata_map_unhealthy(&ptpsq->metadata_map)) &&
!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
queue_work(ptpsq->txqsq.priv->wq, &ptpsq->report_unhealthy_work);
}
-static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget)
+static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int napi_budget)
{
struct mlx5e_ptpsq *ptpsq = container_of(cq, struct mlx5e_ptpsq, ts_cq);
- struct mlx5_cqwq *cqwq = &cq->wq;
+ int budget = min(napi_budget, MLX5E_TX_CQ_POLL_BUDGET);
+ u8 metadata_buff[MLX5E_TX_CQ_POLL_BUDGET];
+ u8 metadata_buff_sz = 0;
+ struct mlx5_cqwq *cqwq;
struct mlx5_cqe64 *cqe;
int work_done = 0;
+ cqwq = &cq->wq;
+
if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &ptpsq->txqsq.state)))
return false;
do {
mlx5_cqwq_pop(cqwq);
- mlx5e_ptp_handle_ts_cqe(ptpsq, cqe, budget);
+ mlx5e_ptp_handle_ts_cqe(ptpsq, cqe,
+ metadata_buff, &metadata_buff_sz, napi_budget);
} while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(cqwq)));
mlx5_cqwq_update_db_record(cqwq);
/* ensure cq space is freed before enabling more cqes */
wmb();
+ while (metadata_buff_sz > 0)
+ mlx5e_ptp_metadata_fifo_push(&ptpsq->metadata_freelist,
+ metadata_buff[--metadata_buff_sz]);
+
mlx5e_txqsq_wake(&ptpsq->txqsq);
return work_done == budget;
void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq)
{
- char icosq_str[MLX5E_REPORTER_PER_Q_MAX_LEN] = {};
char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
struct mlx5e_icosq *icosq = rq->icosq;
struct mlx5e_priv *priv = rq->priv;
struct mlx5e_err_ctx err_ctx = {};
+ char icosq_str[32] = {};
err_ctx.ctx = rq;
err_ctx.recover = mlx5e_rx_reporter_timeout_recover;
if (icosq)
snprintf(icosq_str, sizeof(icosq_str), "ICOSQ: 0x%x, ", icosq->sqn);
snprintf(err_str, sizeof(err_str),
- "RX timeout on channel: %d, %sRQ: 0x%x, CQ: 0x%x",
+ "RX timeout on channel: %d, %s RQ: 0x%x, CQ: 0x%x",
rq->ix, icosq_str, rq->rqn, rq->cq.mcq.cqn);
mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
if (err)
goto destroy_neigh_entry;
- e->encap_size = ipv4_encap_size;
- e->encap_header = encap_header;
-
if (!(nud_state & NUD_VALID)) {
neigh_event_send(attr.n, NULL);
/* the encap entry will be made valid on neigh update event
goto destroy_neigh_entry;
}
+ e->encap_size = ipv4_encap_size;
+ e->encap_header = encap_header;
e->flags |= MLX5_ENCAP_ENTRY_VALID;
mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
mlx5e_route_lookup_ipv4_put(&attr);
if (err)
goto free_encap;
- e->encap_size = ipv4_encap_size;
- kfree(e->encap_header);
- e->encap_header = encap_header;
-
if (!(nud_state & NUD_VALID)) {
neigh_event_send(attr.n, NULL);
/* the encap entry will be made valid on neigh update event
* and not used before that.
*/
- goto release_neigh;
+ goto free_encap;
}
memset(&reformat_params, 0, sizeof(reformat_params));
goto free_encap;
}
+ e->encap_size = ipv4_encap_size;
+ kfree(e->encap_header);
+ e->encap_header = encap_header;
+
e->flags |= MLX5_ENCAP_ENTRY_VALID;
mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
mlx5e_route_lookup_ipv4_put(&attr);
if (err)
goto destroy_neigh_entry;
- e->encap_size = ipv6_encap_size;
- e->encap_header = encap_header;
-
if (!(nud_state & NUD_VALID)) {
neigh_event_send(attr.n, NULL);
/* the encap entry will be made valid on neigh update event
goto destroy_neigh_entry;
}
+ e->encap_size = ipv6_encap_size;
+ e->encap_header = encap_header;
e->flags |= MLX5_ENCAP_ENTRY_VALID;
mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
mlx5e_route_lookup_ipv6_put(&attr);
if (err)
goto free_encap;
- e->encap_size = ipv6_encap_size;
- kfree(e->encap_header);
- e->encap_header = encap_header;
-
if (!(nud_state & NUD_VALID)) {
neigh_event_send(attr.n, NULL);
/* the encap entry will be made valid on neigh update event
* and not used before that.
*/
- goto release_neigh;
+ goto free_encap;
}
memset(&reformat_params, 0, sizeof(reformat_params));
goto free_encap;
}
+ e->encap_size = ipv6_encap_size;
+ kfree(e->encap_header);
+ e->encap_header = encap_header;
+
e->flags |= MLX5_ENCAP_ENTRY_VALID;
mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
mlx5e_route_lookup_ipv6_put(&attr);
struct ethtool_drvinfo *drvinfo)
{
struct mlx5_core_dev *mdev = priv->mdev;
+ int count;
strscpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver));
- snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
- "%d.%d.%04d (%.16s)",
- fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev),
- mdev->board_id);
+ count = snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+ "%d.%d.%04d (%.16s)", fw_rev_maj(mdev),
+ fw_rev_min(mdev), fw_rev_sub(mdev), mdev->board_id);
+ if (count == sizeof(drvinfo->fw_version))
+ snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+ "%d.%d.%04d", fw_rev_maj(mdev),
+ fw_rev_min(mdev), fw_rev_sub(mdev));
+
strscpy(drvinfo->bus_info, dev_name(mdev->device),
sizeof(drvinfo->bus_info));
}
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5_core_dev *mdev = priv->mdev;
+ int count;
strscpy(drvinfo->driver, mlx5e_rep_driver_name,
sizeof(drvinfo->driver));
- snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
- "%d.%d.%04d (%.16s)",
- fw_rev_maj(mdev), fw_rev_min(mdev),
- fw_rev_sub(mdev), mdev->board_id);
+ count = snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+ "%d.%d.%04d (%.16s)", fw_rev_maj(mdev),
+ fw_rev_min(mdev), fw_rev_sub(mdev), mdev->board_id);
+ if (count == sizeof(drvinfo->fw_version))
+ snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+ "%d.%d.%04d", fw_rev_maj(mdev),
+ fw_rev_min(mdev), fw_rev_sub(mdev));
}
static const struct counter_desc sw_rep_stats_desc[] = {
OFFLOAD(DIPV6_31_0, 32, U32_MAX, ip6.daddr.s6_addr32[3], 0,
dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]),
OFFLOAD(IPV6_HOPLIMIT, 8, U8_MAX, ip6.hop_limit, 0, ttl_hoplimit),
- OFFLOAD(IP_DSCP, 16, 0xc00f, ip6, 0, ip_dscp),
+ OFFLOAD(IP_DSCP, 16, 0x0fc0, ip6, 0, ip_dscp),
OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source, 0, tcp_sport),
OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest, 0, tcp_dport),
OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest, 0, udp_dport),
};
-static unsigned long mask_to_le(unsigned long mask, int size)
+static u32 mask_field_get(void *mask, struct mlx5_fields *f)
{
- __be32 mask_be32;
- __be16 mask_be16;
-
- if (size == 32) {
- mask_be32 = (__force __be32)(mask);
- mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
- } else if (size == 16) {
- mask_be32 = (__force __be32)(mask);
- mask_be16 = *(__be16 *)&mask_be32;
- mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
+ switch (f->field_bsize) {
+ case 32:
+ return be32_to_cpu(*(__be32 *)mask) & f->field_mask;
+ case 16:
+ return be16_to_cpu(*(__be16 *)mask) & (u16)f->field_mask;
+ default:
+ return *(u8 *)mask & (u8)f->field_mask;
}
+}
- return mask;
+static void mask_field_clear(void *mask, struct mlx5_fields *f)
+{
+ switch (f->field_bsize) {
+ case 32:
+ *(__be32 *)mask &= ~cpu_to_be32(f->field_mask);
+ break;
+ case 16:
+ *(__be16 *)mask &= ~cpu_to_be16((u16)f->field_mask);
+ break;
+ default:
+ *(u8 *)mask &= ~(u8)f->field_mask;
+ break;
+ }
}
static int offload_pedit_fields(struct mlx5e_priv *priv,
struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
struct pedit_headers_action *hdrs = parse_attr->hdrs;
void *headers_c, *headers_v, *action, *vals_p;
- u32 *s_masks_p, *a_masks_p, s_mask, a_mask;
struct mlx5e_tc_mod_hdr_acts *mod_acts;
- unsigned long mask, field_mask;
+ void *s_masks_p, *a_masks_p;
int i, first, last, next_z;
struct mlx5_fields *f;
+ unsigned long mask;
+ u32 s_mask, a_mask;
u8 cmd;
mod_acts = &parse_attr->mod_hdr_acts;
bool skip;
f = &fields[i];
- /* avoid seeing bits set from previous iterations */
- s_mask = 0;
- a_mask = 0;
-
s_masks_p = (void *)set_masks + f->offset;
a_masks_p = (void *)add_masks + f->offset;
- s_mask = *s_masks_p & f->field_mask;
- a_mask = *a_masks_p & f->field_mask;
+ s_mask = mask_field_get(s_masks_p, f);
+ a_mask = mask_field_get(a_masks_p, f);
if (!s_mask && !a_mask) /* nothing to offload here */
continue;
match_mask, f->field_bsize))
skip = true;
/* clear to denote we consumed this field */
- *s_masks_p &= ~f->field_mask;
+ mask_field_clear(s_masks_p, f);
} else {
cmd = MLX5_ACTION_TYPE_ADD;
mask = a_mask;
vals_p = (void *)add_vals + f->offset;
/* add 0 is no change */
- if ((*(u32 *)vals_p & f->field_mask) == 0)
+ if (!mask_field_get(vals_p, f))
skip = true;
/* clear to denote we consumed this field */
- *a_masks_p &= ~f->field_mask;
+ mask_field_clear(a_masks_p, f);
}
if (skip)
continue;
- mask = mask_to_le(mask, f->field_bsize);
-
first = find_first_bit(&mask, f->field_bsize);
next_z = find_next_zero_bit(&mask, f->field_bsize, first);
last = find_last_bit(&mask, f->field_bsize);
MLX5_SET(set_action_in, action, field, f->field);
if (cmd == MLX5_ACTION_TYPE_SET) {
+ unsigned long field_mask = f->field_mask;
int start;
- field_mask = mask_to_le(f->field_mask, f->field_bsize);
-
/* if field is bit sized it can start not from first bit */
start = find_first_bit(&field_mask, f->field_bsize);
u8 metadata_index = be32_to_cpu(eseg->flow_table_metadata);
mlx5e_skb_cb_hwtstamp_init(skb);
- mlx5e_ptpsq_track_metadata(sq->ptpsq, metadata_index);
mlx5e_ptp_metadata_map_put(&sq->ptpsq->metadata_map, skb,
metadata_index);
+ mlx5e_ptpsq_track_metadata(sq->ptpsq, metadata_index);
if (!netif_tx_queue_stopped(sq->txq) &&
mlx5e_ptpsq_metadata_freelist_empty(sq->ptpsq)) {
netif_tx_stop_queue(sq->txq);
err_drop:
stats->dropped++;
- dev_kfree_skb_any(skb);
if (unlikely(sq->ptpsq && (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
mlx5e_ptp_metadata_fifo_push(&sq->ptpsq->metadata_freelist,
be32_to_cpu(eseg->flow_table_metadata));
+ dev_kfree_skb_any(skb);
mlx5e_tx_flush(sq);
}
{
struct mlx5_eq_table *table = dev->priv.eq_table;
struct mlx5_irq *irq;
+ int cpu;
irq = xa_load(&table->comp_irqs, vecidx);
if (!irq)
return;
+ cpu = cpumask_first(mlx5_irq_get_affinity_mask(irq));
+ cpumask_clear_cpu(cpu, &table->used_cpus);
xa_erase(&table->comp_irqs, vecidx);
mlx5_irq_affinity_irq_release(dev, irq);
}
static int comp_irq_request_sf(struct mlx5_core_dev *dev, u16 vecidx)
{
struct mlx5_eq_table *table = dev->priv.eq_table;
+ struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev);
+ struct irq_affinity_desc af_desc = {};
struct mlx5_irq *irq;
- irq = mlx5_irq_affinity_irq_request_auto(dev, &table->used_cpus, vecidx);
- if (IS_ERR(irq)) {
- /* In case SF irq pool does not exist, fallback to the PF irqs*/
- if (PTR_ERR(irq) == -ENOENT)
- return comp_irq_request_pci(dev, vecidx);
+ /* In case SF irq pool does not exist, fallback to the PF irqs*/
+ if (!mlx5_irq_pool_is_sf_pool(pool))
+ return comp_irq_request_pci(dev, vecidx);
+ af_desc.is_managed = 1;
+ cpumask_copy(&af_desc.mask, cpu_online_mask);
+ cpumask_andnot(&af_desc.mask, &af_desc.mask, &table->used_cpus);
+ irq = mlx5_irq_affinity_request(pool, &af_desc);
+ if (IS_ERR(irq))
return PTR_ERR(irq);
- }
+
+ cpumask_or(&table->used_cpus, &table->used_cpus, mlx5_irq_get_affinity_mask(irq));
+ mlx5_core_dbg(pool->dev, "IRQ %u mapped to cpu %*pbl, %u EQs on this irq\n",
+ pci_irq_vector(dev->pdev, mlx5_irq_get_index(irq)),
+ cpumask_pr_args(mlx5_irq_get_affinity_mask(irq)),
+ mlx5_irq_read_locked(irq) / MLX5_EQ_REFS_PER_IRQ);
return xa_err(xa_store(&table->comp_irqs, vecidx, irq, GFP_KERNEL));
}
dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- if (rep->vport == MLX5_VPORT_UPLINK && on_esw->offloads.ft_ipsec_tx_pol) {
+ if (rep->vport == MLX5_VPORT_UPLINK &&
+ on_esw == from_esw && on_esw->offloads.ft_ipsec_tx_pol) {
dest.ft = on_esw->offloads.ft_ipsec_tx_pol;
flow_act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL;
dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
if (pool->irqs_per_cpu)
cpu_put(pool, cpu);
}
-
-/**
- * mlx5_irq_affinity_irq_request_auto - request one IRQ for mlx5 device.
- * @dev: mlx5 device that is requesting the IRQ.
- * @used_cpus: cpumask of bounded cpus by the device
- * @vecidx: vector index to request an IRQ for.
- *
- * Each IRQ is bounded to at most 1 CPU.
- * This function is requesting an IRQ according to the default assignment.
- * The default assignment policy is:
- * - request the least loaded IRQ which is not bound to any
- * CPU of the previous IRQs requested.
- *
- * On success, this function updates used_cpus mask and returns an irq pointer.
- * In case of an error, an appropriate error pointer is returned.
- */
-struct mlx5_irq *mlx5_irq_affinity_irq_request_auto(struct mlx5_core_dev *dev,
- struct cpumask *used_cpus, u16 vecidx)
-{
- struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev);
- struct irq_affinity_desc af_desc = {};
- struct mlx5_irq *irq;
-
- if (!mlx5_irq_pool_is_sf_pool(pool))
- return ERR_PTR(-ENOENT);
-
- af_desc.is_managed = 1;
- cpumask_copy(&af_desc.mask, cpu_online_mask);
- cpumask_andnot(&af_desc.mask, &af_desc.mask, used_cpus);
- irq = mlx5_irq_affinity_request(pool, &af_desc);
-
- if (IS_ERR(irq))
- return irq;
-
- cpumask_or(used_cpus, used_cpus, mlx5_irq_get_affinity_mask(irq));
- mlx5_core_dbg(pool->dev, "IRQ %u mapped to cpu %*pbl, %u EQs on this irq\n",
- pci_irq_vector(dev->pdev, mlx5_irq_get_index(irq)),
- cpumask_pr_args(mlx5_irq_get_affinity_mask(irq)),
- mlx5_irq_read_locked(irq) / MLX5_EQ_REFS_PER_IRQ);
-
- return irq;
-}
static int mlx5_ptp_adjphase(struct ptp_clock_info *ptp, s32 delta)
{
- return mlx5_ptp_adjtime(ptp, delta);
+ struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info);
+ struct mlx5_core_dev *mdev;
+
+ mdev = container_of(clock, struct mlx5_core_dev, clock);
+
+ return mlx5_ptp_adjtime_real_time(mdev, delta);
}
static int mlx5_ptp_freq_adj_real_time(struct mlx5_core_dev *mdev, long scaled_ppm)
struct mlx5_irq {
struct atomic_notifier_head nh;
cpumask_var_t mask;
- char name[MLX5_MAX_IRQ_NAME];
+ char name[MLX5_MAX_IRQ_FORMATTED_NAME];
struct mlx5_irq_pool *pool;
int refcount;
struct msi_map map;
else
irq_sf_set_name(pool, name, i);
ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
- snprintf(irq->name, MLX5_MAX_IRQ_NAME,
- "%s@pci:%s", name, pci_name(dev->pdev));
+ snprintf(irq->name, MLX5_MAX_IRQ_FORMATTED_NAME,
+ MLX5_IRQ_NAME_FORMAT_STR, name, pci_name(dev->pdev));
err = request_irq(irq->map.virq, irq_int_handler, 0, irq->name,
&irq->nh);
if (err) {
#include <linux/mlx5/driver.h>
#define MLX5_MAX_IRQ_NAME (32)
+#define MLX5_IRQ_NAME_FORMAT_STR ("%s@pci:%s")
+#define MLX5_MAX_IRQ_FORMATTED_NAME \
+ (MLX5_MAX_IRQ_NAME + sizeof(MLX5_IRQ_NAME_FORMAT_STR))
/* max irq_index is 2047, so four chars */
#define MLX5_MAX_IRQ_IDX_CHARS (4)
#define MLX5_EQ_REFS_PER_IRQ (2)
static bool mlx5dr_action_supp_fwd_fdb_multi_ft(struct mlx5_core_dev *dev)
{
- return (MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_any_table_limit_regc) ||
+ return (MLX5_CAP_GEN(dev, steering_format_version) < MLX5_STEERING_FORMAT_CONNECTX_6DX ||
+ MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_any_table_limit_regc) ||
MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_any_table));
}
u32 cqn;
u32 pdn;
u32 max_send_wr;
- u32 max_send_sge;
struct mlx5_uars_page *uar;
u8 isolate_vl_tc:1;
};
return err == CQ_POLL_ERR ? err : npolled;
}
-static int dr_qp_get_args_update_send_wqe_size(struct dr_qp_init_attr *attr)
-{
- return roundup_pow_of_two(sizeof(struct mlx5_wqe_ctrl_seg) +
- sizeof(struct mlx5_wqe_flow_update_ctrl_seg) +
- sizeof(struct mlx5_wqe_header_modify_argument_update_seg));
-}
-
-/* We calculate for specific RC QP with the required functionality */
-static int dr_qp_calc_rc_send_wqe(struct dr_qp_init_attr *attr)
-{
- int update_arg_size;
- int inl_size = 0;
- int tot_size;
- int size;
-
- update_arg_size = dr_qp_get_args_update_send_wqe_size(attr);
-
- size = sizeof(struct mlx5_wqe_ctrl_seg) +
- sizeof(struct mlx5_wqe_raddr_seg);
- inl_size = size + ALIGN(sizeof(struct mlx5_wqe_inline_seg) +
- DR_STE_SIZE, 16);
-
- size += attr->max_send_sge * sizeof(struct mlx5_wqe_data_seg);
-
- size = max(size, update_arg_size);
-
- tot_size = max(size, inl_size);
-
- return ALIGN(tot_size, MLX5_SEND_WQE_BB);
-}
-
static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
struct dr_qp_init_attr *attr)
{
u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {};
struct mlx5_wq_param wqp;
struct mlx5dr_qp *dr_qp;
- int wqe_size;
int inlen;
void *qpc;
void *in;
if (err)
goto err_in;
dr_qp->uar = attr->uar;
- wqe_size = dr_qp_calc_rc_send_wqe(attr);
- dr_qp->max_inline_data = min(wqe_size -
- (sizeof(struct mlx5_wqe_ctrl_seg) +
- sizeof(struct mlx5_wqe_raddr_seg) +
- sizeof(struct mlx5_wqe_inline_seg)),
- (2 * MLX5_SEND_WQE_BB -
- (sizeof(struct mlx5_wqe_ctrl_seg) +
- sizeof(struct mlx5_wqe_raddr_seg) +
- sizeof(struct mlx5_wqe_inline_seg))));
return dr_qp;
MLX5_SEND_WQE_DS;
}
-static int dr_set_data_inl_seg(struct mlx5dr_qp *dr_qp,
- struct dr_data_seg *data_seg, void *wqe)
-{
- int inline_header_size = sizeof(struct mlx5_wqe_ctrl_seg) +
- sizeof(struct mlx5_wqe_raddr_seg) +
- sizeof(struct mlx5_wqe_inline_seg);
- struct mlx5_wqe_inline_seg *seg;
- int left_space;
- int inl = 0;
- void *addr;
- int len;
- int idx;
-
- seg = wqe;
- wqe += sizeof(*seg);
- addr = (void *)(unsigned long)(data_seg->addr);
- len = data_seg->length;
- inl += len;
- left_space = MLX5_SEND_WQE_BB - inline_header_size;
-
- if (likely(len > left_space)) {
- memcpy(wqe, addr, left_space);
- len -= left_space;
- addr += left_space;
- idx = (dr_qp->sq.pc + 1) & (dr_qp->sq.wqe_cnt - 1);
- wqe = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx);
- }
-
- memcpy(wqe, addr, len);
-
- if (likely(inl)) {
- seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG);
- return DIV_ROUND_UP(inl + sizeof(seg->byte_count),
- MLX5_SEND_WQE_DS);
- } else {
- return 0;
- }
-}
-
static void
-dr_rdma_handle_icm_write_segments(struct mlx5dr_qp *dr_qp,
- struct mlx5_wqe_ctrl_seg *wq_ctrl,
+dr_rdma_handle_icm_write_segments(struct mlx5_wqe_ctrl_seg *wq_ctrl,
u64 remote_addr,
u32 rkey,
struct dr_data_seg *data_seg,
wq_raddr->reserved = 0;
wq_dseg = (void *)(wq_raddr + 1);
- /* WQE ctrl segment + WQE remote addr segment */
- *size = (sizeof(*wq_ctrl) + sizeof(*wq_raddr)) / MLX5_SEND_WQE_DS;
- if (data_seg->send_flags & IB_SEND_INLINE) {
- *size += dr_set_data_inl_seg(dr_qp, data_seg, wq_dseg);
- } else {
- wq_dseg->byte_count = cpu_to_be32(data_seg->length);
- wq_dseg->lkey = cpu_to_be32(data_seg->lkey);
- wq_dseg->addr = cpu_to_be64(data_seg->addr);
- *size += sizeof(*wq_dseg) / MLX5_SEND_WQE_DS; /* WQE data segment */
- }
+ wq_dseg->byte_count = cpu_to_be32(data_seg->length);
+ wq_dseg->lkey = cpu_to_be32(data_seg->lkey);
+ wq_dseg->addr = cpu_to_be64(data_seg->addr);
+
+ *size = (sizeof(*wq_ctrl) + /* WQE ctrl segment */
+ sizeof(*wq_dseg) + /* WQE data segment */
+ sizeof(*wq_raddr)) / /* WQE remote addr segment */
+ MLX5_SEND_WQE_DS;
}
static void dr_set_ctrl_seg(struct mlx5_wqe_ctrl_seg *wq_ctrl,
switch (opcode) {
case MLX5_OPCODE_RDMA_READ:
case MLX5_OPCODE_RDMA_WRITE:
- dr_rdma_handle_icm_write_segments(dr_qp, wq_ctrl, remote_addr,
+ dr_rdma_handle_icm_write_segments(wq_ctrl, remote_addr,
rkey, data_seg, &size);
break;
case MLX5_OPCODE_FLOW_TBL_ACCESS:
if (send_ring->pending_wqe % send_ring->signal_th == 0)
send_info->write.send_flags |= IB_SEND_SIGNALED;
else
- send_info->write.send_flags &= ~IB_SEND_SIGNALED;
+ send_info->write.send_flags = 0;
}
static void dr_fill_write_icm_segs(struct mlx5dr_domain *dmn,
}
send_ring->pending_wqe++;
- if (!send_info->write.lkey)
- send_info->write.send_flags |= IB_SEND_INLINE;
if (send_ring->pending_wqe % send_ring->signal_th == 0)
send_info->write.send_flags |= IB_SEND_SIGNALED;
- else
- send_info->write.send_flags &= ~IB_SEND_SIGNALED;
send_ring->pending_wqe++;
send_info->read.length = send_info->write.length;
send_info->read.lkey = send_ring->sync_mr->mkey;
if (send_ring->pending_wqe % send_ring->signal_th == 0)
- send_info->read.send_flags |= IB_SEND_SIGNALED;
+ send_info->read.send_flags = IB_SEND_SIGNALED;
else
- send_info->read.send_flags &= ~IB_SEND_SIGNALED;
+ send_info->read.send_flags = 0;
}
static void dr_fill_data_segs(struct mlx5dr_domain *dmn,
dmn->send_ring->cq->qp = dmn->send_ring->qp;
dmn->info.max_send_wr = QUEUE_SIZE;
- init_attr.max_send_sge = 1;
dmn->info.max_inline_size = min(dmn->send_ring->qp->max_inline_data,
DR_STE_SIZE);
unsigned supports_gmii:1;
unsigned aspm_manageable:1;
+ unsigned dash_enabled:1;
dma_addr_t counters_phys_addr;
struct rtl8169_counters *counters;
struct rtl8169_tc_offsets tc_offset;
return r8168ep_ocp_read(tp, 0x128) & BIT(0);
}
-static enum rtl_dash_type rtl_check_dash(struct rtl8169_private *tp)
+static bool rtl_dash_is_enabled(struct rtl8169_private *tp)
+{
+ switch (tp->dash_type) {
+ case RTL_DASH_DP:
+ return r8168dp_check_dash(tp);
+ case RTL_DASH_EP:
+ return r8168ep_check_dash(tp);
+ default:
+ return false;
+ }
+}
+
+static enum rtl_dash_type rtl_get_dash_type(struct rtl8169_private *tp)
{
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_28:
case RTL_GIGA_MAC_VER_31:
- return r8168dp_check_dash(tp) ? RTL_DASH_DP : RTL_DASH_NONE;
+ return RTL_DASH_DP;
case RTL_GIGA_MAC_VER_51 ... RTL_GIGA_MAC_VER_53:
- return r8168ep_check_dash(tp) ? RTL_DASH_EP : RTL_DASH_NONE;
+ return RTL_DASH_EP;
default:
return RTL_DASH_NONE;
}
device_set_wakeup_enable(tp_to_dev(tp), wolopts);
- if (tp->dash_type == RTL_DASH_NONE) {
+ if (!tp->dash_enabled) {
rtl_set_d3_pll_down(tp, !wolopts);
tp->dev->wol_enabled = wolopts ? 1 : 0;
}
static void rtl_prepare_power_down(struct rtl8169_private *tp)
{
- if (tp->dash_type != RTL_DASH_NONE)
+ if (tp->dash_enabled)
return;
if (tp->mac_version == RTL_GIGA_MAC_VER_32 ||
rtl8169_cleanup(tp);
rtl_disable_exit_l1(tp);
rtl_prepare_power_down(tp);
+
+ if (tp->dash_type != RTL_DASH_NONE)
+ rtl8168_driver_stop(tp);
}
static void rtl8169_up(struct rtl8169_private *tp)
{
+ if (tp->dash_type != RTL_DASH_NONE)
+ rtl8168_driver_start(tp);
+
pci_set_master(tp->pci_dev);
phy_init_hw(tp->phydev);
phy_resume(tp->phydev);
{
struct rtl8169_private *tp = dev_get_drvdata(device);
- if (tp->dash_type != RTL_DASH_NONE)
+ if (tp->dash_enabled)
return -EBUSY;
if (!netif_running(tp->dev) || !netif_carrier_ok(tp->dev))
/* Restore original MAC address */
rtl_rar_set(tp, tp->dev->perm_addr);
- if (system_state == SYSTEM_POWER_OFF &&
- tp->dash_type == RTL_DASH_NONE) {
+ if (system_state == SYSTEM_POWER_OFF && !tp->dash_enabled) {
pci_wake_from_d3(pdev, tp->saved_wolopts);
pci_set_power_state(pdev, PCI_D3hot);
}
rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1);
tp->aspm_manageable = !rc;
- tp->dash_type = rtl_check_dash(tp);
+ tp->dash_type = rtl_get_dash_type(tp);
+ tp->dash_enabled = rtl_dash_is_enabled(tp);
tp->cp_cmd = RTL_R16(tp, CPlusCmd) & CPCMD_MASK;
/* configure chip for default features */
rtl8169_set_features(dev, dev->features);
- if (tp->dash_type == RTL_DASH_NONE) {
+ if (!tp->dash_enabled) {
rtl_set_d3_pll_down(tp, true);
} else {
rtl_set_d3_pll_down(tp, false);
"ok" : "ko");
if (tp->dash_type != RTL_DASH_NONE) {
- netdev_info(dev, "DASH enabled\n");
+ netdev_info(dev, "DASH %s\n",
+ tp->dash_enabled ? "enabled" : "disabled");
rtl8168_driver_start(tp);
}
dma_dir = page_pool_get_dma_dir(rx_q->page_pool);
buf_sz = DIV_ROUND_UP(priv->dma_conf.dma_buf_sz, PAGE_SIZE) * PAGE_SIZE;
+ limit = min(priv->dma_conf.dma_rx_size - 1, (unsigned int)limit);
if (netif_msg_rx_status(priv)) {
void *rx_head;
len = 0;
}
+read_again:
if (count >= limit)
break;
-read_again:
buf1_len = 0;
buf2_len = 0;
entry = next_entry;
&prueth->shram);
if (ret) {
dev_err(dev, "unable to get PRUSS SHRD RAM2: %d\n", ret);
- pruss_put(prueth->pruss);
+ goto put_pruss;
}
prueth->sram_pool = of_gen_pool_get(np, "sram", 0);
prueth->iep1 = icss_iep_get_idx(np, 1);
if (IS_ERR(prueth->iep1)) {
ret = dev_err_probe(dev, PTR_ERR(prueth->iep1), "iep1 get failed\n");
- icss_iep_put(prueth->iep0);
- prueth->iep0 = NULL;
- prueth->iep1 = NULL;
- goto free_pool;
+ goto put_iep0;
}
if (prueth->pdata.quirk_10m_link_issue) {
exit_iep:
if (prueth->pdata.quirk_10m_link_issue)
icss_iep_exit_fw(prueth->iep1);
+ icss_iep_put(prueth->iep1);
+
+put_iep0:
+ icss_iep_put(prueth->iep0);
+ prueth->iep0 = NULL;
+ prueth->iep1 = NULL;
free_pool:
gen_pool_free(prueth->sram_pool,
put_mem:
pruss_release_mem_region(prueth->pruss, &prueth->shram);
+
+put_pruss:
pruss_put(prueth->pruss);
put_cores:
return addr;
}
-static int ipvlan_process_v4_outbound(struct sk_buff *skb)
+static noinline_for_stack int ipvlan_process_v4_outbound(struct sk_buff *skb)
{
const struct iphdr *ip4h = ip_hdr(skb);
struct net_device *dev = skb->dev;
}
#if IS_ENABLED(CONFIG_IPV6)
-static int ipvlan_process_v6_outbound(struct sk_buff *skb)
+
+static noinline_for_stack int
+ipvlan_route_v6_outbound(struct net_device *dev, struct sk_buff *skb)
{
const struct ipv6hdr *ip6h = ipv6_hdr(skb);
- struct net_device *dev = skb->dev;
- struct net *net = dev_net(dev);
- struct dst_entry *dst;
- int err, ret = NET_XMIT_DROP;
struct flowi6 fl6 = {
.flowi6_oif = dev->ifindex,
.daddr = ip6h->daddr,
.flowi6_mark = skb->mark,
.flowi6_proto = ip6h->nexthdr,
};
+ struct dst_entry *dst;
+ int err;
- dst = ip6_route_output(net, NULL, &fl6);
- if (dst->error) {
- ret = dst->error;
+ dst = ip6_route_output(dev_net(dev), NULL, &fl6);
+ err = dst->error;
+ if (err) {
dst_release(dst);
- goto err;
+ return err;
}
skb_dst_set(skb, dst);
+ return 0;
+}
+
+static int ipvlan_process_v6_outbound(struct sk_buff *skb)
+{
+ struct net_device *dev = skb->dev;
+ int err, ret = NET_XMIT_DROP;
+
+ err = ipvlan_route_v6_outbound(dev, skb);
+ if (unlikely(err)) {
+ DEV_STATS_INC(dev, tx_errors);
+ kfree_skb(skb);
+ return err;
+ }
memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
- err = ip6_local_out(net, skb->sk, skb);
+ err = ip6_local_out(dev_net(dev), skb->sk, skb);
if (unlikely(net_xmit_eval(err)))
DEV_STATS_INC(dev, tx_errors);
else
ret = NET_XMIT_SUCCESS;
- goto out;
-err:
- DEV_STATS_INC(dev, tx_errors);
- kfree_skb(skb);
-out:
return ret;
}
#else
if (dev->flags & IFF_UP) {
if (change & IFF_ALLMULTI)
dev_set_allmulti(lowerdev, dev->flags & IFF_ALLMULTI ? 1 : -1);
- if (change & IFF_PROMISC)
+ if (!macvlan_passthru(vlan->port) && change & IFF_PROMISC)
dev_set_promiscuity(lowerdev,
dev->flags & IFF_PROMISC ? 1 : -1);
case PPPIOCSMRU:
if (get_user(val, (int __user *) argp))
break;
+ if (val > U16_MAX) {
+ err = -EINVAL;
+ break;
+ }
if (val < PPP_MRU)
val = PPP_MRU;
ap->mru = val;
/* strip address/control field if present */
p = skb->data;
- if (p[0] == PPP_ALLSTATIONS && p[1] == PPP_UI) {
+ if (skb->len >= 2 && p[0] == PPP_ALLSTATIONS && p[1] == PPP_UI) {
/* chop off address/control */
if (skb->len < 3)
goto err;
static int qemu_power_off(struct sys_off_data *data)
{
/* this turns the system off via SeaBIOS */
- *(int *)data->cb_data = 0;
+ gsc_writel(0, (unsigned long) data->cb_data);
pdc_soft_power_button(1);
return NOTIFY_DONE;
}
for (i = 0; i < cnt; i++) {
event[i] = queue->buf[queue->head];
- queue->head = (queue->head + 1) % PTP_MAX_TIMESTAMPS;
+ /* Paired with READ_ONCE() in queue_cnt() */
+ WRITE_ONCE(queue->head, (queue->head + 1) % PTP_MAX_TIMESTAMPS);
}
spin_unlock_irqrestore(&queue->lock, flags);
dst->t.sec = seconds;
dst->t.nsec = remainder;
+ /* Both WRITE_ONCE() are paired with READ_ONCE() in queue_cnt() */
if (!queue_free(queue))
- queue->head = (queue->head + 1) % PTP_MAX_TIMESTAMPS;
+ WRITE_ONCE(queue->head, (queue->head + 1) % PTP_MAX_TIMESTAMPS);
- queue->tail = (queue->tail + 1) % PTP_MAX_TIMESTAMPS;
+ WRITE_ONCE(queue->tail, (queue->tail + 1) % PTP_MAX_TIMESTAMPS);
spin_unlock_irqrestore(&queue->lock, flags);
}
* that a writer might concurrently increment the tail does not
* matter, since the queue remains nonempty nonetheless.
*/
-static inline int queue_cnt(struct timestamp_event_queue *q)
+static inline int queue_cnt(const struct timestamp_event_queue *q)
{
- int cnt = q->tail - q->head;
+ /*
+ * Paired with WRITE_ONCE() in enqueue_external_timestamp(),
+ * ptp_read(), extts_fifo_show().
+ */
+ int cnt = READ_ONCE(q->tail) - READ_ONCE(q->head);
return cnt < 0 ? PTP_MAX_TIMESTAMPS + cnt : cnt;
}
qcnt = queue_cnt(queue);
if (qcnt) {
event = queue->buf[queue->head];
- queue->head = (queue->head + 1) % PTP_MAX_TIMESTAMPS;
+ /* Paired with READ_ONCE() in queue_cnt() */
+ WRITE_ONCE(queue->head, (queue->head + 1) % PTP_MAX_TIMESTAMPS);
}
spin_unlock_irqrestore(&queue->lock, flags);
}
spin_lock_irqsave(qp->qp_lock_ptr, *flags);
- if (ret_cmd && blk_mq_request_started(scsi_cmd_to_rq(cmd)))
- sp->done(sp, res);
+ switch (sp->type) {
+ case SRB_SCSI_CMD:
+ if (ret_cmd && blk_mq_request_started(scsi_cmd_to_rq(cmd)))
+ sp->done(sp, res);
+ break;
+ default:
+ if (ret_cmd)
+ sp->done(sp, res);
+ break;
+ }
} else {
sp->done(sp, res);
}
struct sdebug_err_inject *inject;
struct scsi_device *sdev = (struct scsi_device *)file->f_inode->i_private;
- buf = kmalloc(count, GFP_KERNEL);
+ buf = kzalloc(count + 1, GFP_KERNEL);
if (!buf)
return -ENOMEM;
static int sdebug_target_alloc(struct scsi_target *starget)
{
struct sdebug_target_info *targetip;
- struct dentry *dentry;
targetip = kzalloc(sizeof(struct sdebug_target_info), GFP_KERNEL);
if (!targetip)
targetip->debugfs_entry = debugfs_create_dir(dev_name(&starget->dev),
sdebug_debugfs_root);
- if (IS_ERR_OR_NULL(targetip->debugfs_entry))
- pr_info("%s: failed to create debugfs directory for target %s\n",
- __func__, dev_name(&starget->dev));
debugfs_create_file("fail_reset", 0600, targetip->debugfs_entry, starget,
&sdebug_target_reset_fail_fops);
- if (IS_ERR_OR_NULL(dentry))
- pr_info("%s: failed to create fail_reset file for target %s\n",
- __func__, dev_name(&starget->dev));
starget->hostdata = targetip;
return disk_changed ? DISK_EVENT_MEDIA_CHANGE : 0;
}
-static int sd_sync_cache(struct scsi_disk *sdkp, struct scsi_sense_hdr *sshdr)
+static int sd_sync_cache(struct scsi_disk *sdkp)
{
int retries, res;
struct scsi_device *sdp = sdkp->device;
const int timeout = sdp->request_queue->rq_timeout
* SD_FLUSH_TIMEOUT_MULTIPLIER;
- struct scsi_sense_hdr my_sshdr;
+ struct scsi_sense_hdr sshdr;
const struct scsi_exec_args exec_args = {
.req_flags = BLK_MQ_REQ_PM,
- /* caller might not be interested in sense, but we need it */
- .sshdr = sshdr ? : &my_sshdr,
+ .sshdr = &sshdr,
};
if (!scsi_device_online(sdp))
return -ENODEV;
- sshdr = exec_args.sshdr;
-
for (retries = 3; retries > 0; --retries) {
unsigned char cmd[16] = { 0 };
return res;
if (scsi_status_is_check_condition(res) &&
- scsi_sense_valid(sshdr)) {
- sd_print_sense_hdr(sdkp, sshdr);
+ scsi_sense_valid(&sshdr)) {
+ sd_print_sense_hdr(sdkp, &sshdr);
/* we need to evaluate the error return */
- if (sshdr->asc == 0x3a || /* medium not present */
- sshdr->asc == 0x20 || /* invalid command */
- (sshdr->asc == 0x74 && sshdr->ascq == 0x71)) /* drive is password locked */
+ if (sshdr.asc == 0x3a || /* medium not present */
+ sshdr.asc == 0x20 || /* invalid command */
+ (sshdr.asc == 0x74 && sshdr.ascq == 0x71)) /* drive is password locked */
/* this is no error here */
return 0;
+ /*
+ * This drive doesn't support sync and there's not much
+ * we can do because this is called during shutdown
+ * or suspend so just return success so those operations
+ * can proceed.
+ */
+ if (sshdr.sense_key == ILLEGAL_REQUEST)
+ return 0;
}
switch (host_byte(res)) {
if (sdkp->WCE && sdkp->media_present) {
sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n");
- sd_sync_cache(sdkp, NULL);
+ sd_sync_cache(sdkp);
}
if ((system_state != SYSTEM_RESTART &&
static int sd_suspend_common(struct device *dev, bool runtime)
{
struct scsi_disk *sdkp = dev_get_drvdata(dev);
- struct scsi_sense_hdr sshdr;
int ret = 0;
if (!sdkp) /* E.g.: runtime suspend following sd_remove() */
if (sdkp->WCE && sdkp->media_present) {
if (!sdkp->device->silence_suspend)
sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n");
- ret = sd_sync_cache(sdkp, &sshdr);
-
- if (ret) {
- /* ignore OFFLINE device */
- if (ret == -ENODEV)
- return 0;
-
- if (!scsi_sense_valid(&sshdr) ||
- sshdr.sense_key != ILLEGAL_REQUEST)
- return ret;
+ ret = sd_sync_cache(sdkp);
+ /* ignore OFFLINE device */
+ if (ret == -ENODEV)
+ return 0;
- /*
- * sshdr.sense_key == ILLEGAL_REQUEST means this drive
- * doesn't support sync. There's not much to do and
- * suspend shouldn't fail.
- */
- ret = 0;
- }
+ if (ret)
+ return ret;
}
if (sd_do_start_stop(sdkp->device, runtime)) {
for (i = 0; i < hba->nr_hw_queues; i++) {
hwq = &hba->uhq[i];
- hwq->max_entries = hba->nutrs;
+ hwq->max_entries = hba->nutrs + 1;
spin_lock_init(&hwq->sq_lock);
spin_lock_init(&hwq->cq_lock);
mutex_init(&hwq->sq_mutex);
int tag = scsi_cmd_to_rq(cmd)->tag;
struct ufshcd_lrb *lrbp = &hba->lrb[tag];
struct ufs_hw_queue *hwq;
+ unsigned long flags;
int err = FAILED;
if (!ufshcd_cmd_inflight(lrbp->cmd)) {
}
err = SUCCESS;
+ spin_lock_irqsave(&hwq->cq_lock, flags);
if (ufshcd_cmd_inflight(lrbp->cmd))
ufshcd_release_scsi_cmd(hba, lrbp);
+ spin_unlock_irqrestore(&hwq->cq_lock, flags);
out:
return err;
if (blk->shared_backend) {
blk->buffer = shared_buffer;
} else {
- blk->buffer = kvmalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT,
+ blk->buffer = kvzalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT,
GFP_KERNEL);
if (!blk->buffer) {
ret = -ENOMEM;
goto parent_err;
if (shared_backend) {
- shared_buffer = kvmalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT,
+ shared_buffer = kvzalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT,
GFP_KERNEL);
if (!shared_buffer) {
ret = -ENOMEM;
err:
put_device(&v->dev);
- ida_simple_remove(&vhost_vdpa_ida, v->minor);
return r;
}
if (v != VIRTIO_MSI_NO_VECTOR) {
int irq = pci_irq_vector(vp_dev->pci_dev, v);
- irq_set_affinity_hint(irq, NULL);
+ irq_update_affinity_hint(irq, NULL);
free_irq(irq, vq);
}
}
mask = vp_dev->msix_affinity_masks[info->msix_vector];
irq = pci_irq_vector(vp_dev->pci_dev, info->msix_vector);
if (!cpu_mask)
- irq_set_affinity_hint(irq, NULL);
+ irq_update_affinity_hint(irq, NULL);
else {
cpumask_copy(mask, cpu_mask);
- irq_set_affinity_hint(irq, mask);
+ irq_set_affinity_and_hint(irq, mask);
}
}
return 0;
err = -EINVAL;
mdev->common = vp_modern_map_capability(mdev, common,
- sizeof(struct virtio_pci_common_cfg), 4,
- 0, sizeof(struct virtio_pci_modern_common_cfg),
- &mdev->common_len, NULL);
+ sizeof(struct virtio_pci_common_cfg), 4, 0,
+ offsetofend(struct virtio_pci_modern_common_cfg,
+ queue_reset),
+ &mdev->common_len, NULL);
if (!mdev->common)
goto err_map_common;
mdev->isr = vp_modern_map_capability(mdev, isr, sizeof(u8), 1,
int i;
struct shared_info *s = HYPERVISOR_shared_info;
struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
+ evtchn_port_t evtchn;
/* Timer interrupt has highest priority. */
- irq = irq_from_virq(cpu, VIRQ_TIMER);
+ irq = irq_evtchn_from_virq(cpu, VIRQ_TIMER, &evtchn);
if (irq != -1) {
- evtchn_port_t evtchn = evtchn_from_irq(irq);
word_idx = evtchn / BITS_PER_LONG;
bit_idx = evtchn % BITS_PER_LONG;
if (active_evtchns(cpu, s, word_idx) & (1ULL << bit_idx))
for (i = 0; i < EVTCHN_2L_NR_CHANNELS; i++) {
if (sync_test_bit(i, BM(sh->evtchn_pending))) {
int word_idx = i / BITS_PER_EVTCHN_WORD;
- printk(" %d: event %d -> irq %d%s%s%s\n",
+ printk(" %d: event %d -> irq %u%s%s%s\n",
cpu_from_evtchn(i), i,
- get_evtchn_to_irq(i),
+ irq_from_evtchn(i),
sync_test_bit(word_idx, BM(&v->evtchn_pending_sel))
? "" : " l2-clear",
!sync_test_bit(i, BM(sh->evtchn_mask))
/* IRQ <-> IPI mapping */
static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1};
+/* Cache for IPI event channels - needed for hot cpu unplug (avoid RCU usage). */
+static DEFINE_PER_CPU(evtchn_port_t [XEN_NR_IPIS], ipi_to_evtchn) = {[0 ... XEN_NR_IPIS-1] = 0};
/* Event channel distribution data */
static atomic_t channels_on_cpu[NR_CPUS];
#ifdef CONFIG_X86
static unsigned long *pirq_eoi_map;
#endif
-static bool (*pirq_needs_eoi)(unsigned irq);
+static bool (*pirq_needs_eoi)(struct irq_info *info);
#define EVTCHN_ROW(e) (e / (PAGE_SIZE/sizeof(**evtchn_to_irq)))
#define EVTCHN_COL(e) (e % (PAGE_SIZE/sizeof(**evtchn_to_irq)))
static struct irq_chip xen_percpu_chip;
static struct irq_chip xen_pirq_chip;
static void enable_dynirq(struct irq_data *data);
-static void disable_dynirq(struct irq_data *data);
static DEFINE_PER_CPU(unsigned int, irq_epoch);
return 0;
}
-int get_evtchn_to_irq(evtchn_port_t evtchn)
-{
- if (evtchn >= xen_evtchn_max_channels())
- return -1;
- if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL)
- return -1;
- return READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]);
-}
-
/* Get info for IRQ */
static struct irq_info *info_for_irq(unsigned irq)
{
irq_set_chip_data(irq, info);
}
+static struct irq_info *evtchn_to_info(evtchn_port_t evtchn)
+{
+ int irq;
+
+ if (evtchn >= xen_evtchn_max_channels())
+ return NULL;
+ if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL)
+ return NULL;
+ irq = READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]);
+
+ return (irq < 0) ? NULL : info_for_irq(irq);
+}
+
/* Per CPU channel accounting */
static void channels_on_cpu_dec(struct irq_info *info)
{
info->is_accounted = 1;
}
+static void xen_irq_free_desc(unsigned int irq)
+{
+ /* Legacy IRQ descriptors are managed by the arch. */
+ if (irq >= nr_legacy_irqs())
+ irq_free_desc(irq);
+}
+
static void delayed_free_irq(struct work_struct *work)
{
struct irq_info *info = container_of(to_rcu_work(work), struct irq_info,
kfree(info);
- /* Legacy IRQ descriptors are managed by the arch. */
- if (irq >= nr_legacy_irqs())
- irq_free_desc(irq);
+ xen_irq_free_desc(irq);
}
/* Constructors for packed IRQ information. */
static int xen_irq_info_common_setup(struct irq_info *info,
- unsigned irq,
enum xen_irq_type type,
evtchn_port_t evtchn,
unsigned short cpu)
BUG_ON(info->type != IRQT_UNBOUND && info->type != type);
info->type = type;
- info->irq = irq;
info->evtchn = evtchn;
info->cpu = cpu;
info->mask_reason = EVT_MASK_REASON_EXPLICIT;
raw_spin_lock_init(&info->lock);
- ret = set_evtchn_to_irq(evtchn, irq);
+ ret = set_evtchn_to_irq(evtchn, info->irq);
if (ret < 0)
return ret;
- irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN);
+ irq_clear_status_flags(info->irq, IRQ_NOREQUEST | IRQ_NOAUTOEN);
return xen_evtchn_port_setup(evtchn);
}
-static int xen_irq_info_evtchn_setup(unsigned irq,
+static int xen_irq_info_evtchn_setup(struct irq_info *info,
evtchn_port_t evtchn,
struct xenbus_device *dev)
{
- struct irq_info *info = info_for_irq(irq);
int ret;
- ret = xen_irq_info_common_setup(info, irq, IRQT_EVTCHN, evtchn, 0);
+ ret = xen_irq_info_common_setup(info, IRQT_EVTCHN, evtchn, 0);
info->u.interdomain = dev;
if (dev)
atomic_inc(&dev->event_channels);
return ret;
}
-static int xen_irq_info_ipi_setup(unsigned cpu,
- unsigned irq,
- evtchn_port_t evtchn,
- enum ipi_vector ipi)
+static int xen_irq_info_ipi_setup(struct irq_info *info, unsigned int cpu,
+ evtchn_port_t evtchn, enum ipi_vector ipi)
{
- struct irq_info *info = info_for_irq(irq);
-
info->u.ipi = ipi;
- per_cpu(ipi_to_irq, cpu)[ipi] = irq;
+ per_cpu(ipi_to_irq, cpu)[ipi] = info->irq;
+ per_cpu(ipi_to_evtchn, cpu)[ipi] = evtchn;
- return xen_irq_info_common_setup(info, irq, IRQT_IPI, evtchn, 0);
+ return xen_irq_info_common_setup(info, IRQT_IPI, evtchn, 0);
}
-static int xen_irq_info_virq_setup(unsigned cpu,
- unsigned irq,
- evtchn_port_t evtchn,
- unsigned virq)
+static int xen_irq_info_virq_setup(struct irq_info *info, unsigned int cpu,
+ evtchn_port_t evtchn, unsigned int virq)
{
- struct irq_info *info = info_for_irq(irq);
-
info->u.virq = virq;
- per_cpu(virq_to_irq, cpu)[virq] = irq;
+ per_cpu(virq_to_irq, cpu)[virq] = info->irq;
- return xen_irq_info_common_setup(info, irq, IRQT_VIRQ, evtchn, 0);
+ return xen_irq_info_common_setup(info, IRQT_VIRQ, evtchn, 0);
}
-static int xen_irq_info_pirq_setup(unsigned irq,
- evtchn_port_t evtchn,
- unsigned pirq,
- unsigned gsi,
- uint16_t domid,
- unsigned char flags)
+static int xen_irq_info_pirq_setup(struct irq_info *info, evtchn_port_t evtchn,
+ unsigned int pirq, unsigned int gsi,
+ uint16_t domid, unsigned char flags)
{
- struct irq_info *info = info_for_irq(irq);
-
info->u.pirq.pirq = pirq;
info->u.pirq.gsi = gsi;
info->u.pirq.domid = domid;
info->u.pirq.flags = flags;
- return xen_irq_info_common_setup(info, irq, IRQT_PIRQ, evtchn, 0);
+ return xen_irq_info_common_setup(info, IRQT_PIRQ, evtchn, 0);
}
static void xen_irq_info_cleanup(struct irq_info *info)
/*
* Accessors for packed IRQ information.
*/
-evtchn_port_t evtchn_from_irq(unsigned irq)
+static evtchn_port_t evtchn_from_irq(unsigned int irq)
{
const struct irq_info *info = NULL;
unsigned int irq_from_evtchn(evtchn_port_t evtchn)
{
- return get_evtchn_to_irq(evtchn);
+ struct irq_info *info = evtchn_to_info(evtchn);
+
+ return info ? info->irq : -1;
}
EXPORT_SYMBOL_GPL(irq_from_evtchn);
-int irq_from_virq(unsigned int cpu, unsigned int virq)
+int irq_evtchn_from_virq(unsigned int cpu, unsigned int virq,
+ evtchn_port_t *evtchn)
{
- return per_cpu(virq_to_irq, cpu)[virq];
+ int irq = per_cpu(virq_to_irq, cpu)[virq];
+
+ *evtchn = evtchn_from_irq(irq);
+
+ return irq;
}
-static enum ipi_vector ipi_from_irq(unsigned irq)
+static enum ipi_vector ipi_from_irq(struct irq_info *info)
{
- struct irq_info *info = info_for_irq(irq);
-
BUG_ON(info == NULL);
BUG_ON(info->type != IRQT_IPI);
return info->u.ipi;
}
-static unsigned virq_from_irq(unsigned irq)
+static unsigned int virq_from_irq(struct irq_info *info)
{
- struct irq_info *info = info_for_irq(irq);
-
BUG_ON(info == NULL);
BUG_ON(info->type != IRQT_VIRQ);
return info->u.virq;
}
-static unsigned pirq_from_irq(unsigned irq)
+static unsigned int pirq_from_irq(struct irq_info *info)
{
- struct irq_info *info = info_for_irq(irq);
-
BUG_ON(info == NULL);
BUG_ON(info->type != IRQT_PIRQ);
return info->u.pirq.pirq;
}
-static enum xen_irq_type type_from_irq(unsigned irq)
-{
- return info_for_irq(irq)->type;
-}
-
-static unsigned cpu_from_irq(unsigned irq)
-{
- return info_for_irq(irq)->cpu;
-}
-
unsigned int cpu_from_evtchn(evtchn_port_t evtchn)
{
- int irq = get_evtchn_to_irq(evtchn);
- unsigned ret = 0;
-
- if (irq != -1)
- ret = cpu_from_irq(irq);
+ struct irq_info *info = evtchn_to_info(evtchn);
- return ret;
+ return info ? info->cpu : 0;
}
static void do_mask(struct irq_info *info, u8 reason)
}
#ifdef CONFIG_X86
-static bool pirq_check_eoi_map(unsigned irq)
+static bool pirq_check_eoi_map(struct irq_info *info)
{
- return test_bit(pirq_from_irq(irq), pirq_eoi_map);
+ return test_bit(pirq_from_irq(info), pirq_eoi_map);
}
#endif
-static bool pirq_needs_eoi_flag(unsigned irq)
+static bool pirq_needs_eoi_flag(struct irq_info *info)
{
- struct irq_info *info = info_for_irq(irq);
BUG_ON(info->type != IRQT_PIRQ);
return info->u.pirq.flags & PIRQ_NEEDS_EOI;
}
-static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu,
+static void bind_evtchn_to_cpu(struct irq_info *info, unsigned int cpu,
bool force_affinity)
{
- int irq = get_evtchn_to_irq(evtchn);
- struct irq_info *info = info_for_irq(irq);
-
- BUG_ON(irq == -1);
-
if (IS_ENABLED(CONFIG_SMP) && force_affinity) {
- struct irq_data *data = irq_get_irq_data(irq);
+ struct irq_data *data = irq_get_irq_data(info->irq);
irq_data_update_affinity(data, cpumask_of(cpu));
irq_data_update_effective_affinity(data, cpumask_of(cpu));
}
- xen_evtchn_port_bind_to_cpu(evtchn, cpu, info->cpu);
+ xen_evtchn_port_bind_to_cpu(info->evtchn, cpu, info->cpu);
channels_on_cpu_dec(info);
info->cpu = cpu;
spin_lock_irqsave(&eoi->eoi_list_lock, flags);
- if (list_empty(&eoi->eoi_list)) {
+ elem = list_first_entry_or_null(&eoi->eoi_list, struct irq_info,
+ eoi_list);
+ if (!elem || info->eoi_time < elem->eoi_time) {
list_add(&info->eoi_list, &eoi->eoi_list);
mod_delayed_work_on(info->eoi_cpu, system_wq,
&eoi->delayed, delay);
}
EXPORT_SYMBOL_GPL(xen_irq_lateeoi);
-static void xen_irq_init(unsigned irq)
+static struct irq_info *xen_irq_init(unsigned int irq)
{
struct irq_info *info;
info = kzalloc(sizeof(*info), GFP_KERNEL);
- if (info == NULL)
- panic("Unable to allocate metadata for IRQ%d\n", irq);
+ if (info) {
+ info->irq = irq;
+ info->type = IRQT_UNBOUND;
+ info->refcnt = -1;
+ INIT_RCU_WORK(&info->rwork, delayed_free_irq);
- info->type = IRQT_UNBOUND;
- info->refcnt = -1;
- INIT_RCU_WORK(&info->rwork, delayed_free_irq);
+ set_info_for_irq(irq, info);
+ /*
+ * Interrupt affinity setting can be immediate. No point
+ * in delaying it until an interrupt is handled.
+ */
+ irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
- set_info_for_irq(irq, info);
- /*
- * Interrupt affinity setting can be immediate. No point
- * in delaying it until an interrupt is handled.
- */
- irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
+ INIT_LIST_HEAD(&info->eoi_list);
+ list_add_tail(&info->list, &xen_irq_list_head);
+ }
- INIT_LIST_HEAD(&info->eoi_list);
- list_add_tail(&info->list, &xen_irq_list_head);
+ return info;
}
-static int __must_check xen_allocate_irqs_dynamic(int nvec)
+static struct irq_info *xen_allocate_irq_dynamic(void)
{
- int i, irq = irq_alloc_descs(-1, 0, nvec, -1);
+ int irq = irq_alloc_desc_from(0, -1);
+ struct irq_info *info = NULL;
if (irq >= 0) {
- for (i = 0; i < nvec; i++)
- xen_irq_init(irq + i);
+ info = xen_irq_init(irq);
+ if (!info)
+ xen_irq_free_desc(irq);
}
- return irq;
-}
-
-static inline int __must_check xen_allocate_irq_dynamic(void)
-{
-
- return xen_allocate_irqs_dynamic(1);
+ return info;
}
-static int __must_check xen_allocate_irq_gsi(unsigned gsi)
+static struct irq_info *xen_allocate_irq_gsi(unsigned int gsi)
{
int irq;
+ struct irq_info *info;
/*
* A PV guest has no concept of a GSI (since it has no ACPI
else
irq = irq_alloc_desc_at(gsi, -1);
- xen_irq_init(irq);
+ info = xen_irq_init(irq);
+ if (!info)
+ xen_irq_free_desc(irq);
- return irq;
+ return info;
}
-static void xen_free_irq(unsigned irq)
+static void xen_free_irq(struct irq_info *info)
{
- struct irq_info *info = info_for_irq(irq);
-
if (WARN_ON(!info))
return;
clear_evtchn(info->evtchn);
}
-static void pirq_query_unmask(int irq)
+static void pirq_query_unmask(struct irq_info *info)
{
struct physdev_irq_status_query irq_status;
- struct irq_info *info = info_for_irq(irq);
-
- BUG_ON(info->type != IRQT_PIRQ);
- irq_status.irq = pirq_from_irq(irq);
+ irq_status.irq = pirq_from_irq(info);
if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
irq_status.flags = 0;
info->u.pirq.flags |= PIRQ_NEEDS_EOI;
}
-static void eoi_pirq(struct irq_data *data)
+static void do_eoi_pirq(struct irq_info *info)
{
- struct irq_info *info = info_for_irq(data->irq);
- evtchn_port_t evtchn = info ? info->evtchn : 0;
- struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) };
+ struct physdev_eoi eoi = { .irq = pirq_from_irq(info) };
int rc = 0;
- if (!VALID_EVTCHN(evtchn))
+ if (!VALID_EVTCHN(info->evtchn))
return;
event_handler_exit(info);
- if (pirq_needs_eoi(data->irq)) {
+ if (pirq_needs_eoi(info)) {
rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
WARN_ON(rc);
}
}
+static void eoi_pirq(struct irq_data *data)
+{
+ struct irq_info *info = info_for_irq(data->irq);
+
+ do_eoi_pirq(info);
+}
+
+static void do_disable_dynirq(struct irq_info *info)
+{
+ if (VALID_EVTCHN(info->evtchn))
+ do_mask(info, EVT_MASK_REASON_EXPLICIT);
+}
+
+static void disable_dynirq(struct irq_data *data)
+{
+ struct irq_info *info = info_for_irq(data->irq);
+
+ if (info)
+ do_disable_dynirq(info);
+}
+
static void mask_ack_pirq(struct irq_data *data)
{
- disable_dynirq(data);
- eoi_pirq(data);
+ struct irq_info *info = info_for_irq(data->irq);
+
+ if (info) {
+ do_disable_dynirq(info);
+ do_eoi_pirq(info);
+ }
}
-static unsigned int __startup_pirq(unsigned int irq)
+static unsigned int __startup_pirq(struct irq_info *info)
{
struct evtchn_bind_pirq bind_pirq;
- struct irq_info *info = info_for_irq(irq);
- evtchn_port_t evtchn = evtchn_from_irq(irq);
+ evtchn_port_t evtchn = info->evtchn;
int rc;
- BUG_ON(info->type != IRQT_PIRQ);
-
if (VALID_EVTCHN(evtchn))
goto out;
- bind_pirq.pirq = pirq_from_irq(irq);
+ bind_pirq.pirq = pirq_from_irq(info);
/* NB. We are happy to share unless we are probing. */
bind_pirq.flags = info->u.pirq.flags & PIRQ_SHAREABLE ?
BIND_PIRQ__WILL_SHARE : 0;
rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq);
if (rc != 0) {
- pr_warn("Failed to obtain physical IRQ %d\n", irq);
+ pr_warn("Failed to obtain physical IRQ %d\n", info->irq);
return 0;
}
evtchn = bind_pirq.port;
- pirq_query_unmask(irq);
+ pirq_query_unmask(info);
- rc = set_evtchn_to_irq(evtchn, irq);
+ rc = set_evtchn_to_irq(evtchn, info->irq);
if (rc)
goto err;
info->evtchn = evtchn;
- bind_evtchn_to_cpu(evtchn, 0, false);
+ bind_evtchn_to_cpu(info, 0, false);
rc = xen_evtchn_port_setup(evtchn);
if (rc)
out:
do_unmask(info, EVT_MASK_REASON_EXPLICIT);
- eoi_pirq(irq_get_irq_data(irq));
+ do_eoi_pirq(info);
return 0;
err:
- pr_err("irq%d: Failed to set port to irq mapping (%d)\n", irq, rc);
+ pr_err("irq%d: Failed to set port to irq mapping (%d)\n", info->irq,
+ rc);
xen_evtchn_close(evtchn);
return 0;
}
static unsigned int startup_pirq(struct irq_data *data)
{
- return __startup_pirq(data->irq);
+ struct irq_info *info = info_for_irq(data->irq);
+
+ return __startup_pirq(info);
}
static void shutdown_pirq(struct irq_data *data)
{
- unsigned int irq = data->irq;
- struct irq_info *info = info_for_irq(irq);
- evtchn_port_t evtchn = evtchn_from_irq(irq);
+ struct irq_info *info = info_for_irq(data->irq);
+ evtchn_port_t evtchn = info->evtchn;
BUG_ON(info->type != IRQT_PIRQ);
}
EXPORT_SYMBOL_GPL(xen_irq_from_gsi);
-static void __unbind_from_irq(unsigned int irq)
+static void __unbind_from_irq(struct irq_info *info, unsigned int irq)
{
- evtchn_port_t evtchn = evtchn_from_irq(irq);
- struct irq_info *info = info_for_irq(irq);
+ evtchn_port_t evtchn;
+
+ if (!info) {
+ xen_irq_free_desc(irq);
+ return;
+ }
if (info->refcnt > 0) {
info->refcnt--;
return;
}
+ evtchn = info->evtchn;
+
if (VALID_EVTCHN(evtchn)) {
- unsigned int cpu = cpu_from_irq(irq);
+ unsigned int cpu = info->cpu;
struct xenbus_device *dev;
if (!info->is_static)
xen_evtchn_close(evtchn);
- switch (type_from_irq(irq)) {
+ switch (info->type) {
case IRQT_VIRQ:
- per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1;
+ per_cpu(virq_to_irq, cpu)[virq_from_irq(info)] = -1;
break;
case IRQT_IPI:
- per_cpu(ipi_to_irq, cpu)[ipi_from_irq(irq)] = -1;
+ per_cpu(ipi_to_irq, cpu)[ipi_from_irq(info)] = -1;
+ per_cpu(ipi_to_evtchn, cpu)[ipi_from_irq(info)] = 0;
break;
case IRQT_EVTCHN:
dev = info->u.interdomain;
xen_irq_info_cleanup(info);
}
- xen_free_irq(irq);
+ xen_free_irq(info);
}
/*
int xen_bind_pirq_gsi_to_irq(unsigned gsi,
unsigned pirq, int shareable, char *name)
{
- int irq;
+ struct irq_info *info;
struct physdev_irq irq_op;
int ret;
mutex_lock(&irq_mapping_update_lock);
- irq = xen_irq_from_gsi(gsi);
- if (irq != -1) {
+ ret = xen_irq_from_gsi(gsi);
+ if (ret != -1) {
pr_info("%s: returning irq %d for gsi %u\n",
- __func__, irq, gsi);
+ __func__, ret, gsi);
goto out;
}
- irq = xen_allocate_irq_gsi(gsi);
- if (irq < 0)
+ info = xen_allocate_irq_gsi(gsi);
+ if (!info)
goto out;
- irq_op.irq = irq;
+ irq_op.irq = info->irq;
irq_op.vector = 0;
/* Only the privileged domain can do this. For non-priv, the pcifront
* this in the priv domain. */
if (xen_initial_domain() &&
HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
- xen_free_irq(irq);
- irq = -ENOSPC;
+ xen_free_irq(info);
+ ret = -ENOSPC;
goto out;
}
- ret = xen_irq_info_pirq_setup(irq, 0, pirq, gsi, DOMID_SELF,
+ ret = xen_irq_info_pirq_setup(info, 0, pirq, gsi, DOMID_SELF,
shareable ? PIRQ_SHAREABLE : 0);
if (ret < 0) {
- __unbind_from_irq(irq);
- irq = ret;
+ __unbind_from_irq(info, info->irq);
goto out;
}
- pirq_query_unmask(irq);
+ pirq_query_unmask(info);
/* We try to use the handler with the appropriate semantic for the
* type of interrupt: if the interrupt is an edge triggered
* interrupt we use handle_edge_irq.
* is the right choice either way.
*/
if (shareable)
- irq_set_chip_and_handler_name(irq, &xen_pirq_chip,
+ irq_set_chip_and_handler_name(info->irq, &xen_pirq_chip,
handle_fasteoi_irq, name);
else
- irq_set_chip_and_handler_name(irq, &xen_pirq_chip,
+ irq_set_chip_and_handler_name(info->irq, &xen_pirq_chip,
handle_edge_irq, name);
+ ret = info->irq;
+
out:
mutex_unlock(&irq_mapping_update_lock);
- return irq;
+ return ret;
}
#ifdef CONFIG_PCI_MSI
int pirq, int nvec, const char *name, domid_t domid)
{
int i, irq, ret;
+ struct irq_info *info;
mutex_lock(&irq_mapping_update_lock);
- irq = xen_allocate_irqs_dynamic(nvec);
+ irq = irq_alloc_descs(-1, 0, nvec, -1);
if (irq < 0)
goto out;
for (i = 0; i < nvec; i++) {
+ info = xen_irq_init(irq + i);
+ if (!info)
+ goto error_irq;
+
irq_set_chip_and_handler_name(irq + i, &xen_pirq_chip, handle_edge_irq, name);
- ret = xen_irq_info_pirq_setup(irq + i, 0, pirq + i, 0, domid,
+ ret = xen_irq_info_pirq_setup(info, 0, pirq + i, 0, domid,
i == 0 ? 0 : PIRQ_MSI_GROUP);
if (ret < 0)
goto error_irq;
out:
mutex_unlock(&irq_mapping_update_lock);
return irq;
+
error_irq:
- while (nvec--)
- __unbind_from_irq(irq + nvec);
+ while (nvec--) {
+ info = info_for_irq(irq + nvec);
+ __unbind_from_irq(info, irq + nvec);
+ }
mutex_unlock(&irq_mapping_update_lock);
return ret;
}
}
}
- xen_free_irq(irq);
+ xen_free_irq(info);
out:
mutex_unlock(&irq_mapping_update_lock);
return rc;
}
-int xen_irq_from_pirq(unsigned pirq)
-{
- int irq;
-
- struct irq_info *info;
-
- mutex_lock(&irq_mapping_update_lock);
-
- list_for_each_entry(info, &xen_irq_list_head, list) {
- if (info->type != IRQT_PIRQ)
- continue;
- irq = info->irq;
- if (info->u.pirq.pirq == pirq)
- goto out;
- }
- irq = -1;
-out:
- mutex_unlock(&irq_mapping_update_lock);
-
- return irq;
-}
-
-
int xen_pirq_from_irq(unsigned irq)
{
- return pirq_from_irq(irq);
+ struct irq_info *info = info_for_irq(irq);
+
+ return pirq_from_irq(info);
}
EXPORT_SYMBOL_GPL(xen_pirq_from_irq);
static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip,
struct xenbus_device *dev)
{
- int irq;
- int ret;
+ int ret = -ENOMEM;
+ struct irq_info *info;
if (evtchn >= xen_evtchn_max_channels())
return -ENOMEM;
mutex_lock(&irq_mapping_update_lock);
- irq = get_evtchn_to_irq(evtchn);
+ info = evtchn_to_info(evtchn);
- if (irq == -1) {
- irq = xen_allocate_irq_dynamic();
- if (irq < 0)
+ if (!info) {
+ info = xen_allocate_irq_dynamic();
+ if (!info)
goto out;
- irq_set_chip_and_handler_name(irq, chip,
+ irq_set_chip_and_handler_name(info->irq, chip,
handle_edge_irq, "event");
- ret = xen_irq_info_evtchn_setup(irq, evtchn, dev);
+ ret = xen_irq_info_evtchn_setup(info, evtchn, dev);
if (ret < 0) {
- __unbind_from_irq(irq);
- irq = ret;
+ __unbind_from_irq(info, info->irq);
goto out;
}
/*
* affinity setting is not invoked on them so nothing would
* bind the channel.
*/
- bind_evtchn_to_cpu(evtchn, 0, false);
- } else {
- struct irq_info *info = info_for_irq(irq);
- if (!WARN_ON(!info || info->type != IRQT_EVTCHN))
- info->refcnt++;
+ bind_evtchn_to_cpu(info, 0, false);
+ } else if (!WARN_ON(info->type != IRQT_EVTCHN)) {
+ info->refcnt++;
}
+ ret = info->irq;
+
out:
mutex_unlock(&irq_mapping_update_lock);
- return irq;
+ return ret;
}
int bind_evtchn_to_irq(evtchn_port_t evtchn)
{
struct evtchn_bind_ipi bind_ipi;
evtchn_port_t evtchn;
- int ret, irq;
+ struct irq_info *info;
+ int ret;
mutex_lock(&irq_mapping_update_lock);
- irq = per_cpu(ipi_to_irq, cpu)[ipi];
+ ret = per_cpu(ipi_to_irq, cpu)[ipi];
- if (irq == -1) {
- irq = xen_allocate_irq_dynamic();
- if (irq < 0)
+ if (ret == -1) {
+ info = xen_allocate_irq_dynamic();
+ if (!info)
goto out;
- irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
+ irq_set_chip_and_handler_name(info->irq, &xen_percpu_chip,
handle_percpu_irq, "ipi");
bind_ipi.vcpu = xen_vcpu_nr(cpu);
BUG();
evtchn = bind_ipi.port;
- ret = xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
+ ret = xen_irq_info_ipi_setup(info, cpu, evtchn, ipi);
if (ret < 0) {
- __unbind_from_irq(irq);
- irq = ret;
+ __unbind_from_irq(info, info->irq);
goto out;
}
/*
* Force the affinity mask to the target CPU so proc shows
* the correct target.
*/
- bind_evtchn_to_cpu(evtchn, cpu, true);
+ bind_evtchn_to_cpu(info, cpu, true);
+ ret = info->irq;
} else {
- struct irq_info *info = info_for_irq(irq);
+ info = info_for_irq(ret);
WARN_ON(info == NULL || info->type != IRQT_IPI);
}
out:
mutex_unlock(&irq_mapping_update_lock);
- return irq;
+ return ret;
}
static int bind_interdomain_evtchn_to_irq_chip(struct xenbus_device *dev,
{
struct evtchn_bind_virq bind_virq;
evtchn_port_t evtchn = 0;
- int irq, ret;
+ struct irq_info *info;
+ int ret;
mutex_lock(&irq_mapping_update_lock);
- irq = per_cpu(virq_to_irq, cpu)[virq];
+ ret = per_cpu(virq_to_irq, cpu)[virq];
- if (irq == -1) {
- irq = xen_allocate_irq_dynamic();
- if (irq < 0)
+ if (ret == -1) {
+ info = xen_allocate_irq_dynamic();
+ if (!info)
goto out;
if (percpu)
- irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
+ irq_set_chip_and_handler_name(info->irq, &xen_percpu_chip,
handle_percpu_irq, "virq");
else
- irq_set_chip_and_handler_name(irq, &xen_dynamic_chip,
+ irq_set_chip_and_handler_name(info->irq, &xen_dynamic_chip,
handle_edge_irq, "virq");
bind_virq.virq = virq;
BUG_ON(ret < 0);
}
- ret = xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
+ ret = xen_irq_info_virq_setup(info, cpu, evtchn, virq);
if (ret < 0) {
- __unbind_from_irq(irq);
- irq = ret;
+ __unbind_from_irq(info, info->irq);
goto out;
}
* Force the affinity mask for percpu interrupts so proc
* shows the correct target.
*/
- bind_evtchn_to_cpu(evtchn, cpu, percpu);
+ bind_evtchn_to_cpu(info, cpu, percpu);
+ ret = info->irq;
} else {
- struct irq_info *info = info_for_irq(irq);
+ info = info_for_irq(ret);
WARN_ON(info == NULL || info->type != IRQT_VIRQ);
}
out:
mutex_unlock(&irq_mapping_update_lock);
- return irq;
+ return ret;
}
static void unbind_from_irq(unsigned int irq)
{
+ struct irq_info *info;
+
mutex_lock(&irq_mapping_update_lock);
- __unbind_from_irq(irq);
+ info = info_for_irq(irq);
+ __unbind_from_irq(info, irq);
mutex_unlock(&irq_mapping_update_lock);
}
int evtchn_make_refcounted(evtchn_port_t evtchn, bool is_static)
{
- int irq = get_evtchn_to_irq(evtchn);
- struct irq_info *info;
-
- if (irq == -1)
- return -ENOENT;
-
- info = info_for_irq(irq);
+ struct irq_info *info = evtchn_to_info(evtchn);
if (!info)
return -ENOENT;
int evtchn_get(evtchn_port_t evtchn)
{
- int irq;
struct irq_info *info;
int err = -ENOENT;
mutex_lock(&irq_mapping_update_lock);
- irq = get_evtchn_to_irq(evtchn);
- if (irq == -1)
- goto done;
-
- info = info_for_irq(irq);
+ info = evtchn_to_info(evtchn);
if (!info)
goto done;
void evtchn_put(evtchn_port_t evtchn)
{
- int irq = get_evtchn_to_irq(evtchn);
- if (WARN_ON(irq == -1))
+ struct irq_info *info = evtchn_to_info(evtchn);
+
+ if (WARN_ON(!info))
return;
- unbind_from_irq(irq);
+ unbind_from_irq(info->irq);
}
EXPORT_SYMBOL_GPL(evtchn_put);
void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
{
- int irq;
+ evtchn_port_t evtchn;
#ifdef CONFIG_X86
if (unlikely(vector == XEN_NMI_VECTOR)) {
return;
}
#endif
- irq = per_cpu(ipi_to_irq, cpu)[vector];
- BUG_ON(irq < 0);
- notify_remote_via_irq(irq);
+ evtchn = per_cpu(ipi_to_evtchn, cpu)[vector];
+ BUG_ON(evtchn == 0);
+ notify_remote_via_evtchn(evtchn);
}
struct evtchn_loop_ctrl {
void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
{
- int irq;
- struct irq_info *info;
+ struct irq_info *info = evtchn_to_info(port);
struct xenbus_device *dev;
- irq = get_evtchn_to_irq(port);
- if (irq == -1)
+ if (!info)
return;
/*
}
}
- info = info_for_irq(irq);
if (xchg_acquire(&info->is_active, 1))
return;
info->eoi_time = get_jiffies_64() + event_eoi_delay;
}
- generic_handle_irq(irq);
+ generic_handle_irq(info->irq);
}
int xen_evtchn_do_upcall(void)
mutex_lock(&irq_mapping_update_lock);
/* After resume the irq<->evtchn mappings are all cleared out */
- BUG_ON(get_evtchn_to_irq(evtchn) != -1);
+ BUG_ON(evtchn_to_info(evtchn));
/* Expect irq to have been bound before,
so there should be a proper type */
BUG_ON(info->type == IRQT_UNBOUND);
- (void)xen_irq_info_evtchn_setup(irq, evtchn, NULL);
+ info->irq = irq;
+ (void)xen_irq_info_evtchn_setup(info, evtchn, NULL);
mutex_unlock(&irq_mapping_update_lock);
- bind_evtchn_to_cpu(evtchn, info->cpu, false);
+ bind_evtchn_to_cpu(info, info->cpu, false);
/* Unmask the event channel. */
enable_irq(irq);
* it, but don't do the xenlinux-level rebind in that case.
*/
if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
- bind_evtchn_to_cpu(evtchn, tcpu, false);
+ bind_evtchn_to_cpu(info, tcpu, false);
do_unmask(info, EVT_MASK_REASON_TEMPORARY);
do_unmask(info, EVT_MASK_REASON_EXPLICIT);
}
-static void disable_dynirq(struct irq_data *data)
+static void do_ack_dynirq(struct irq_info *info)
{
- struct irq_info *info = info_for_irq(data->irq);
- evtchn_port_t evtchn = info ? info->evtchn : 0;
+ evtchn_port_t evtchn = info->evtchn;
if (VALID_EVTCHN(evtchn))
- do_mask(info, EVT_MASK_REASON_EXPLICIT);
+ event_handler_exit(info);
}
static void ack_dynirq(struct irq_data *data)
{
struct irq_info *info = info_for_irq(data->irq);
- evtchn_port_t evtchn = info ? info->evtchn : 0;
- if (VALID_EVTCHN(evtchn))
- event_handler_exit(info);
+ if (info)
+ do_ack_dynirq(info);
}
static void mask_ack_dynirq(struct irq_data *data)
{
- disable_dynirq(data);
- ack_dynirq(data);
+ struct irq_info *info = info_for_irq(data->irq);
+
+ if (info) {
+ do_disable_dynirq(info);
+ do_ack_dynirq(info);
+ }
}
static void lateeoi_ack_dynirq(struct irq_data *data)
if (rc) {
pr_warn("xen map irq failed gsi=%d irq=%d pirq=%d rc=%d\n",
gsi, irq, pirq, rc);
- xen_free_irq(irq);
+ xen_free_irq(info);
continue;
}
printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq);
- __startup_pirq(irq);
+ __startup_pirq(info);
}
}
{
struct evtchn_bind_virq bind_virq;
evtchn_port_t evtchn;
+ struct irq_info *info;
int virq, irq;
for (virq = 0; virq < NR_VIRQS; virq++) {
if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1)
continue;
+ info = info_for_irq(irq);
- BUG_ON(virq_from_irq(irq) != virq);
+ BUG_ON(virq_from_irq(info) != virq);
/* Get a new binding from Xen. */
bind_virq.virq = virq;
evtchn = bind_virq.port;
/* Record the new mapping. */
- (void)xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
+ xen_irq_info_virq_setup(info, cpu, evtchn, virq);
/* The affinity mask is still valid */
- bind_evtchn_to_cpu(evtchn, cpu, false);
+ bind_evtchn_to_cpu(info, cpu, false);
}
}
{
struct evtchn_bind_ipi bind_ipi;
evtchn_port_t evtchn;
+ struct irq_info *info;
int ipi, irq;
for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) {
if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1)
continue;
+ info = info_for_irq(irq);
- BUG_ON(ipi_from_irq(irq) != ipi);
+ BUG_ON(ipi_from_irq(info) != ipi);
/* Get a new binding from Xen. */
bind_ipi.vcpu = xen_vcpu_nr(cpu);
evtchn = bind_ipi.port;
/* Record the new mapping. */
- (void)xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
+ xen_irq_info_ipi_setup(info, cpu, evtchn, ipi);
/* The affinity mask is still valid */
- bind_evtchn_to_cpu(evtchn, cpu, false);
+ bind_evtchn_to_cpu(info, cpu, false);
}
}
event_handler_exit(info);
}
EXPORT_SYMBOL(xen_clear_irq_pending);
-void xen_set_irq_pending(int irq)
-{
- evtchn_port_t evtchn = evtchn_from_irq(irq);
-
- if (VALID_EVTCHN(evtchn))
- set_evtchn(evtchn);
-}
bool xen_test_irq_pending(int irq)
{
extern const struct evtchn_ops *evtchn_ops;
-int get_evtchn_to_irq(evtchn_port_t evtchn);
void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl);
unsigned int cpu_from_evtchn(evtchn_port_t evtchn);
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
+#ifdef CONFIG_ACPI
+#include <acpi/processor.h>
+#endif
/*
* @cpu_id: Xen physical cpu logic number
return online;
}
+
+void xen_sanitize_proc_cap_bits(uint32_t *cap)
+{
+ struct xen_platform_op op = {
+ .cmd = XENPF_set_processor_pminfo,
+ .u.set_pminfo.id = -1,
+ .u.set_pminfo.type = XEN_PM_PDC,
+ };
+ u32 buf[3] = { ACPI_PDC_REVISION_ID, 1, *cap };
+ int ret;
+
+ set_xen_guest_handle(op.u.set_pminfo.pdc, buf);
+ ret = HYPERVISOR_platform_op(&op);
+ if (ret)
+ pr_err("sanitize of _PDC buffer bits from Xen failed: %d\n",
+ ret);
+ else
+ *cap = buf[2];
+}
#endif
#include <xen/xen-front-pgdir-shbuf.h>
-/**
+/*
* This structure represents the structure of a shared page
* that contains grant references to the pages of the shared
* buffer. This structure is common to many Xen para-virtualized
grant_ref_t gref[]; /* Variable length */
};
-/**
+/*
* Shared buffer ops which are differently implemented
* depending on the allocation mode, e.g. if the buffer
* is allocated by the corresponding backend or frontend.
int (*unmap)(struct xen_front_pgdir_shbuf *buf);
};
-/**
+/*
* Get granted reference to the very first page of the
* page directory. Usually this is passed to the backend,
* so it can find/fill the grant references to the buffer's
}
EXPORT_SYMBOL_GPL(xen_front_pgdir_shbuf_get_dir_start);
-/**
+/*
* Map granted references of the shared buffer.
*
* Depending on the shared buffer mode of allocation
}
EXPORT_SYMBOL_GPL(xen_front_pgdir_shbuf_map);
-/**
+/*
* Unmap granted references of the shared buffer.
*
* Depending on the shared buffer mode of allocation
}
EXPORT_SYMBOL_GPL(xen_front_pgdir_shbuf_unmap);
-/**
+/*
* Free all the resources of the shared buffer.
*
* \param buf shared buffer which resources to be freed.
offsetof(struct xen_page_directory, \
gref)) / sizeof(grant_ref_t))
-/**
+/*
* Get the number of pages the page directory consumes itself.
*
* \param buf shared buffer.
return DIV_ROUND_UP(buf->num_pages, XEN_NUM_GREFS_PER_PAGE);
}
-/**
+/*
* Calculate the number of grant references needed to share the buffer
* and its pages when backend allocates the buffer.
*
buf->num_grefs = get_num_pages_dir(buf);
}
-/**
+/*
* Calculate the number of grant references needed to share the buffer
* and its pages when frontend allocates the buffer.
*
#define xen_page_to_vaddr(page) \
((uintptr_t)pfn_to_kaddr(page_to_xen_pfn(page)))
-/**
+/*
* Unmap the buffer previously mapped with grant references
* provided by the backend.
*
return ret;
}
-/**
+/*
* Map the buffer with grant references provided by the backend.
*
* \param buf shared buffer.
return ret;
}
-/**
+/*
* Fill page directory with grant references to the pages of the
* page directory itself.
*
page_dir->gref_dir_next_page = XEN_GREF_LIST_END;
}
-/**
+/*
* Fill page directory with grant references to the pages of the
* page directory and the buffer we share with the backend.
*
}
}
-/**
+/*
* Grant references to the frontend's buffer pages.
*
* These will be shared with the backend, so it can
return 0;
}
-/**
+/*
* Grant all the references needed to share the buffer.
*
* Grant references to the page directory pages and, if
return 0;
}
-/**
+/*
* Allocate all required structures to mange shared buffer.
*
* \param buf shared buffer.
.grant_refs_for_buffer = guest_grant_refs_for_buffer,
};
-/**
+/*
* Allocate a new instance of a shared buffer.
*
* \param cfg configuration to be used while allocating a new shared buffer.
bp.level - 1,
0);
b = bch2_btree_iter_peek_node(iter);
- if (IS_ERR(b))
+ if (IS_ERR_OR_NULL(b))
goto err;
BUG_ON(b->c.level != bp.level - 1);
- if (b && extent_matches_bp(c, bp.btree_id, bp.level,
- bkey_i_to_s_c(&b->key),
- bucket, bp))
+ if (extent_matches_bp(c, bp.btree_id, bp.level,
+ bkey_i_to_s_c(&b->key),
+ bucket, bp))
return b;
- if (b && btree_node_will_make_reachable(b)) {
+ if (btree_node_will_make_reachable(b)) {
b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
} else {
backpointer_not_found(trans, bp_pos, bp, bkey_i_to_s_c(&b->key));
u64 start;
u64 end;
bool dirty;
- } entries[0];
+ } entries[];
};
struct journal_keys {
srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
}
- bch2_journal_preres_put(&c->journal, &trans->journal_preres);
-
kfree(trans->extra_journal_entries.data);
if (trans->fs_usage_deltas) {
ck->btree_trans_barrier_seq =
start_poll_synchronize_srcu(&c->btree_trans_barrier);
- if (ck->c.lock.readers)
+ if (ck->c.lock.readers) {
list_move_tail(&ck->list, &bc->freed_pcpu);
- else
+ bc->nr_freed_pcpu++;
+ } else {
list_move_tail(&ck->list, &bc->freed_nonpcpu);
+ bc->nr_freed_nonpcpu++;
+ }
atomic_long_inc(&bc->nr_freed);
kfree(ck->k);
{
struct bkey_cached *pos;
+ bc->nr_freed_nonpcpu++;
+
list_for_each_entry_reverse(pos, &bc->freed_nonpcpu, list) {
if (ULONG_CMP_GE(ck->btree_trans_barrier_seq,
pos->btree_trans_barrier_seq)) {
#else
mutex_lock(&bc->lock);
list_move_tail(&ck->list, &bc->freed_nonpcpu);
+ bc->nr_freed_nonpcpu++;
mutex_unlock(&bc->lock);
#endif
} else {
f->nr < ARRAY_SIZE(f->objs) / 2) {
ck = list_last_entry(&bc->freed_nonpcpu, struct bkey_cached, list);
list_del_init(&ck->list);
+ bc->nr_freed_nonpcpu--;
f->objs[f->nr++] = ck;
}
if (!list_empty(&bc->freed_nonpcpu)) {
ck = list_last_entry(&bc->freed_nonpcpu, struct bkey_cached, list);
list_del_init(&ck->list);
+ bc->nr_freed_nonpcpu--;
}
mutex_unlock(&bc->lock);
#endif
goto out;
bch2_journal_pin_drop(j, &ck->journal);
- bch2_journal_preres_put(j, &ck->res);
BUG_ON(!btree_node_locked(c_iter.path, 0));
BUG_ON(insert->k.u64s > ck->u64s);
- if (likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY))) {
- int difference;
-
- BUG_ON(jset_u64s(insert->k.u64s) > trans->journal_preres.u64s);
-
- difference = jset_u64s(insert->k.u64s) - ck->res.u64s;
- if (difference > 0) {
- trans->journal_preres.u64s -= difference;
- ck->res.u64s += difference;
- }
- }
-
bkey_copy(ck->k, insert);
ck->valid = true;
* Newest freed entries are at the end of the list - once we hit one
* that's too new to be freed, we can bail out:
*/
+ scanned += bc->nr_freed_nonpcpu;
+
list_for_each_entry_safe(ck, t, &bc->freed_nonpcpu, list) {
if (!poll_state_synchronize_srcu(&c->btree_trans_barrier,
ck->btree_trans_barrier_seq))
six_lock_exit(&ck->c.lock);
kmem_cache_free(bch2_key_cache, ck);
atomic_long_dec(&bc->nr_freed);
- scanned++;
freed++;
+ bc->nr_freed_nonpcpu--;
}
if (scanned >= nr)
goto out;
+ scanned += bc->nr_freed_pcpu;
+
list_for_each_entry_safe(ck, t, &bc->freed_pcpu, list) {
if (!poll_state_synchronize_srcu(&c->btree_trans_barrier,
ck->btree_trans_barrier_seq))
six_lock_exit(&ck->c.lock);
kmem_cache_free(bch2_key_cache, ck);
atomic_long_dec(&bc->nr_freed);
- scanned++;
freed++;
+ bc->nr_freed_pcpu--;
}
if (scanned >= nr)
}
#endif
+ BUG_ON(list_count_nodes(&bc->freed_pcpu) != bc->nr_freed_pcpu);
+ BUG_ON(list_count_nodes(&bc->freed_nonpcpu) != bc->nr_freed_nonpcpu);
+
list_splice(&bc->freed_pcpu, &items);
list_splice(&bc->freed_nonpcpu, &items);
cond_resched();
bch2_journal_pin_drop(&c->journal, &ck->journal);
- bch2_journal_preres_put(&c->journal, &ck->res);
list_del(&ck->list);
kfree(ck->k);
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BTREE_KEY_CACHE_TYPES_H
+#define _BCACHEFS_BTREE_KEY_CACHE_TYPES_H
+
+struct btree_key_cache_freelist {
+ struct bkey_cached *objs[16];
+ unsigned nr;
+};
+
+struct btree_key_cache {
+ struct mutex lock;
+ struct rhashtable table;
+ bool table_init_done;
+
+ struct list_head freed_pcpu;
+ size_t nr_freed_pcpu;
+ struct list_head freed_nonpcpu;
+ size_t nr_freed_nonpcpu;
+
+ struct shrinker *shrink;
+ unsigned shrink_iter;
+ struct btree_key_cache_freelist __percpu *pcpu_freed;
+
+ atomic_long_t nr_freed;
+ atomic_long_t nr_keys;
+ atomic_long_t nr_dirty;
+};
+
+struct bkey_cached_key {
+ u32 btree_id;
+ struct bpos pos;
+} __packed __aligned(4);
+
+#endif /* _BCACHEFS_BTREE_KEY_CACHE_TYPES_H */
bch2_btree_init_next(trans, b);
}
+static noinline int trans_lock_write_fail(struct btree_trans *trans, struct btree_insert_entry *i)
+{
+ while (--i >= trans->updates) {
+ if (same_leaf_as_prev(trans, i))
+ continue;
+
+ bch2_btree_node_unlock_write(trans, i->path, insert_l(i)->b);
+ }
+
+ trace_and_count(trans->c, trans_restart_would_deadlock_write, trans);
+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock_write);
+}
+
+static inline int bch2_trans_lock_write(struct btree_trans *trans)
+{
+ struct btree_insert_entry *i;
+
+ EBUG_ON(trans->write_locked);
+
+ trans_for_each_update(trans, i) {
+ if (same_leaf_as_prev(trans, i))
+ continue;
+
+ if (bch2_btree_node_lock_write(trans, i->path, &insert_l(i)->b->c))
+ return trans_lock_write_fail(trans, i);
+
+ if (!i->cached)
+ bch2_btree_node_prep_for_write(trans, i->path, insert_l(i)->b);
+ }
+
+ trans->write_locked = true;
+ return 0;
+}
+
+static inline void bch2_trans_unlock_write(struct btree_trans *trans)
+{
+ if (likely(trans->write_locked)) {
+ struct btree_insert_entry *i;
+
+ trans_for_each_update(trans, i)
+ if (!same_leaf_as_prev(trans, i))
+ bch2_btree_node_unlock_write_inlined(trans, i->path,
+ insert_l(i)->b);
+ trans->write_locked = false;
+ }
+}
+
/* Inserting into a given leaf node (last stage of insert): */
/* Handle overwrites and do insert, for non extents: */
bch2_snapshot_is_internal_node(trans->c, i->k->k.p.snapshot));
}
-static noinline int
-bch2_trans_journal_preres_get_cold(struct btree_trans *trans, unsigned flags,
- unsigned long trace_ip)
-{
- return drop_locks_do(trans,
- bch2_journal_preres_get(&trans->c->journal,
- &trans->journal_preres,
- trans->journal_preres_u64s,
- (flags & BCH_WATERMARK_MASK)));
-}
-
static __always_inline int bch2_trans_journal_res_get(struct btree_trans *trans,
unsigned flags)
{
return 0;
}
+noinline static int
+btree_key_can_insert_cached_slowpath(struct btree_trans *trans, unsigned flags,
+ struct btree_path *path, unsigned new_u64s)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_insert_entry *i;
+ struct bkey_cached *ck = (void *) path->l[0].b;
+ struct bkey_i *new_k;
+ int ret;
+
+ bch2_trans_unlock_write(trans);
+ bch2_trans_unlock(trans);
+
+ new_k = kmalloc(new_u64s * sizeof(u64), GFP_KERNEL);
+ if (!new_k) {
+ bch_err(c, "error allocating memory for key cache key, btree %s u64s %u",
+ bch2_btree_id_str(path->btree_id), new_u64s);
+ return -BCH_ERR_ENOMEM_btree_key_cache_insert;
+ }
+
+ ret = bch2_trans_relock(trans) ?:
+ bch2_trans_lock_write(trans);
+ if (unlikely(ret)) {
+ kfree(new_k);
+ return ret;
+ }
+
+ memcpy(new_k, ck->k, ck->u64s * sizeof(u64));
+
+ trans_for_each_update(trans, i)
+ if (i->old_v == &ck->k->v)
+ i->old_v = &new_k->v;
+
+ kfree(ck->k);
+ ck->u64s = new_u64s;
+ ck->k = new_k;
+ return 0;
+}
+
static int btree_key_can_insert_cached(struct btree_trans *trans, unsigned flags,
struct btree_path *path, unsigned u64s)
{
return 0;
new_u64s = roundup_pow_of_two(u64s);
- new_k = krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOFS);
- if (!new_k) {
- bch_err(c, "error allocating memory for key cache key, btree %s u64s %u",
- bch2_btree_id_str(path->btree_id), new_u64s);
- return -BCH_ERR_ENOMEM_btree_key_cache_insert;
- }
+ new_k = krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOWAIT);
+ if (unlikely(!new_k))
+ return btree_key_can_insert_cached_slowpath(trans, flags, path, new_u64s);
trans_for_each_update(trans, i)
if (i->old_v == &ck->k->v)
return ret;
}
-static noinline int trans_lock_write_fail(struct btree_trans *trans, struct btree_insert_entry *i)
-{
- while (--i >= trans->updates) {
- if (same_leaf_as_prev(trans, i))
- continue;
-
- bch2_btree_node_unlock_write(trans, i->path, insert_l(i)->b);
- }
-
- trace_and_count(trans->c, trans_restart_would_deadlock_write, trans);
- return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock_write);
-}
-
-static inline int trans_lock_write(struct btree_trans *trans)
-{
- struct btree_insert_entry *i;
-
- trans_for_each_update(trans, i) {
- if (same_leaf_as_prev(trans, i))
- continue;
-
- if (bch2_btree_node_lock_write(trans, i->path, &insert_l(i)->b->c))
- return trans_lock_write_fail(trans, i);
-
- if (!i->cached)
- bch2_btree_node_prep_for_write(trans, i->path, insert_l(i)->b);
- }
-
- return 0;
-}
-
static noinline void bch2_drop_overwrites_from_journal(struct btree_trans *trans)
{
struct btree_insert_entry *i;
}
}
- ret = bch2_journal_preres_get(&c->journal,
- &trans->journal_preres, trans->journal_preres_u64s,
- (flags & BCH_WATERMARK_MASK)|JOURNAL_RES_GET_NONBLOCK);
- if (unlikely(ret == -BCH_ERR_journal_preres_get_blocked))
- ret = bch2_trans_journal_preres_get_cold(trans, flags, trace_ip);
- if (unlikely(ret))
- return ret;
-
- ret = trans_lock_write(trans);
+ ret = bch2_trans_lock_write(trans);
if (unlikely(ret))
return ret;
if (!ret && unlikely(trans->journal_replay_not_finished))
bch2_drop_overwrites_from_journal(trans);
- trans_for_each_update(trans, i)
- if (!same_leaf_as_prev(trans, i))
- bch2_btree_node_unlock_write_inlined(trans, i->path,
- insert_l(i)->b);
+ bch2_trans_unlock_write(trans);
if (!ret && trans->journal_pin)
bch2_journal_pin_add(&c->journal, trans->journal_res.seq,
struct bch_fs *c = trans->c;
struct btree_insert_entry *i = NULL;
struct btree_write_buffered_key *wb;
- unsigned u64s;
int ret = 0;
if (!trans->nr_updates &&
EBUG_ON(test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags));
- memset(&trans->journal_preres, 0, sizeof(trans->journal_preres));
-
trans->journal_u64s = trans->extra_journal_entries.nr;
- trans->journal_preres_u64s = 0;
-
trans->journal_transaction_names = READ_ONCE(c->opts.journal_transaction_names);
-
if (trans->journal_transaction_names)
trans->journal_u64s += jset_u64s(JSET_ENTRY_LOG_U64s);
if (i->key_cache_already_flushed)
continue;
- /* we're going to journal the key being updated: */
- u64s = jset_u64s(i->k->k.u64s);
- if (i->cached &&
- likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY)))
- trans->journal_preres_u64s += u64s;
-
if (i->flags & BTREE_UPDATE_NOJOURNAL)
continue;
- trans->journal_u64s += u64s;
+ /* we're going to journal the key being updated: */
+ trans->journal_u64s += jset_u64s(i->k->k.u64s);
/* and we're also going to log the overwrite: */
if (trans->journal_transaction_names)
trace_and_count(c, transaction_commit, trans, _RET_IP_);
out:
- bch2_journal_preres_put(&c->journal, &trans->journal_preres);
-
if (likely(!(flags & BTREE_INSERT_NOCHECK_RW)))
bch2_write_ref_put(c, BCH_WRITE_REF_trans);
out_reset:
#include <linux/list.h>
#include <linux/rhashtable.h>
-//#include "bkey_methods.h"
+#include "btree_key_cache_types.h"
#include "buckets_types.h"
#include "darray.h"
#include "errcode.h"
#endif
};
-struct btree_key_cache_freelist {
- struct bkey_cached *objs[16];
- unsigned nr;
-};
-
-struct btree_key_cache {
- struct mutex lock;
- struct rhashtable table;
- bool table_init_done;
- struct list_head freed_pcpu;
- struct list_head freed_nonpcpu;
- struct shrinker *shrink;
- unsigned shrink_iter;
- struct btree_key_cache_freelist __percpu *pcpu_freed;
-
- atomic_long_t nr_freed;
- atomic_long_t nr_keys;
- atomic_long_t nr_dirty;
-};
-
-struct bkey_cached_key {
- u32 btree_id;
- struct bpos pos;
-} __packed __aligned(4);
-
#define BKEY_CACHED_ACCESSED 0
#define BKEY_CACHED_DIRTY 1
struct rhash_head hash;
struct list_head list;
- struct journal_preres res;
struct journal_entry_pin journal;
u64 seq;
unsigned long ip_allocated;
};
-#ifndef CONFIG_LOCKDEP
#define BTREE_ITER_MAX 64
-#else
-#define BTREE_ITER_MAX 32
-#endif
struct btree_trans_commit_hook;
typedef int (btree_trans_commit_hook_fn)(struct btree_trans *, struct btree_trans_commit_hook *);
bool journal_transaction_names:1;
bool journal_replay_not_finished:1;
bool notrace_relock_fail:1;
+ bool write_locked:1;
enum bch_errcode restarted:16;
u32 restart_count;
unsigned long last_begin_ip;
struct journal_entry_pin *journal_pin;
struct journal_res journal_res;
- struct journal_preres journal_preres;
u64 *journal_seq;
struct disk_reservation *disk_res;
unsigned journal_u64s;
- unsigned journal_preres_u64s;
struct replicas_delta_list *fs_usage_deltas;
};
up_read(&c->gc_lock);
as->took_gc_lock = false;
- bch2_journal_preres_put(&c->journal, &as->journal_preres);
-
bch2_journal_pin_drop(&c->journal, &as->journal);
bch2_journal_pin_flush(&c->journal, &as->journal);
bch2_disk_reservation_put(c, &as->disk_res);
bch2_journal_pin_drop(&c->journal, &as->journal);
- bch2_journal_preres_put(&c->journal, &as->journal_preres);
-
mutex_lock(&c->btree_interior_update_lock);
for (i = 0; i < as->nr_new_nodes; i++) {
b = as->new_nodes[i];
unsigned nr_nodes[2] = { 0, 0 };
unsigned update_level = level;
enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
- unsigned journal_flags = 0;
int ret = 0;
u32 restart_count = trans->restart_count;
flags &= ~BCH_WATERMARK_MASK;
flags |= watermark;
- if (flags & BTREE_INSERT_JOURNAL_RECLAIM)
- journal_flags |= JOURNAL_RES_GET_NONBLOCK;
- journal_flags |= watermark;
-
while (1) {
nr_nodes[!!update_level] += 1 + split;
update_level++;
if (ret)
goto err;
- ret = bch2_journal_preres_get(&c->journal, &as->journal_preres,
- BTREE_UPDATE_JOURNAL_RES,
- journal_flags|JOURNAL_RES_GET_NONBLOCK);
- if (ret) {
- if (flags & BTREE_INSERT_JOURNAL_RECLAIM) {
- ret = -BCH_ERR_journal_reclaim_would_deadlock;
- goto err;
- }
-
- ret = drop_locks_do(trans,
- bch2_journal_preres_get(&c->journal, &as->journal_preres,
- BTREE_UPDATE_JOURNAL_RES,
- journal_flags));
- if (ret == -BCH_ERR_journal_preres_get_blocked) {
- trace_and_count(c, trans_restart_journal_preres_get, trans, _RET_IP_, journal_flags);
- ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_journal_preres_get);
- }
- if (ret)
- goto err;
- }
-
ret = bch2_disk_reservation_get(c, &as->disk_res,
(nr_nodes[0] + nr_nodes[1]) * btree_sectors(c),
c->opts.metadata_replicas,
unsigned update_level;
struct disk_reservation disk_res;
- struct journal_preres journal_preres;
/*
* BTREE_INTERIOR_UPDATING_NODE:
next_pos = insert->k.p;
+ /*
+ * Check for nonce offset inconsistency:
+ * This is debug code - we've been seeing this bug rarely, and
+ * it's been hard to reproduce, so this should give us some more
+ * information when it does occur:
+ */
+ struct printbuf err = PRINTBUF;
+ int invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(insert), __btree_node_type(0, m->btree_id), 0, &err);
+ printbuf_exit(&err);
+
+ if (invalid) {
+ struct printbuf buf = PRINTBUF;
+
+ prt_str(&buf, "about to insert invalid key in data update path");
+ prt_str(&buf, "\nold: ");
+ bch2_bkey_val_to_text(&buf, c, old);
+ prt_str(&buf, "\nk: ");
+ bch2_bkey_val_to_text(&buf, c, k);
+ prt_str(&buf, "\nnew: ");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert));
+
+ bch2_print_string_as_lines(KERN_ERR, buf.buf);
+ printbuf_exit(&buf);
+
+ bch2_fatal_error(c);
+ goto out;
+ }
+
ret = bch2_insert_snapshot_whiteouts(trans, m->btree_id,
k.k->p, bkey_start_pos(&insert->k)) ?:
bch2_insert_snapshot_whiteouts(trans, m->btree_id,
case TARGET_DEV: {
struct bch_dev *ca;
+ out->atomic++;
rcu_read_lock();
ca = t.dev < c->sb.nr_devices
? rcu_dereference(c->devs[t.dev])
}
rcu_read_unlock();
+ out->atomic--;
break;
}
case TARGET_GROUP:
}
}
-void bch2_target_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned v)
+static void bch2_target_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned v)
{
struct target t = target_decode(v);
h->nr_active_devs++;
rcu_read_unlock();
+
+ /*
+ * If we only have redundancy + 1 devices, we're better off with just
+ * replication:
+ */
+ if (h->nr_active_devs < h->redundancy + 2)
+ bch_err(c, "insufficient devices available to create stripe (have %u, need %u) - mismatched bucket sizes?",
+ h->nr_active_devs, h->redundancy + 2);
+
list_add(&h->list, &c->ec_stripe_head_list);
return h;
}
h = ec_new_stripe_head_alloc(c, target, algo, redundancy, watermark);
found:
+ if (!IS_ERR_OR_NULL(h) &&
+ h->nr_active_devs < h->redundancy + 2) {
+ mutex_unlock(&h->lock);
+ h = NULL;
+ }
mutex_unlock(&c->ec_stripe_head_lock);
return h;
}
int ret;
h = __bch2_ec_stripe_head_get(trans, target, algo, redundancy, watermark);
- if (!h)
- bch_err(c, "no stripe head");
if (IS_ERR_OR_NULL(h))
return h;
int bch2_filemap_get_contig_folios_d(struct address_space *mapping,
loff_t start, u64 end,
- int fgp_flags, gfp_t gfp,
+ fgf_t fgp_flags, gfp_t gfp,
folios *fs)
{
struct folio *f;
typedef DARRAY(struct folio *) folios;
int bch2_filemap_get_contig_folios_d(struct address_space *, loff_t,
- u64, int, gfp_t, folios *);
+ u64, fgf_t, gfp_t, folios *);
int bch2_write_invalidate_inode_pages_range(struct address_space *, loff_t, loff_t);
/*
return dget(sb->s_root);
err_put_super:
- sb->s_fs_info = NULL;
- c->vfs_sb = NULL;
deactivate_locked_super(sb);
- bch2_fs_stop(c);
return ERR_PTR(bch2_err_class(ret));
}
{
struct bch_fs *c = sb->s_fs_info;
- if (c)
- c->vfs_sb = NULL;
generic_shutdown_super(sb);
- if (c)
- bch2_fs_free(c);
+ bch2_fs_free(c);
}
static struct file_system_type bcache_fs_type = {
const struct nlink *l = _l;
const struct nlink *r = _r;
- return cmp_int(l->inum, r->inum) ?: cmp_int(l->snapshot, r->snapshot);
+ return cmp_int(l->inum, r->inum);
}
static void inc_link(struct bch_fs *c, struct snapshots_seen *s,
* unlinked inodes in the snapshot leaves:
*/
*need_another_pass = true;
- return 0;
+ goto out;
}
ret = 1;
*/
for_each_btree_key(trans, iter, BTREE_ID_deleted_inodes, POS_MIN,
BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
- ret = lockrestart_do(trans, may_delete_deleted_inode(trans, &iter, k.k->p,
- &need_another_pass));
+ ret = commit_do(trans, NULL, NULL,
+ BTREE_INSERT_NOFAIL|
+ BTREE_INSERT_LAZY_RW,
+ may_delete_deleted_inode(trans, &iter, k.k->p, &need_another_pass));
if (ret < 0)
break;
* checksum:
*/
csum = bch2_checksum_bio(c, op->crc.csum_type, nonce, &op->wbio.bio);
- if (bch2_crc_cmp(op->crc.csum, csum))
+ if (bch2_crc_cmp(op->crc.csum, csum) && !c->opts.no_data_io)
return -EIO;
ret = bch2_encrypt_bio(c, op->crc.csum_type, nonce, &op->wbio.bio);
return ret;
}
-/* journal_preres: */
-
-static bool journal_preres_available(struct journal *j,
- struct journal_preres *res,
- unsigned new_u64s,
- unsigned flags)
-{
- bool ret = bch2_journal_preres_get_fast(j, res, new_u64s, flags, true);
-
- if (!ret && mutex_trylock(&j->reclaim_lock)) {
- bch2_journal_reclaim(j);
- mutex_unlock(&j->reclaim_lock);
- }
-
- return ret;
-}
-
-int __bch2_journal_preres_get(struct journal *j,
- struct journal_preres *res,
- unsigned new_u64s,
- unsigned flags)
-{
- int ret;
-
- closure_wait_event(&j->preres_wait,
- (ret = bch2_journal_error(j)) ||
- journal_preres_available(j, res, new_u64s, flags));
- return ret;
-}
-
/* journal_entry_res: */
void bch2_journal_entry_res_resize(struct journal *j,
prt_printf(out, "last_seq:\t\t%llu\n", journal_last_seq(j));
prt_printf(out, "last_seq_ondisk:\t%llu\n", j->last_seq_ondisk);
prt_printf(out, "flushed_seq_ondisk:\t%llu\n", j->flushed_seq_ondisk);
- prt_printf(out, "prereserved:\t\t%u/%u\n", j->prereserved.reserved, j->prereserved.remaining);
prt_printf(out, "watermark:\t\t%s\n", bch2_watermarks[j->watermark]);
prt_printf(out, "each entry reserved:\t%u\n", j->entry_u64s_reserved);
prt_printf(out, "nr flush writes:\t%llu\n", j->nr_flush_writes);
return 0;
}
-/* journal_preres: */
-
-static inline void journal_set_watermark(struct journal *j)
-{
- union journal_preres_state s = READ_ONCE(j->prereserved);
- unsigned watermark = BCH_WATERMARK_stripe;
-
- if (fifo_free(&j->pin) < j->pin.size / 4)
- watermark = max_t(unsigned, watermark, BCH_WATERMARK_copygc);
- if (fifo_free(&j->pin) < j->pin.size / 8)
- watermark = max_t(unsigned, watermark, BCH_WATERMARK_reclaim);
-
- if (s.reserved > s.remaining)
- watermark = max_t(unsigned, watermark, BCH_WATERMARK_copygc);
- if (!s.remaining)
- watermark = max_t(unsigned, watermark, BCH_WATERMARK_reclaim);
-
- if (watermark == j->watermark)
- return;
-
- swap(watermark, j->watermark);
- if (watermark > j->watermark)
- journal_wake(j);
-}
-
-static inline void bch2_journal_preres_put(struct journal *j,
- struct journal_preres *res)
-{
- union journal_preres_state s = { .reserved = res->u64s };
-
- if (!res->u64s)
- return;
-
- s.v = atomic64_sub_return(s.v, &j->prereserved.counter);
- res->u64s = 0;
-
- if (unlikely(s.waiting)) {
- clear_bit(ilog2((((union journal_preres_state) { .waiting = 1 }).v)),
- (unsigned long *) &j->prereserved.v);
- closure_wake_up(&j->preres_wait);
- }
-
- if (s.reserved <= s.remaining && j->watermark)
- journal_set_watermark(j);
-}
-
-int __bch2_journal_preres_get(struct journal *,
- struct journal_preres *, unsigned, unsigned);
-
-static inline int bch2_journal_preres_get_fast(struct journal *j,
- struct journal_preres *res,
- unsigned new_u64s,
- unsigned flags,
- bool set_waiting)
-{
- int d = new_u64s - res->u64s;
- union journal_preres_state old, new;
- u64 v = atomic64_read(&j->prereserved.counter);
- enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
- int ret;
-
- do {
- old.v = new.v = v;
- ret = 0;
-
- if (watermark == BCH_WATERMARK_reclaim ||
- new.reserved + d < new.remaining) {
- new.reserved += d;
- ret = 1;
- } else if (set_waiting && !new.waiting)
- new.waiting = true;
- else
- return 0;
- } while ((v = atomic64_cmpxchg(&j->prereserved.counter,
- old.v, new.v)) != old.v);
-
- if (ret)
- res->u64s += d;
- return ret;
-}
-
-static inline int bch2_journal_preres_get(struct journal *j,
- struct journal_preres *res,
- unsigned new_u64s,
- unsigned flags)
-{
- if (new_u64s <= res->u64s)
- return 0;
-
- if (bch2_journal_preres_get_fast(j, res, new_u64s, flags, false))
- return 0;
-
- if (flags & JOURNAL_RES_GET_NONBLOCK)
- return -BCH_ERR_journal_preres_get_blocked;
-
- return __bch2_journal_preres_get(j, res, new_u64s, flags);
-}
-
/* journal_entry_res: */
void bch2_journal_entry_res_resize(struct journal *,
if (ja->bucket_seq[ja->cur_idx] &&
ja->sectors_free == ca->mi.bucket_size) {
+#if 0
+ /*
+ * Debug code for ZNS support, where we (probably) want to be
+ * correlated where we stopped in the journal to the zone write
+ * points:
+ */
bch_err(c, "ja->sectors_free == ca->mi.bucket_size");
bch_err(c, "cur_idx %u/%u", ja->cur_idx, ja->nr);
for (i = 0; i < 3; i++) {
bch_err(c, "bucket_seq[%u] = %llu", idx, ja->bucket_seq[idx]);
}
+#endif
ja->sectors_free = 0;
}
return available;
}
-static void journal_set_remaining(struct journal *j, unsigned u64s_remaining)
+static inline void journal_set_watermark(struct journal *j, bool low_on_space)
{
- union journal_preres_state old, new;
- u64 v = atomic64_read(&j->prereserved.counter);
+ unsigned watermark = BCH_WATERMARK_stripe;
- do {
- old.v = new.v = v;
- new.remaining = u64s_remaining;
- } while ((v = atomic64_cmpxchg(&j->prereserved.counter,
- old.v, new.v)) != old.v);
+ if (low_on_space)
+ watermark = max_t(unsigned, watermark, BCH_WATERMARK_reclaim);
+ if (fifo_free(&j->pin) < j->pin.size / 4)
+ watermark = max_t(unsigned, watermark, BCH_WATERMARK_reclaim);
+
+ if (watermark == j->watermark)
+ return;
+
+ swap(watermark, j->watermark);
+ if (watermark > j->watermark)
+ journal_wake(j);
}
static struct journal_space
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct bch_dev *ca;
unsigned clean, clean_ondisk, total;
- s64 u64s_remaining = 0;
unsigned max_entry_size = min(j->buf[0].buf_size >> 9,
j->buf[1].buf_size >> 9);
unsigned i, nr_online = 0, nr_devs_want;
else
clear_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags);
- u64s_remaining = (u64) clean << 6;
- u64s_remaining -= (u64) total << 3;
- u64s_remaining = max(0LL, u64s_remaining);
- u64s_remaining /= 4;
- u64s_remaining = min_t(u64, u64s_remaining, U32_MAX);
+ journal_set_watermark(j, clean * 4 <= total);
out:
j->cur_entry_sectors = !ret ? j->space[journal_space_discarded].next_entry : 0;
j->cur_entry_error = ret;
- journal_set_remaining(j, u64s_remaining);
- journal_set_watermark(j);
if (!ret)
journal_wake(j);
/* Try to keep the journal at most half full: */
nr_buckets = ja->nr / 2;
- /* And include pre-reservations: */
- nr_buckets += DIV_ROUND_UP(j->prereserved.reserved,
- (ca->mi.bucket_size << 6) -
- journal_entry_overhead(j));
-
nr_buckets = min(nr_buckets, ja->nr);
bucket_to_flush = (ja->cur_idx + nr_buckets) % ja->nr;
msecs_to_jiffies(c->opts.journal_reclaim_delay)))
min_nr = 1;
- if (j->prereserved.reserved * 4 > j->prereserved.remaining)
- min_nr = 1;
-
- if (fifo_free(&j->pin) <= 32)
+ if (j->watermark != BCH_WATERMARK_stripe)
min_nr = 1;
if (atomic_read(&c->btree_cache.dirty) * 2 > c->btree_cache.used)
trace_and_count(c, journal_reclaim_start, c,
direct, kicked,
min_nr, min_key_cache,
- j->prereserved.reserved,
- j->prereserved.remaining,
atomic_read(&c->btree_cache.dirty),
c->btree_cache.used,
atomic_long_read(&c->btree_key_cache.nr_dirty),
u64 seq;
};
-/*
- * For reserving space in the journal prior to getting a reservation on a
- * particular journal entry:
- */
-struct journal_preres {
- unsigned u64s;
-};
-
union journal_res_state {
struct {
atomic64_t counter;
};
};
-union journal_preres_state {
- struct {
- atomic64_t counter;
- };
-
- struct {
- u64 v;
- };
-
- struct {
- u64 waiting:1,
- reserved:31,
- remaining:32;
- };
-};
-
/* bytes: */
#define JOURNAL_ENTRY_SIZE_MIN (64U << 10) /* 64k */
#define JOURNAL_ENTRY_SIZE_MAX (4U << 20) /* 4M */
union journal_res_state reservations;
enum bch_watermark watermark;
- union journal_preres_state prereserved;
-
} __aligned(SMP_CACHE_BYTES);
unsigned long flags;
this_cpu_sub(*lock->readers, !ret);
preempt_enable();
- if (!ret && (old & SIX_LOCK_WAITING_write))
- ret = -1 - SIX_LOCK_write;
+ if (!ret) {
+ smp_mb();
+ if (atomic_read(&lock->state) & SIX_LOCK_WAITING_write)
+ ret = -1 - SIX_LOCK_write;
+ }
} else if (type == SIX_LOCK_write && lock->readers) {
if (try) {
atomic_add(SIX_LOCK_HELD_write, &lock->state);
};
struct snapshot_table {
- struct snapshot_t s[0];
+ DECLARE_FLEX_ARRAY(struct snapshot_t, s);
};
typedef struct {
TRACE_EVENT(journal_reclaim_start,
TP_PROTO(struct bch_fs *c, bool direct, bool kicked,
u64 min_nr, u64 min_key_cache,
- u64 prereserved, u64 prereserved_total,
u64 btree_cache_dirty, u64 btree_cache_total,
u64 btree_key_cache_dirty, u64 btree_key_cache_total),
- TP_ARGS(c, direct, kicked, min_nr, min_key_cache, prereserved, prereserved_total,
+ TP_ARGS(c, direct, kicked, min_nr, min_key_cache,
btree_cache_dirty, btree_cache_total,
btree_key_cache_dirty, btree_key_cache_total),
__field(bool, kicked )
__field(u64, min_nr )
__field(u64, min_key_cache )
- __field(u64, prereserved )
- __field(u64, prereserved_total )
__field(u64, btree_cache_dirty )
__field(u64, btree_cache_total )
__field(u64, btree_key_cache_dirty )
__entry->kicked = kicked;
__entry->min_nr = min_nr;
__entry->min_key_cache = min_key_cache;
- __entry->prereserved = prereserved;
- __entry->prereserved_total = prereserved_total;
__entry->btree_cache_dirty = btree_cache_dirty;
__entry->btree_cache_total = btree_cache_total;
__entry->btree_key_cache_dirty = btree_key_cache_dirty;
__entry->btree_key_cache_total = btree_key_cache_total;
),
- TP_printk("%d,%d direct %u kicked %u min %llu key cache %llu prereserved %llu/%llu btree cache %llu/%llu key cache %llu/%llu",
+ TP_printk("%d,%d direct %u kicked %u min %llu key cache %llu btree cache %llu/%llu key cache %llu/%llu",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->direct,
__entry->kicked,
__entry->min_nr,
__entry->min_key_cache,
- __entry->prereserved,
- __entry->prereserved_total,
__entry->btree_cache_dirty,
__entry->btree_cache_total,
__entry->btree_key_cache_dirty,
s.v = v + 1;
s.defined = true;
} else {
+ /*
+ * Check if this option was set on the parent - if so, switched
+ * back to inheriting from the parent:
+ *
+ * rename() also has to deal with keeping inherited options up
+ * to date - see bch2_reinherit_attrs()
+ */
+ spin_lock(&dentry->d_lock);
if (!IS_ROOT(dentry)) {
struct bch_inode_info *dir =
to_bch_ei(d_inode(dentry->d_parent));
} else {
s.v = 0;
}
+ spin_unlock(&dentry->d_lock);
s.defined = false;
}
if (btrfs_block_can_be_shared(trans, root, buf)) {
ret = btrfs_lookup_extent_info(trans, fs_info, buf->start,
btrfs_header_level(buf), 1,
- &refs, &flags);
+ &refs, &flags, NULL);
if (ret)
return ret;
if (unlikely(refs == 0)) {
return -ENOMEM;
}
- if (btrfs_qgroup_enabled(fs_info) && !generic_ref->skip_qgroup) {
+ if (btrfs_qgroup_full_accounting(fs_info) && !generic_ref->skip_qgroup) {
record = kzalloc(sizeof(*record), GFP_NOFS);
if (!record) {
kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
return -ENOMEM;
}
- if (btrfs_qgroup_enabled(fs_info) && !generic_ref->skip_qgroup) {
+ if (btrfs_qgroup_full_accounting(fs_info) && !generic_ref->skip_qgroup) {
record = kzalloc(sizeof(*record), GFP_NOFS);
if (!record) {
kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
*/
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
- u64 offset, int metadata, u64 *refs, u64 *flags)
+ u64 offset, int metadata, u64 *refs, u64 *flags,
+ u64 *owning_root)
{
struct btrfs_root *extent_root;
struct btrfs_delayed_ref_head *head;
u32 item_size;
u64 num_refs;
u64 extent_flags;
+ u64 owner = 0;
int ret;
/*
struct btrfs_extent_item);
num_refs = btrfs_extent_refs(leaf, ei);
extent_flags = btrfs_extent_flags(leaf, ei);
+ owner = btrfs_get_extent_owner_root(fs_info, leaf,
+ path->slots[0]);
} else {
ret = -EUCLEAN;
btrfs_err(fs_info,
*refs = num_refs;
if (flags)
*flags = extent_flags;
+ if (owning_root)
+ *owning_root = owner;
out_free:
btrfs_free_path(path);
return ret;
/* We don't lock the tree block, it's OK to be racy here */
ret = btrfs_lookup_extent_info(trans, fs_info, bytenr,
wc->level - 1, 1, &refs,
- &flags);
+ &flags, NULL);
/* We don't care about errors in readahead. */
if (ret < 0)
continue;
ret = btrfs_lookup_extent_info(trans, fs_info,
eb->start, level, 1,
&wc->refs[level],
- &wc->flags[level]);
+ &wc->flags[level],
+ NULL);
BUG_ON(ret == -ENOMEM);
if (ret)
return ret;
u64 bytenr;
u64 generation;
u64 parent;
+ u64 owner_root = 0;
struct btrfs_tree_parent_check check = { 0 };
struct btrfs_key key;
struct btrfs_ref ref = { 0 };
ret = btrfs_lookup_extent_info(trans, fs_info, bytenr, level - 1, 1,
&wc->refs[level - 1],
- &wc->flags[level - 1]);
+ &wc->flags[level - 1],
+ &owner_root);
if (ret < 0)
goto out_unlock;
find_next_key(path, level, &wc->drop_progress);
btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
- fs_info->nodesize, parent,
- btrfs_header_owner(next));
+ fs_info->nodesize, parent, owner_root);
btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid,
0, false);
ret = btrfs_free_extent(trans, &ref);
ret = btrfs_lookup_extent_info(trans, fs_info,
eb->start, level, 1,
&wc->refs[level],
- &wc->flags[level]);
+ &wc->flags[level],
+ NULL);
if (ret < 0) {
btrfs_tree_unlock_rw(eb, path->locks[level]);
path->locks[level] = 0;
ret = btrfs_lookup_extent_info(trans, fs_info,
path->nodes[level]->start,
level, 1, &wc->refs[level],
- &wc->flags[level]);
+ &wc->flags[level], NULL);
if (ret < 0) {
err = ret;
goto out_end_trans;
int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len);
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
- u64 offset, int metadata, u64 *refs, u64 *flags);
+ u64 offset, int metadata, u64 *refs, u64 *flags,
+ u64 *owner_root);
int btrfs_pin_extent(struct btrfs_trans_handle *trans, u64 bytenr, u64 num,
int reserved);
int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
int ret;
alloc_hint = get_extent_allocation_hint(inode, start, len);
+again:
ret = btrfs_reserve_extent(root, len, len, fs_info->sectorsize,
0, alloc_hint, &ins, 1, 1);
+ if (ret == -EAGAIN) {
+ ASSERT(btrfs_is_zoned(fs_info));
+ wait_on_bit_io(&inode->root->fs_info->flags, BTRFS_FS_NEED_ZONE_FINISH,
+ TASK_UNINTERRUPTIBLE);
+ goto again;
+ }
if (ret)
return ERR_PTR(ret);
static noinline int copy_to_sk(struct btrfs_path *path,
struct btrfs_key *key,
struct btrfs_ioctl_search_key *sk,
- size_t *buf_size,
+ u64 *buf_size,
char __user *ubuf,
unsigned long *sk_offset,
int *num_found)
static noinline int search_ioctl(struct inode *inode,
struct btrfs_ioctl_search_key *sk,
- size_t *buf_size,
+ u64 *buf_size,
char __user *ubuf)
{
struct btrfs_fs_info *info = btrfs_sb(inode->i_sb);
struct btrfs_ioctl_search_args __user *uargs = argp;
struct btrfs_ioctl_search_key sk;
int ret;
- size_t buf_size;
+ u64 buf_size;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
struct btrfs_ioctl_search_args_v2 __user *uarg = argp;
struct btrfs_ioctl_search_args_v2 args;
int ret;
- size_t buf_size;
- const size_t buf_limit = SZ_16M;
+ u64 buf_size;
+ const u64 buf_limit = SZ_16M;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
u64 bytenr = record->bytenr;
if (!btrfs_qgroup_full_accounting(fs_info))
- return 0;
+ return 1;
lockdep_assert_held(&delayed_refs->lock);
trace_btrfs_qgroup_trace_extent(fs_info, record);
qgroup_update_counters(fs_info, &qgroups, nr_old_roots, nr_new_roots,
num_bytes, seq);
+ /*
+ * We're done using the iterator, release all its qgroups while holding
+ * fs_info->qgroup_lock so that we don't race with btrfs_remove_qgroup()
+ * and trigger use-after-free accesses to qgroups.
+ */
+ qgroup_iterator_nested_clean(&qgroups);
+
/*
* Bump qgroup_seq to avoid seq overlap
*/
fs_info->qgroup_seq += max(nr_old_roots, nr_new_roots) + 1;
spin_unlock(&fs_info->qgroup_lock);
out_free:
- qgroup_iterator_nested_clean(&qgroups);
ulist_free(old_roots);
ulist_free(new_roots);
return ret;
btrfs_put_bioc(bioc);
}
- return ret;
+ return 0;
}
int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info,
*/
ASSERT(sctx->cur_stripe < SCRUB_TOTAL_STRIPES);
+ /* @found_logical_ret must be specified. */
+ ASSERT(found_logical_ret);
+
stripe = &sctx->stripes[sctx->cur_stripe];
scrub_reset_stripe(stripe);
ret = scrub_find_fill_first_stripe(bg, &sctx->extent_path,
/* Either >0 as no more extents or <0 for error. */
if (ret)
return ret;
- if (found_logical_ret)
- *found_logical_ret = stripe->logical;
+ *found_logical_ret = stripe->logical;
sctx->cur_stripe++;
/* We filled one group, submit it. */
/* Go through each extent items inside the logical range */
while (cur_logical < logical_end) {
- u64 found_logical;
+ u64 found_logical = U64_MAX;
u64 cur_physical = physical + cur_logical - logical_start;
/* Canceled? */
if (ret < 0)
break;
+ /* queue_scrub_stripe() returned 0, @found_logical must be updated. */
+ ASSERT(found_logical != U64_MAX);
cur_logical = found_logical + BTRFS_STRIPE_LEN;
/* Don't hold CPU for too long time */
if (!fs_devices) {
fs_devices = alloc_fs_devices(disk_super->fsid);
+ if (IS_ERR(fs_devices))
+ return ERR_CAST(fs_devices);
+
if (has_metadata_uuid)
memcpy(fs_devices->metadata_uuid,
disk_super->metadata_uuid, BTRFS_FSID_SIZE);
- if (IS_ERR(fs_devices))
- return ERR_CAST(fs_devices);
-
if (same_fsid_diff_dev) {
generate_random_uuid(fs_devices->fsid);
fs_devices->temp_fsid = true;
}
out:
- if (cache->alloc_offset > fs_info->zone_size) {
- btrfs_err(fs_info,
- "zoned: invalid write pointer %llu in block group %llu",
- cache->alloc_offset, cache->start);
- ret = -EIO;
- }
-
if (cache->alloc_offset > cache->zone_capacity) {
btrfs_err(fs_info,
"zoned: invalid write pointer %llu (larger than zone capacity %llu) in block group %llu",
void nfsd_net_reply_cache_destroy(struct nfsd_net *nn);
int nfsd_reply_cache_init(struct nfsd_net *);
void nfsd_reply_cache_shutdown(struct nfsd_net *);
-int nfsd_cache_lookup(struct svc_rqst *rqstp,
- struct nfsd_cacherep **cacherep);
+int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
+ unsigned int len, struct nfsd_cacherep **cacherep);
void nfsd_cache_update(struct svc_rqst *rqstp, struct nfsd_cacherep *rp,
int cachetype, __be32 *statp);
int nfsd_reply_cache_stats_show(struct seq_file *m, void *v);
/* XXX: alternatively, we could get/drop in seq start/stop */
drop_client(clp);
- return 0;
+ return seq_release(inode, file);
}
static const struct file_operations client_states_fops = {
return freed;
}
-/*
- * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes
+/**
+ * nfsd_cache_csum - Checksum incoming NFS Call arguments
+ * @buf: buffer containing a whole RPC Call message
+ * @start: starting byte of the NFS Call header
+ * @remaining: size of the NFS Call header, in bytes
+ *
+ * Compute a weak checksum of the leading bytes of an NFS procedure
+ * call header to help verify that a retransmitted Call matches an
+ * entry in the duplicate reply cache.
+ *
+ * To avoid assumptions about how the RPC message is laid out in
+ * @buf and what else it might contain (eg, a GSS MIC suffix), the
+ * caller passes us the exact location and length of the NFS Call
+ * header.
+ *
+ * Returns a 32-bit checksum value, as defined in RFC 793.
*/
-static __wsum
-nfsd_cache_csum(struct svc_rqst *rqstp)
+static __wsum nfsd_cache_csum(struct xdr_buf *buf, unsigned int start,
+ unsigned int remaining)
{
+ unsigned int base, len;
+ struct xdr_buf subbuf;
+ __wsum csum = 0;
+ void *p;
int idx;
- unsigned int base;
- __wsum csum;
- struct xdr_buf *buf = &rqstp->rq_arg;
- const unsigned char *p = buf->head[0].iov_base;
- size_t csum_len = min_t(size_t, buf->head[0].iov_len + buf->page_len,
- RC_CSUMLEN);
- size_t len = min(buf->head[0].iov_len, csum_len);
+
+ if (remaining > RC_CSUMLEN)
+ remaining = RC_CSUMLEN;
+ if (xdr_buf_subsegment(buf, &subbuf, start, remaining))
+ return csum;
/* rq_arg.head first */
- csum = csum_partial(p, len, 0);
- csum_len -= len;
+ if (subbuf.head[0].iov_len) {
+ len = min_t(unsigned int, subbuf.head[0].iov_len, remaining);
+ csum = csum_partial(subbuf.head[0].iov_base, len, csum);
+ remaining -= len;
+ }
/* Continue into page array */
- idx = buf->page_base / PAGE_SIZE;
- base = buf->page_base & ~PAGE_MASK;
- while (csum_len) {
- p = page_address(buf->pages[idx]) + base;
- len = min_t(size_t, PAGE_SIZE - base, csum_len);
+ idx = subbuf.page_base / PAGE_SIZE;
+ base = subbuf.page_base & ~PAGE_MASK;
+ while (remaining) {
+ p = page_address(subbuf.pages[idx]) + base;
+ len = min_t(unsigned int, PAGE_SIZE - base, remaining);
csum = csum_partial(p, len, csum);
- csum_len -= len;
+ remaining -= len;
base = 0;
++idx;
}
/**
* nfsd_cache_lookup - Find an entry in the duplicate reply cache
* @rqstp: Incoming Call to find
+ * @start: starting byte in @rqstp->rq_arg of the NFS Call header
+ * @len: size of the NFS Call header, in bytes
* @cacherep: OUT: DRC entry for this request
*
* Try to find an entry matching the current call in the cache. When none
* %RC_REPLY: Reply from cache
* %RC_DROPIT: Do not process the request further
*/
-int nfsd_cache_lookup(struct svc_rqst *rqstp, struct nfsd_cacherep **cacherep)
+int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
+ unsigned int len, struct nfsd_cacherep **cacherep)
{
struct nfsd_net *nn;
struct nfsd_cacherep *rp, *found;
goto out;
}
- csum = nfsd_cache_csum(rqstp);
+ csum = nfsd_cache_csum(&rqstp->rq_arg, start, len);
/*
* Since the common case is a cache miss followed by an insert,
return;
}
-/*
- * Copy cached reply to current reply buffer. Should always fit.
- * FIXME as reply is in a page, we should just attach the page, and
- * keep a refcount....
- */
static int
nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data)
{
- struct kvec *vec = &rqstp->rq_res.head[0];
-
- if (vec->iov_len + data->iov_len > PAGE_SIZE) {
- printk(KERN_WARNING "nfsd: cached reply too large (%zd).\n",
- data->iov_len);
- return 0;
- }
- memcpy((char*)vec->iov_base + vec->iov_len, data->iov_base, data->iov_len);
- vec->iov_len += data->iov_len;
- return 1;
+ __be32 *p;
+
+ p = xdr_reserve_space(&rqstp->rq_res_stream, data->iov_len);
+ if (unlikely(!p))
+ return false;
+ memcpy(p, data->iov_base, data->iov_len);
+ xdr_commit_encode(&rqstp->rq_res_stream);
+ return true;
}
/*
const struct svc_procedure *proc = rqstp->rq_procinfo;
__be32 *statp = rqstp->rq_accept_statp;
struct nfsd_cacherep *rp;
+ unsigned int start, len;
+ __be32 *nfs_reply;
/*
* Give the xdr decoder a chance to change this if it wants
*/
rqstp->rq_cachetype = proc->pc_cachetype;
+ /*
+ * ->pc_decode advances the argument stream past the NFS
+ * Call header, so grab the header's starting location and
+ * size now for the call to nfsd_cache_lookup().
+ */
+ start = xdr_stream_pos(&rqstp->rq_arg_stream);
+ len = xdr_stream_remaining(&rqstp->rq_arg_stream);
if (!proc->pc_decode(rqstp, &rqstp->rq_arg_stream))
goto out_decode_err;
smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter | 1);
rp = NULL;
- switch (nfsd_cache_lookup(rqstp, &rp)) {
+ switch (nfsd_cache_lookup(rqstp, start, len, &rp)) {
case RC_DOIT:
break;
case RC_REPLY:
goto out_dropit;
}
+ nfs_reply = xdr_inline_decode(&rqstp->rq_res_stream, 0);
*statp = proc->pc_func(rqstp);
if (test_bit(RQ_DROPME, &rqstp->rq_flags))
goto out_update_drop;
*/
smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter + 1);
- nfsd_cache_update(rqstp, rp, rqstp->rq_cachetype, statp + 1);
+ nfsd_cache_update(rqstp, rp, rqstp->rq_cachetype, nfs_reply);
out_cached_reply:
return 1;
struct ovl_fs_context *ctx = fc->fs_private;
struct ovl_fs_context_layer *l;
char *dup = NULL, *iter;
- ssize_t nr_lower = 0, nr = 0, nr_data = 0;
+ ssize_t nr_lower, nr;
bool data_layer = false;
/*
iter = dup;
l = ctx->lower;
for (nr = 0; nr < nr_lower; nr++, l++) {
+ ctx->nr++;
memset(l, 0, sizeof(*l));
err = ovl_mount_dir(iter, &l->path);
goto out_put;
if (data_layer)
- nr_data++;
+ ctx->nr_data++;
/* Calling strchr() again would overrun. */
- if ((nr + 1) == nr_lower)
+ if (ctx->nr == nr_lower)
break;
err = -EINVAL;
* This is a regular layer so we require that
* there are no data layers.
*/
- if ((ctx->nr_data + nr_data) > 0) {
+ if (ctx->nr_data > 0) {
pr_err("regular lower layers cannot follow data lower layers");
goto out_put;
}
data_layer = true;
iter++;
}
- ctx->nr = nr_lower;
- ctx->nr_data += nr_data;
kfree(dup);
return 0;
return 0;
}
-/**
+/*
* Caller must hold a reference to inode to prevent it from being freed while
* it is marked inuse.
*/
* strlen(";sec=ntlmsspi") */
#define MAX_MECH_STR_LEN 13
-/* strlen of "host=" */
-#define HOST_KEY_LEN 5
+/* strlen of ";host=" */
+#define HOST_KEY_LEN 6
/* strlen of ";ip4=" or ";ip6=" */
#define IP_KEY_LEN 5
ses->chans[i].server = NULL;
}
+ /* we now account for primary channel in iface->refcount */
+ if (ses->chans[0].iface) {
+ kref_put(&ses->chans[0].iface->refcount, release_iface);
+ ses->chans[0].server = NULL;
+ }
+
sesInfoFree(ses);
cifs_put_tcp_session(server, 0);
}
iface = ses->chans[i].iface;
server = ses->chans[i].server;
+ /*
+ * remove these references first, since we need to unlock
+ * the chan_lock here, since iface_lock is a higher lock
+ */
+ ses->chans[i].iface = NULL;
+ ses->chans[i].server = NULL;
+ spin_unlock(&ses->chan_lock);
+
if (iface) {
spin_lock(&ses->iface_lock);
kref_put(&iface->refcount, release_iface);
- ses->chans[i].iface = NULL;
iface->num_channels--;
if (iface->weight_fulfilled)
iface->weight_fulfilled--;
spin_unlock(&ses->iface_lock);
}
- spin_unlock(&ses->chan_lock);
- if (server && !server->terminate) {
- server->terminate = true;
- cifs_signal_cifsd_for_reconnect(server, false);
- }
- spin_lock(&ses->chan_lock);
-
if (server) {
- ses->chans[i].server = NULL;
+ if (!server->terminate) {
+ server->terminate = true;
+ cifs_signal_cifsd_for_reconnect(server, false);
+ }
cifs_put_tcp_session(server, false);
}
+ spin_lock(&ses->chan_lock);
}
done:
ptriplet->encryption.context,
ses->smb3encryptionkey,
SMB3_ENC_DEC_KEY_SIZE);
+ if (rc)
+ return rc;
rc = generate_key(ses, ptriplet->decryption.label,
ptriplet->decryption.context,
ses->smb3decryptionkey,
return rc;
}
- if (rc)
- return rc;
-
#ifdef CONFIG_CIFS_DEBUG_DUMP_KEYS
cifs_dbg(VFS, "%s: dumping generated AES session keys\n", __func__);
/*
bool "XFS online metadata check usage data collection"
default y
depends on XFS_ONLINE_SCRUB
- select XFS_DEBUG
+ select DEBUG_FS
help
If you say Y here, the kernel will gather usage data about
the online metadata check subsystem. This includes the number
ASSERT(mp->m_alloc_maxlevels > 0);
+ /*
+ * For a btree shorter than the maximum height, the worst case is that
+ * every level gets split and a new level is added, then while inserting
+ * another entry to refill the AGFL, every level under the old root gets
+ * split again. This is:
+ *
+ * (full height split reservation) + (AGFL refill split height)
+ * = (current height + 1) + (current height - 1)
+ * = (new height) + (new height - 2)
+ * = 2 * new height - 2
+ *
+ * For a btree of maximum height, the worst case is that every level
+ * under the root gets split, then while inserting another entry to
+ * refill the AGFL, every level under the root gets split again. This is
+ * also:
+ *
+ * 2 * (current height - 1)
+ * = 2 * (new height - 1)
+ * = 2 * new height - 2
+ */
+
/* space needed by-bno freespace btree */
min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1,
- mp->m_alloc_maxlevels);
+ mp->m_alloc_maxlevels) * 2 - 2;
/* space needed by-size freespace btree */
min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1,
- mp->m_alloc_maxlevels);
+ mp->m_alloc_maxlevels) * 2 - 2;
/* space needed reverse mapping used space btree */
if (xfs_has_rmapbt(mp))
min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1,
- mp->m_rmap_maxlevels);
+ mp->m_rmap_maxlevels) * 2 - 2;
return min_free;
}
return ret;
}
-/* Abort all the intents that were committed. */
STATIC void
-xfs_defer_trans_abort(
- struct xfs_trans *tp,
- struct list_head *dop_pending)
+xfs_defer_pending_abort(
+ struct xfs_mount *mp,
+ struct list_head *dop_list)
{
struct xfs_defer_pending *dfp;
const struct xfs_defer_op_type *ops;
- trace_xfs_defer_trans_abort(tp, _RET_IP_);
-
/* Abort intent items that don't have a done item. */
- list_for_each_entry(dfp, dop_pending, dfp_list) {
+ list_for_each_entry(dfp, dop_list, dfp_list) {
ops = defer_op_types[dfp->dfp_type];
- trace_xfs_defer_pending_abort(tp->t_mountp, dfp);
+ trace_xfs_defer_pending_abort(mp, dfp);
if (dfp->dfp_intent && !dfp->dfp_done) {
ops->abort_intent(dfp->dfp_intent);
dfp->dfp_intent = NULL;
}
}
+/* Abort all the intents that were committed. */
+STATIC void
+xfs_defer_trans_abort(
+ struct xfs_trans *tp,
+ struct list_head *dop_pending)
+{
+ trace_xfs_defer_trans_abort(tp, _RET_IP_);
+ xfs_defer_pending_abort(tp->t_mountp, dop_pending);
+}
+
/*
* Capture resources that the caller said not to release ("held") when the
* transaction commits. Caller is responsible for zero-initializing @dres.
/* Release all resources that we used to capture deferred ops. */
void
-xfs_defer_ops_capture_free(
+xfs_defer_ops_capture_abort(
struct xfs_mount *mp,
struct xfs_defer_capture *dfc)
{
unsigned short i;
+ xfs_defer_pending_abort(mp, &dfc->dfc_dfops);
xfs_defer_cancel_list(mp, &dfc->dfc_dfops);
for (i = 0; i < dfc->dfc_held.dr_bufs; i++)
/* Commit the transaction and add the capture structure to the list. */
error = xfs_trans_commit(tp);
if (error) {
- xfs_defer_ops_capture_free(mp, dfc);
+ xfs_defer_ops_capture_abort(mp, dfc);
return error;
}
struct list_head *capture_list);
void xfs_defer_ops_continue(struct xfs_defer_capture *d, struct xfs_trans *tp,
struct xfs_defer_resources *dres);
-void xfs_defer_ops_capture_free(struct xfs_mount *mp,
+void xfs_defer_ops_capture_abort(struct xfs_mount *mp,
struct xfs_defer_capture *d);
void xfs_defer_resources_rele(struct xfs_defer_resources *dres);
if (mode && nextents + naextents > nblocks)
return __this_address;
+ if (nextents + naextents == 0 && nblocks != 0)
+ return __this_address;
+
if (S_ISDIR(mode) && nextents > mp->m_dir_geo->max_extents)
return __this_address;
struct xfs_log_dinode *ldip;
uint isize;
int need_free = 0;
+ xfs_failaddr_t fa;
if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
in_f = item->ri_buf[0].i_addr;
* superblock flag to determine whether we need to look at di_flushiter
* to skip replay when the on disk inode is newer than the log one
*/
- if (!xfs_has_v3inodes(mp) &&
- ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
- /*
- * Deal with the wrap case, DI_MAX_FLUSH is less
- * than smaller numbers
- */
- if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH &&
- ldip->di_flushiter < (DI_MAX_FLUSH >> 1)) {
- /* do nothing */
- } else {
- trace_xfs_log_recover_inode_skip(log, in_f);
- error = 0;
- goto out_release;
+ if (!xfs_has_v3inodes(mp)) {
+ if (ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
+ /*
+ * Deal with the wrap case, DI_MAX_FLUSH is less
+ * than smaller numbers
+ */
+ if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH &&
+ ldip->di_flushiter < (DI_MAX_FLUSH >> 1)) {
+ /* do nothing */
+ } else {
+ trace_xfs_log_recover_inode_skip(log, in_f);
+ error = 0;
+ goto out_release;
+ }
}
+
+ /* Take the opportunity to reset the flush iteration count */
+ ldip->di_flushiter = 0;
}
- /* Take the opportunity to reset the flush iteration count */
- ldip->di_flushiter = 0;
if (unlikely(S_ISREG(ldip->di_mode))) {
if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) &&
(dip->di_mode != 0))
error = xfs_recover_inode_owner_change(mp, dip, in_f,
buffer_list);
- /* re-generate the checksum. */
+ /* re-generate the checksum and validate the recovered inode. */
xfs_dinode_calc_crc(log->l_mp, dip);
+ fa = xfs_dinode_verify(log->l_mp, in_f->ilf_ino, dip);
+ if (fa) {
+ XFS_CORRUPTION_ERROR(
+ "Bad dinode after recovery",
+ XFS_ERRLEVEL_LOW, mp, dip, sizeof(*dip));
+ xfs_alert(mp,
+ "Metadata corruption detected at %pS, inode 0x%llx",
+ fa, in_f->ilf_ino);
+ error = -EFSCORRUPTED;
+ goto out_release;
+ }
ASSERT(bp->b_mount == mp);
bp->b_flags |= _XBF_LOGRECOVERY;
* the buffer manually, the code needs to be kept in sync
* with the I/O completion path.
*/
- xlog_state_done_syncing(iclog);
- up(&iclog->ic_sema);
- return;
+ goto sync;
}
/*
* avoid shutdown re-entering this path and erroring out again.
*/
if (log->l_targ != log->l_mp->m_ddev_targp &&
- blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev)) {
- xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
- return;
- }
+ blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev))
+ goto shutdown;
}
if (iclog->ic_flags & XLOG_ICL_NEED_FUA)
iclog->ic_bio.bi_opf |= REQ_FUA;
iclog->ic_flags &= ~(XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA);
- if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count)) {
- xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
- return;
- }
+ if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count))
+ goto shutdown;
+
if (is_vmalloc_addr(iclog->ic_data))
flush_kernel_vmap_range(iclog->ic_data, count);
}
submit_bio(&iclog->ic_bio);
+ return;
+shutdown:
+ xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
+sync:
+ xlog_state_done_syncing(iclog);
+ up(&iclog->ic_sema);
}
/*
list_for_each_entry_safe(dfc, next, capture_list, dfc_list) {
list_del_init(&dfc->dfc_list);
- xfs_defer_ops_capture_free(mp, dfc);
+ xfs_defer_ops_capture_abort(mp, dfc);
}
}
}
}
del = got;
+ xfs_trim_extent(&del, *offset_fsb, end_fsb - *offset_fsb);
/* Grab the corresponding mapping in the data fork. */
nmaps = 1;
extern spinlock_t btf_idr_lock;
extern struct kobject *btf_kobj;
extern struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma;
-extern bool bpf_global_ma_set, bpf_global_percpu_ma_set;
+extern bool bpf_global_ma_set;
typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64);
typedef int (*bpf_iter_init_seq_priv_t)(void *private_data,
aux->ctx_field_size = size;
}
+static bool bpf_is_ldimm64(const struct bpf_insn *insn)
+{
+ return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
+}
+
static inline bool bpf_pseudo_func(const struct bpf_insn *insn)
{
- return insn->code == (BPF_LD | BPF_IMM | BPF_DW) &&
- insn->src_reg == BPF_PSEUDO_FUNC;
+ return bpf_is_ldimm64(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
}
struct bpf_prog_ops {
CPUHP_AP_ARM_CORESIGHT_CTI_STARTING,
CPUHP_AP_ARM64_ISNDEP_STARTING,
CPUHP_AP_SMPCFD_DYING,
+ CPUHP_AP_HRTIMERS_DYING,
CPUHP_AP_X86_TBOOT_DYING,
CPUHP_AP_ARM_CACHE_B15_RAC_DYING,
CPUHP_AP_ONLINE,
int hrtimers_prepare_cpu(unsigned int cpu);
#ifdef CONFIG_HOTPLUG_CPU
-int hrtimers_dead_cpu(unsigned int cpu);
+int hrtimers_cpu_dying(unsigned int cpu);
#else
-#define hrtimers_dead_cpu NULL
+#define hrtimers_cpu_dying NULL
#endif
#endif
* A function that translates value of following registers to the linkmode:
* IEEE 802.3-2018 45.2.3.10 "EEE control and capability 1" register (3.20)
* IEEE 802.3-2018 45.2.7.13 "EEE advertisement 1" register (7.60)
- * IEEE 802.3-2018 45.2.7.14 "EEE "link partner ability 1 register (7.61)
+ * IEEE 802.3-2018 45.2.7.14 "EEE link partner ability 1" register (7.61)
*/
static inline void mii_eee_cap1_mod_linkmode_t(unsigned long *adv, u32 val)
{
};
/*
- * ,-----------------------[1:n]----------------------.
- * V V
- * perf_event_context <-[1:n]-> perf_event_pmu_context <--- perf_event
- * ^ ^ | |
- * `--------[1:n]---------' `-[n:1]-> pmu <-[1:n]-'
+ * ,-----------------------[1:n]------------------------.
+ * V V
+ * perf_event_context <-[1:n]-> perf_event_pmu_context <-[1:n]- perf_event
+ * | |
+ * `--[n:1]-> pmu <-[1:n]--'
*
*
* struct perf_event_pmu_context lifetime is refcount based and RCU freed
* ctx->mutex pinning the configuration. Since we hold a reference on
* group_leader (through the filedesc) it can't go away, therefore it's
* associated pmu_ctx must exist and cannot change due to ctx->mutex.
+ *
+ * perf_event holds a refcount on perf_event_context
+ * perf_event holds a refcount on perf_event_pmu_context
*/
struct perf_event_pmu_context {
struct pmu *pmu;
#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
#include <asm/stacktrace.h>
+#include <linux/linkage.h>
/*
* The lowest address on tsk's stack which we can plausibly erase.
# endif
}
+asmlinkage void noinstr stackleak_erase(void);
+asmlinkage void noinstr stackleak_erase_on_task_stack(void);
+asmlinkage void noinstr stackleak_erase_off_task_stack(void);
+void __no_caller_saved_registers noinstr stackleak_track_stack(void);
+
#else /* !CONFIG_GCC_PLUGIN_STACKLEAK */
static inline void stackleak_task_init(struct task_struct *t) { }
#endif
#include <linux/pci.h>
#include <linux/virtio_pci.h>
-struct virtio_pci_modern_common_cfg {
- struct virtio_pci_common_cfg cfg;
-
- __le16 queue_notify_data; /* read-write */
- __le16 queue_reset; /* read-write */
-};
-
/**
* struct virtio_pci_modern_device - info for modern PCI virtio
* @pci_dev: Ptr to the PCI device struct
return *(__force __be32 *)sreg;
}
-static inline void nft_reg_store64(u32 *dreg, u64 val)
+static inline void nft_reg_store64(u64 *dreg, u64 val)
{
- put_unaligned(val, (u64 *)dreg);
+ put_unaligned(val, dreg);
}
static inline u64 nft_reg_load64(const u32 *sreg)
return to_ct_params(a)->nf_ft;
}
+static inline struct nf_conntrack_helper *tcf_ct_helper(const struct tc_action *a)
+{
+ return to_ct_params(a)->helper;
+}
+
#else
static inline uint16_t tcf_ct_zone(const struct tc_action *a) { return 0; }
static inline int tcf_ct_action(const struct tc_action *a) { return 0; }
{
return NULL;
}
+static inline struct nf_conntrack_helper *tcf_ct_helper(const struct tc_action *a)
+{
+ return NULL;
+}
#endif /* CONFIG_NF_CONNTRACK */
#if IS_ENABLED(CONFIG_NET_ACT_CT)
*/
#define BTRFS_METADATA_ITEM_KEY 169
+/*
+ * Special inline ref key which stores the id of the subvolume which originally
+ * created the extent. This subvolume owns the extent permanently from the
+ * perspective of simple quotas. Needed to know which subvolume to free quota
+ * usage from when the extent is deleted.
+ *
+ * Stored as an inline ref rather to avoid wasting space on a separate item on
+ * top of the existing extent item. However, unlike the other inline refs,
+ * there is one one owner ref per extent rather than one per extent.
+ *
+ * Because of this, it goes at the front of the list of inline refs, and thus
+ * must have a lower type value than any other inline ref type (to satisfy the
+ * disk format rule that inline refs have non-decreasing type).
+ */
+#define BTRFS_EXTENT_OWNER_REF_KEY 172
+
#define BTRFS_TREE_BLOCK_REF_KEY 176
#define BTRFS_EXTENT_DATA_REF_KEY 178
#define BTRFS_SHARED_DATA_REF_KEY 184
-/*
- * Special inline ref key which stores the id of the subvolume which originally
- * created the extent. This subvolume owns the extent permanently from the
- * perspective of simple quotas. Needed to know which subvolume to free quota
- * usage from when the extent is deleted.
- */
-#define BTRFS_EXTENT_OWNER_REF_KEY 188
-
/*
* block groups give us hints into the extent allocation trees. Which
* blocks are free etc etc
__le32 queue_used_hi; /* read-write */
};
+/*
+ * Warning: do not use sizeof on this: use offsetofend for
+ * specific fields you need.
+ */
+struct virtio_pci_modern_common_cfg {
+ struct virtio_pci_common_cfg cfg;
+
+ __le16 queue_notify_data; /* read-write */
+ __le16 queue_reset; /* read-write */
+};
+
/* Fields in VIRTIO_PCI_CAP_PCI_CFG: */
struct virtio_pci_cfg_cap {
struct virtio_pci_cap cap;
/* Clear an irq's pending state, in preparation for polling on it */
void xen_clear_irq_pending(int irq);
-void xen_set_irq_pending(int irq);
bool xen_test_irq_pending(int irq);
/* Poll waiting for an irq to become pending. In the usual case, the
/* Determine the IRQ which is bound to an event channel */
unsigned int irq_from_evtchn(evtchn_port_t evtchn);
-int irq_from_virq(unsigned int cpu, unsigned int virq);
-evtchn_port_t evtchn_from_irq(unsigned irq);
+int irq_evtchn_from_virq(unsigned int cpu, unsigned int virq,
+ evtchn_port_t *evtchn);
int xen_set_callback_via(uint64_t via);
int xen_evtchn_do_upcall(void);
/* De-allocates the above mentioned physical interrupt. */
int xen_destroy_irq(int irq);
-/* Return irq from pirq */
-int xen_irq_from_pirq(unsigned pirq);
-
/* Return the pirq allocated to the irq. */
int xen_pirq_from_irq(unsigned irq);
if (has_lock && (ctx->flags & IORING_SETUP_SQPOLL)) {
struct io_sq_data *sq = ctx->sq_data;
- if (mutex_trylock(&sq->lock)) {
- if (sq->thread) {
- sq_pid = task_pid_nr(sq->thread);
- sq_cpu = task_cpu(sq->thread);
- }
- mutex_unlock(&sq->lock);
- }
+ sq_pid = sq->task_pid;
+ sq_cpu = sq->sq_cpu;
}
seq_printf(m, "SqThread:\t%d\n", sq_pid);
did_sig = get_signal(&ksig);
cond_resched();
mutex_lock(&sqd->lock);
+ sqd->sq_cpu = raw_smp_processor_id();
}
return did_sig || test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
}
snprintf(buf, sizeof(buf), "iou-sqp-%d", sqd->task_pid);
set_task_comm(current, buf);
- if (sqd->sq_cpu != -1)
+ /* reset to our pid after we've set task_comm, for fdinfo */
+ sqd->task_pid = current->pid;
+
+ if (sqd->sq_cpu != -1) {
set_cpus_allowed_ptr(current, cpumask_of(sqd->sq_cpu));
- else
+ } else {
set_cpus_allowed_ptr(current, cpu_online_mask);
+ sqd->sq_cpu = raw_smp_processor_id();
+ }
mutex_lock(&sqd->lock);
while (1) {
mutex_unlock(&sqd->lock);
cond_resched();
mutex_lock(&sqd->lock);
+ sqd->sq_cpu = raw_smp_processor_id();
}
continue;
}
mutex_unlock(&sqd->lock);
schedule();
mutex_lock(&sqd->lock);
+ sqd->sq_cpu = raw_smp_processor_id();
}
list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
atomic_andnot(IORING_SQ_NEED_WAKEUP,
if (tsk != current)
return 0;
- if (WARN_ON_ONCE(!current->mm))
+ if (!current->mm)
return 0;
exe_file = get_mm_exe_file(current->mm);
if (!exe_file)
#define OFF insn->off
#define IMM insn->imm
-struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma;
-bool bpf_global_ma_set, bpf_global_percpu_ma_set;
+struct bpf_mem_alloc bpf_global_ma;
+bool bpf_global_ma_set;
/* No hurry in this branch
*
ret = bpf_mem_alloc_init(&bpf_global_ma, 0, false);
bpf_global_ma_set = !ret;
- ret = bpf_mem_alloc_init(&bpf_global_percpu_ma, 0, true);
- bpf_global_percpu_ma_set = !ret;
- return !bpf_global_ma_set || !bpf_global_percpu_ma_set;
+ return ret;
}
late_initcall(bpf_global_ma_init);
#endif
#include <linux/poison.h>
#include <linux/module.h>
#include <linux/cpumask.h>
+#include <linux/bpf_mem_alloc.h>
#include <net/xdp.h>
#include "disasm.h"
#undef BPF_LINK_TYPE
};
+struct bpf_mem_alloc bpf_global_percpu_ma;
+static bool bpf_global_percpu_ma_set;
+
/* bpf_check() is a static code analyzer that walks eBPF program
* instruction by instruction and updates register/stack state.
* All paths of conditional branches are analyzed until 'bpf_exit' insn.
struct btf *btf_vmlinux;
static DEFINE_MUTEX(bpf_verifier_lock);
+static DEFINE_MUTEX(bpf_percpu_ma_lock);
static const struct bpf_line_info *
find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
/* Backtrack one insn at a time. If idx is not at the top of recorded
* history then previous instruction came from straight line execution.
+ * Return -ENOENT if we exhausted all instructions within given state.
+ *
+ * It's legal to have a bit of a looping with the same starting and ending
+ * insn index within the same state, e.g.: 3->4->5->3, so just because current
+ * instruction index is the same as state's first_idx doesn't mean we are
+ * done. If there is still some jump history left, we should keep going. We
+ * need to take into account that we might have a jump history between given
+ * state's parent and itself, due to checkpointing. In this case, we'll have
+ * history entry recording a jump from last instruction of parent state and
+ * first instruction of given state.
*/
static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
u32 *history)
{
u32 cnt = *history;
+ if (i == st->first_insn_idx) {
+ if (cnt == 0)
+ return -ENOENT;
+ if (cnt == 1 && st->jmp_history[0].idx == i)
+ return -ENOENT;
+ }
+
if (cnt && st->jmp_history[cnt - 1].idx == i) {
i = st->jmp_history[cnt - 1].prev_idx;
(*history)--;
* Nothing to be tracked further in the parent state.
*/
return 0;
- if (i == first_idx)
- break;
subseq_idx = i;
i = get_prev_insn_idx(st, i, &history);
+ if (i == -ENOENT)
+ break;
if (i >= env->prog->len) {
/* This can happen if backtracking reached insn 0
* and there are still reg_mask or stack_mask
if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl] && !bpf_global_ma_set)
return -ENOMEM;
- if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl] && !bpf_global_percpu_ma_set)
- return -ENOMEM;
+ if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
+ if (!bpf_global_percpu_ma_set) {
+ mutex_lock(&bpf_percpu_ma_lock);
+ if (!bpf_global_percpu_ma_set) {
+ err = bpf_mem_alloc_init(&bpf_global_percpu_ma, 0, true);
+ if (!err)
+ bpf_global_percpu_ma_set = true;
+ }
+ mutex_unlock(&bpf_percpu_ma_lock);
+ if (err)
+ return err;
+ }
+ }
if (((u64)(u32)meta.arg_constant.value) != meta.arg_constant.value) {
verbose(env, "local type ID argument must be in range [0, U32_MAX]\n");
* w - next instruction
* e - edge
*/
-static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
- bool loop_ok)
+static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
{
int *insn_stack = env->cfg.insn_stack;
int *insn_state = env->cfg.insn_state;
insn_stack[env->cfg.cur_stack++] = w;
return KEEP_EXPLORING;
} else if ((insn_state[w] & 0xF0) == DISCOVERED) {
- if (loop_ok && env->bpf_capable)
+ if (env->bpf_capable)
return DONE_EXPLORING;
verbose_linfo(env, t, "%d: ", t);
verbose_linfo(env, w, "%d: ", w);
struct bpf_verifier_env *env,
bool visit_callee)
{
- int ret;
+ int ret, insn_sz;
- ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
+ insn_sz = bpf_is_ldimm64(&insns[t]) ? 2 : 1;
+ ret = push_insn(t, t + insn_sz, FALLTHROUGH, env);
if (ret)
return ret;
- mark_prune_point(env, t + 1);
+ mark_prune_point(env, t + insn_sz);
/* when we exit from subprog, we need to record non-linear history */
- mark_jmp_point(env, t + 1);
+ mark_jmp_point(env, t + insn_sz);
if (visit_callee) {
mark_prune_point(env, t);
- ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env,
- /* It's ok to allow recursion from CFG point of
- * view. __check_func_call() will do the actual
- * check.
- */
- bpf_pseudo_func(insns + t));
+ ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env);
}
return ret;
}
static int visit_insn(int t, struct bpf_verifier_env *env)
{
struct bpf_insn *insns = env->prog->insnsi, *insn = &insns[t];
- int ret, off;
+ int ret, off, insn_sz;
if (bpf_pseudo_func(insn))
return visit_func_call_insn(t, insns, env, true);
/* All non-branch instructions have a single fall-through edge. */
if (BPF_CLASS(insn->code) != BPF_JMP &&
- BPF_CLASS(insn->code) != BPF_JMP32)
- return push_insn(t, t + 1, FALLTHROUGH, env, false);
+ BPF_CLASS(insn->code) != BPF_JMP32) {
+ insn_sz = bpf_is_ldimm64(insn) ? 2 : 1;
+ return push_insn(t, t + insn_sz, FALLTHROUGH, env);
+ }
switch (BPF_OP(insn->code)) {
case BPF_EXIT:
off = insn->imm;
/* unconditional jump with single edge */
- ret = push_insn(t, t + off + 1, FALLTHROUGH, env,
- true);
+ ret = push_insn(t, t + off + 1, FALLTHROUGH, env);
if (ret)
return ret;
/* conditional jump with two edges */
mark_prune_point(env, t);
- ret = push_insn(t, t + 1, FALLTHROUGH, env, true);
+ ret = push_insn(t, t + 1, FALLTHROUGH, env);
if (ret)
return ret;
- return push_insn(t, t + insn->off + 1, BRANCH, env, true);
+ return push_insn(t, t + insn->off + 1, BRANCH, env);
}
}
}
for (i = 0; i < insn_cnt; i++) {
+ struct bpf_insn *insn = &env->prog->insnsi[i];
+
if (insn_state[i] != EXPLORED) {
verbose(env, "unreachable insn %d\n", i);
ret = -EINVAL;
goto err_free;
}
+ if (bpf_is_ldimm64(insn)) {
+ if (insn_state[i + 1] != 0) {
+ verbose(env, "jump into the middle of ldimm64 insn %d\n", i);
+ ret = -EINVAL;
+ goto err_free;
+ }
+ i++; /* skip second half of ldimm64 */
+ }
}
ret = 0; /* cfg looks good */
return psi_trigger_poll(&ctx->psi.trigger, of->file, pt);
}
-static int cgroup_pressure_open(struct kernfs_open_file *of)
-{
- if (of->file->f_mode & FMODE_WRITE && !capable(CAP_SYS_RESOURCE))
- return -EPERM;
-
- return 0;
-}
-
static void cgroup_pressure_release(struct kernfs_open_file *of)
{
struct cgroup_file_ctx *ctx = of->priv;
{
.name = "io.pressure",
.file_offset = offsetof(struct cgroup, psi_files[PSI_IO]),
- .open = cgroup_pressure_open,
.seq_show = cgroup_io_pressure_show,
.write = cgroup_io_pressure_write,
.poll = cgroup_pressure_poll,
{
.name = "memory.pressure",
.file_offset = offsetof(struct cgroup, psi_files[PSI_MEM]),
- .open = cgroup_pressure_open,
.seq_show = cgroup_memory_pressure_show,
.write = cgroup_memory_pressure_write,
.poll = cgroup_pressure_poll,
{
.name = "cpu.pressure",
.file_offset = offsetof(struct cgroup, psi_files[PSI_CPU]),
- .open = cgroup_pressure_open,
.seq_show = cgroup_cpu_pressure_show,
.write = cgroup_cpu_pressure_write,
.poll = cgroup_pressure_poll,
{
.name = "irq.pressure",
.file_offset = offsetof(struct cgroup, psi_files[PSI_IRQ]),
- .open = cgroup_pressure_open,
.seq_show = cgroup_irq_pressure_show,
.write = cgroup_irq_pressure_write,
.poll = cgroup_pressure_poll,
[CPUHP_HRTIMERS_PREPARE] = {
.name = "hrtimers:prepare",
.startup.single = hrtimers_prepare_cpu,
- .teardown.single = hrtimers_dead_cpu,
+ .teardown.single = NULL,
},
[CPUHP_SMPCFD_PREPARE] = {
.name = "smpcfd:prepare",
.startup.single = NULL,
.teardown.single = smpcfd_dying_cpu,
},
+ [CPUHP_AP_HRTIMERS_DYING] = {
+ .name = "hrtimers:dying",
+ .startup.single = NULL,
+ .teardown.single = hrtimers_cpu_dying,
+ },
+
/* Entry state on starting. Interrupts enabled from here on. Transient
* state for synchronsization */
[CPUHP_AP_ONLINE] = {
void *task_ctx_data = NULL;
if (!ctx->task) {
+ /*
+ * perf_pmu_migrate_context() / __perf_pmu_install_event()
+ * relies on the fact that find_get_pmu_context() cannot fail
+ * for CPU contexts.
+ */
struct perf_cpu_pmu_context *cpc;
cpc = per_cpu_ptr(pmu->cpu_pmu_context, event->cpu);
int cpu, struct perf_event *event)
{
struct perf_event_pmu_context *epc;
+ struct perf_event_context *old_ctx = event->ctx;
+
+ get_ctx(ctx); /* normally find_get_context() */
event->cpu = cpu;
epc = find_get_pmu_context(pmu, ctx, event);
if (event->state >= PERF_EVENT_STATE_OFF)
event->state = PERF_EVENT_STATE_INACTIVE;
perf_install_in_context(ctx, event, cpu);
+
+ /*
+ * Now that event->ctx is updated and visible, put the old ctx.
+ */
+ put_ctx(old_ctx);
}
static void __perf_pmu_install(struct perf_event_context *ctx,
struct perf_event_context *src_ctx, *dst_ctx;
LIST_HEAD(events);
+ /*
+ * Since per-cpu context is persistent, no need to grab an extra
+ * reference.
+ */
src_ctx = &per_cpu_ptr(&perf_cpu_context, src_cpu)->ctx;
dst_ctx = &per_cpu_ptr(&perf_cpu_context, dst_cpu)->ctx;
owner = uval & FUTEX_TID_MASK;
if (pending_op && !pi && !owner) {
- futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
+ futex_wake(uaddr, FLAGS_SIZE_32 | FLAGS_SHARED, 1,
+ FUTEX_BITSET_MATCH_ANY);
return 0;
}
* Wake robust non-PI futexes here. The wakeup of
* PI futexes happens in exit_pi_state():
*/
- if (!pi && (uval & FUTEX_WAITERS))
- futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
+ if (!pi && (uval & FUTEX_WAITERS)) {
+ futex_wake(uaddr, FLAGS_SIZE_32 | FLAGS_SHARED, 1,
+ FUTEX_BITSET_MATCH_ANY);
+ }
return 0;
}
dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { }
#endif
+static void reweight_eevdf(struct cfs_rq *cfs_rq, struct sched_entity *se,
+ unsigned long weight)
+{
+ unsigned long old_weight = se->load.weight;
+ u64 avruntime = avg_vruntime(cfs_rq);
+ s64 vlag, vslice;
+
+ /*
+ * VRUNTIME
+ * ========
+ *
+ * COROLLARY #1: The virtual runtime of the entity needs to be
+ * adjusted if re-weight at !0-lag point.
+ *
+ * Proof: For contradiction assume this is not true, so we can
+ * re-weight without changing vruntime at !0-lag point.
+ *
+ * Weight VRuntime Avg-VRuntime
+ * before w v V
+ * after w' v' V'
+ *
+ * Since lag needs to be preserved through re-weight:
+ *
+ * lag = (V - v)*w = (V'- v')*w', where v = v'
+ * ==> V' = (V - v)*w/w' + v (1)
+ *
+ * Let W be the total weight of the entities before reweight,
+ * since V' is the new weighted average of entities:
+ *
+ * V' = (WV + w'v - wv) / (W + w' - w) (2)
+ *
+ * by using (1) & (2) we obtain:
+ *
+ * (WV + w'v - wv) / (W + w' - w) = (V - v)*w/w' + v
+ * ==> (WV-Wv+Wv+w'v-wv)/(W+w'-w) = (V - v)*w/w' + v
+ * ==> (WV - Wv)/(W + w' - w) + v = (V - v)*w/w' + v
+ * ==> (V - v)*W/(W + w' - w) = (V - v)*w/w' (3)
+ *
+ * Since we are doing at !0-lag point which means V != v, we
+ * can simplify (3):
+ *
+ * ==> W / (W + w' - w) = w / w'
+ * ==> Ww' = Ww + ww' - ww
+ * ==> W * (w' - w) = w * (w' - w)
+ * ==> W = w (re-weight indicates w' != w)
+ *
+ * So the cfs_rq contains only one entity, hence vruntime of
+ * the entity @v should always equal to the cfs_rq's weighted
+ * average vruntime @V, which means we will always re-weight
+ * at 0-lag point, thus breach assumption. Proof completed.
+ *
+ *
+ * COROLLARY #2: Re-weight does NOT affect weighted average
+ * vruntime of all the entities.
+ *
+ * Proof: According to corollary #1, Eq. (1) should be:
+ *
+ * (V - v)*w = (V' - v')*w'
+ * ==> v' = V' - (V - v)*w/w' (4)
+ *
+ * According to the weighted average formula, we have:
+ *
+ * V' = (WV - wv + w'v') / (W - w + w')
+ * = (WV - wv + w'(V' - (V - v)w/w')) / (W - w + w')
+ * = (WV - wv + w'V' - Vw + wv) / (W - w + w')
+ * = (WV + w'V' - Vw) / (W - w + w')
+ *
+ * ==> V'*(W - w + w') = WV + w'V' - Vw
+ * ==> V' * (W - w) = (W - w) * V (5)
+ *
+ * If the entity is the only one in the cfs_rq, then reweight
+ * always occurs at 0-lag point, so V won't change. Or else
+ * there are other entities, hence W != w, then Eq. (5) turns
+ * into V' = V. So V won't change in either case, proof done.
+ *
+ *
+ * So according to corollary #1 & #2, the effect of re-weight
+ * on vruntime should be:
+ *
+ * v' = V' - (V - v) * w / w' (4)
+ * = V - (V - v) * w / w'
+ * = V - vl * w / w'
+ * = V - vl'
+ */
+ if (avruntime != se->vruntime) {
+ vlag = (s64)(avruntime - se->vruntime);
+ vlag = div_s64(vlag * old_weight, weight);
+ se->vruntime = avruntime - vlag;
+ }
+
+ /*
+ * DEADLINE
+ * ========
+ *
+ * When the weight changes, the virtual time slope changes and
+ * we should adjust the relative virtual deadline accordingly.
+ *
+ * d' = v' + (d - v)*w/w'
+ * = V' - (V - v)*w/w' + (d - v)*w/w'
+ * = V - (V - v)*w/w' + (d - v)*w/w'
+ * = V + (d - V)*w/w'
+ */
+ vslice = (s64)(se->deadline - avruntime);
+ vslice = div_s64(vslice * old_weight, weight);
+ se->deadline = avruntime + vslice;
+}
+
static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
unsigned long weight)
{
- unsigned long old_weight = se->load.weight;
+ bool curr = cfs_rq->curr == se;
if (se->on_rq) {
/* commit outstanding execution time */
- if (cfs_rq->curr == se)
+ if (curr)
update_curr(cfs_rq);
else
- avg_vruntime_sub(cfs_rq, se);
+ __dequeue_entity(cfs_rq, se);
update_load_sub(&cfs_rq->load, se->load.weight);
}
dequeue_load_avg(cfs_rq, se);
- update_load_set(&se->load, weight);
-
if (!se->on_rq) {
/*
* Because we keep se->vlag = V - v_i, while: lag_i = w_i*(V - v_i),
* we need to scale se->vlag when w_i changes.
*/
- se->vlag = div_s64(se->vlag * old_weight, weight);
+ se->vlag = div_s64(se->vlag * se->load.weight, weight);
} else {
- s64 deadline = se->deadline - se->vruntime;
- /*
- * When the weight changes, the virtual time slope changes and
- * we should adjust the relative virtual deadline accordingly.
- */
- deadline = div_s64(deadline * old_weight, weight);
- se->deadline = se->vruntime + deadline;
- if (se != cfs_rq->curr)
- min_deadline_cb_propagate(&se->run_node, NULL);
+ reweight_eevdf(cfs_rq, se, weight);
}
+ update_load_set(&se->load, weight);
+
#ifdef CONFIG_SMP
do {
u32 divider = get_pelt_divider(&se->avg);
enqueue_load_avg(cfs_rq, se);
if (se->on_rq) {
update_load_add(&cfs_rq->load, se->load.weight);
- if (cfs_rq->curr != se)
- avg_vruntime_add(cfs_rq, se);
+ if (!curr) {
+ /*
+ * The entity's vruntime has been adjusted, so let's check
+ * whether the rq-wide min_vruntime needs updated too. Since
+ * the calculations above require stable min_vruntime rather
+ * than up-to-date one, we do the update at the end of the
+ * reweight process.
+ */
+ __enqueue_entity(cfs_rq, se);
+ update_min_vruntime(cfs_rq);
+ }
}
}
#ifndef CONFIG_SMP
shares = READ_ONCE(gcfs_rq->tg->shares);
-
- if (likely(se->load.weight == shares))
- return;
#else
- shares = calc_group_shares(gcfs_rq);
+ shares = calc_group_shares(gcfs_rq);
#endif
-
- reweight_entity(cfs_rq_of(se), se, shares);
+ if (unlikely(se->load.weight != shares))
+ reweight_entity(cfs_rq_of(se), se, shares);
}
#else /* CONFIG_FAIR_GROUP_SCHED */
continue;
}
- /* Are we the first idle CPU? */
+ /*
+ * Are we the first idle core in a non-SMT domain or higher,
+ * or the first idle CPU in a SMT domain?
+ */
return cpu == env->dst_cpu;
}
- if (idle_smt == env->dst_cpu)
- return true;
+ /* Are we the first idle CPU with busy siblings? */
+ if (idle_smt != -1)
+ return idle_smt == env->dst_cpu;
/* Are we the first CPU of this group ? */
return group_balance_cpu(sg) == env->dst_cpu;
if (bits & PR_MDWE_NO_INHERIT && !(bits & PR_MDWE_REFUSE_EXEC_GAIN))
return -EINVAL;
+ /* PARISC cannot allow mdwe as it needs writable stacks */
+ if (IS_ENABLED(CONFIG_PARISC))
+ return -EINVAL;
+
current_bits = get_current_mdwe();
if (current_bits && current_bits != bits)
return -EPERM; /* Cannot unset the flags */
}
}
-int hrtimers_dead_cpu(unsigned int scpu)
+int hrtimers_cpu_dying(unsigned int dying_cpu)
{
struct hrtimer_cpu_base *old_base, *new_base;
- int i;
+ int i, ncpu = cpumask_first(cpu_active_mask);
- BUG_ON(cpu_online(scpu));
- tick_cancel_sched_timer(scpu);
+ tick_cancel_sched_timer(dying_cpu);
+
+ old_base = this_cpu_ptr(&hrtimer_bases);
+ new_base = &per_cpu(hrtimer_bases, ncpu);
- /*
- * this BH disable ensures that raise_softirq_irqoff() does
- * not wakeup ksoftirqd (and acquire the pi-lock) while
- * holding the cpu_base lock
- */
- local_bh_disable();
- local_irq_disable();
- old_base = &per_cpu(hrtimer_bases, scpu);
- new_base = this_cpu_ptr(&hrtimer_bases);
/*
* The caller is globally serialized and nobody else
* takes two locks at once, deadlock is not possible.
*/
- raw_spin_lock(&new_base->lock);
- raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
+ raw_spin_lock(&old_base->lock);
+ raw_spin_lock_nested(&new_base->lock, SINGLE_DEPTH_NESTING);
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
migrate_hrtimer_list(&old_base->clock_base[i],
* The migration might have changed the first expiring softirq
* timer on this CPU. Update it.
*/
- hrtimer_update_softirq_timer(new_base, false);
+ __hrtimer_get_next_event(new_base, HRTIMER_ACTIVE_SOFT);
+ /* Tell the other CPU to retrigger the next event */
+ smp_call_function_single(ncpu, retrigger_next_event, NULL, 0);
- raw_spin_unlock(&old_base->lock);
raw_spin_unlock(&new_base->lock);
+ raw_spin_unlock(&old_base->lock);
- /* Check, if we got expired work to do */
- __hrtimer_peek_ahead_timers();
- local_irq_enable();
- local_bh_enable();
return 0;
}
typedef struct {
short ncount[FSE_MAX_SYMBOL_VALUE + 1];
- FSE_DTable dtable[1]; /* Dynamically sized */
+ FSE_DTable dtable[]; /* Dynamically sized */
} FSE_DecompressWksp;
matched = true;
break;
default:
- break;
+ return false;
}
return matched == filter->matching;
struct damon_sysfs_scheme_regions *regions = kmalloc(sizeof(*regions),
GFP_KERNEL);
+ if (!regions)
+ return NULL;
+
regions->kobj = (struct kobject){};
INIT_LIST_HEAD(®ions->regions_list);
regions->nr_regions = 0;
return 0;
region = damon_sysfs_scheme_region_alloc(r);
+ if (!region)
+ return 0;
list_add_tail(®ion->list, &sysfs_regions->regions_list);
sysfs_regions->nr_regions++;
if (kobject_init_and_add(®ion->kobj,
struct damon_ctx *ctx,
struct damon_sysfs_target *sys_target)
{
- int err;
+ int err = 0;
if (damon_target_has_pid(ctx)) {
err = damon_sysfs_update_target_pid(target, sys_target->pid);
damon_for_each_target_safe(t, next, ctx) {
if (i < sysfs_targets->nr) {
- damon_sysfs_update_target(t, ctx,
+ err = damon_sysfs_update_target(t, ctx,
sysfs_targets->targets_arr[i]);
+ if (err)
+ return err;
} else {
if (damon_target_has_pid(ctx))
put_pid(t->pid);
* handled in the specific fault path, and it'll prohibit the
* fault-around logic.
*/
- if (!pte_none(vmf->pte[count]))
+ if (!pte_none(ptep_get(&vmf->pte[count])))
goto skip;
count++;
int nr = folio_nr_pages(folio);
xas_split(&xas, folio, folio_order(folio));
- if (folio_test_swapbacked(folio)) {
- __lruvec_stat_mod_folio(folio, NR_SHMEM_THPS,
- -nr);
- } else {
- __lruvec_stat_mod_folio(folio, NR_FILE_THPS,
- -nr);
- filemap_nr_thps_dec(mapping);
+ if (folio_test_pmd_mappable(folio)) {
+ if (folio_test_swapbacked(folio)) {
+ __lruvec_stat_mod_folio(folio,
+ NR_SHMEM_THPS, -nr);
+ } else {
+ __lruvec_stat_mod_folio(folio,
+ NR_FILE_THPS, -nr);
+ filemap_nr_thps_dec(mapping);
+ }
}
}
page = pfn_swap_entry_to_page(entry);
}
/* return 1 if the page is an normal ksm page or KSM-placed zero page */
- ret = (page && PageKsm(page)) || is_ksm_zero_pte(*pte);
+ ret = (page && PageKsm(page)) || is_ksm_zero_pte(ptent);
pte_unmap_unlock(pte, ptl);
return ret;
}
* Moreover, it should not come from DMA buffer and is not readily
* reclaimable. So those GFP bits should be masked off.
*/
-#define OBJCGS_CLEAR_MASK (__GFP_DMA | __GFP_RECLAIMABLE | __GFP_ACCOUNT)
+#define OBJCGS_CLEAR_MASK (__GFP_DMA | __GFP_RECLAIMABLE | \
+ __GFP_ACCOUNT | __GFP_NOFAIL)
/*
* mod_objcg_mlstate() may be called with irq enabled, so
ret = -EEXIST;
/* Refuse to overwrite any PTE, even a PTE marker (e.g. UFFD WP). */
- if (!pte_none(*dst_pte))
+ if (!pte_none(ptep_get(dst_pte)))
goto out_unlock;
set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
{
+#ifdef CONFIG_STACK_GROWSUP
+ /*
+ * For an upwards growing stack the calculation is much simpler.
+ * Memory for the maximum stack size is reserved at the top of the
+ * task. mmap_base starts directly below the stack and grows
+ * downwards.
+ */
+ return PAGE_ALIGN_DOWN(mmap_upper_limit(rlim_stack) - rnd);
+#else
unsigned long gap = rlim_stack->rlim_cur;
unsigned long pad = stack_guard_gap;
gap = MAX_GAP;
return PAGE_ALIGN(STACK_TOP - gap - rnd);
+#endif
}
void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
ktime_t tstamp = skb->tstamp;
struct ip_frag_state state;
struct iphdr *iph;
- int err;
+ int err = 0;
/* for offloaded checksums cleanup checksum before fragmentation */
if (skb->ip_summed == CHECKSUM_PARTIAL &&
if (i == max_netdevices)
return -ENFILE;
- snprintf(res, IFNAMSIZ, name, i);
+ /* 'res' and 'name' could overlap, use 'buf' as an intermediate buffer */
+ strscpy(buf, name, IFNAMSIZ);
+ snprintf(res, IFNAMSIZ, buf, i);
return i;
}
}
if (tcase->frag_skbs) {
- unsigned int total_size = 0, total_true_size = 0, alloc_size = 0;
+ unsigned int total_size = 0, total_true_size = 0;
struct sk_buff *frag_skb, *prev = NULL;
- page = alloc_page(GFP_KERNEL);
- KUNIT_ASSERT_NOT_NULL(test, page);
- page_ref_add(page, tcase->nr_frag_skbs - 1);
-
for (i = 0; i < tcase->nr_frag_skbs; i++) {
unsigned int frag_size;
+ page = alloc_page(GFP_KERNEL);
+ KUNIT_ASSERT_NOT_NULL(test, page);
+
frag_size = tcase->frag_skbs[i];
- frag_skb = build_skb(page_address(page) + alloc_size,
+ frag_skb = build_skb(page_address(page),
frag_size + shinfo_size);
KUNIT_ASSERT_NOT_NULL(test, frag_skb);
__skb_put(frag_skb, frag_size);
total_size += frag_size;
total_true_size += frag_skb->truesize;
- alloc_size += frag_size + shinfo_size;
}
- KUNIT_ASSERT_LE(test, alloc_size, PAGE_SIZE);
-
skb->len += total_size;
skb->data_len += total_size;
skb->truesize += total_true_size;
if (err)
goto unlock;
}
+ sock_set_flag(sk, SOCK_RCU_FREE);
if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
sk->sk_family == AF_INET6)
__sk_nulls_add_node_tail_rcu(sk, &ilb2->nulls_head);
else
__sk_nulls_add_node_rcu(sk, &ilb2->nulls_head);
- sock_set_flag(sk, SOCK_RCU_FREE);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
unlock:
spin_unlock(&ilb2->lock);
struct mptcp_pm_addr_entry *entry;
list_for_each_entry(entry, rm_list, list) {
- remove_anno_list_by_saddr(msk, &entry->addr);
- if (alist.nr < MPTCP_RM_IDS_MAX)
+ if ((remove_anno_list_by_saddr(msk, &entry->addr) ||
+ lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) &&
+ alist.nr < MPTCP_RM_IDS_MAX)
alist.ids[alist.nr++] = entry->addr.id;
}
mptcp_do_fallback(ssk);
}
+#define MPTCP_MAX_GSO_SIZE (GSO_LEGACY_MAX_SIZE - (MAX_TCP_HEADER + 1))
+
static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
struct mptcp_data_frag *dfrag,
struct mptcp_sendmsg_info *info)
return -EAGAIN;
/* compute send limit */
+ if (unlikely(ssk->sk_gso_max_size > MPTCP_MAX_GSO_SIZE))
+ ssk->sk_gso_max_size = MPTCP_MAX_GSO_SIZE;
info->mss_now = tcp_send_mss(ssk, &info->size_goal, info->flags);
copy = info->size_goal;
if (__test_and_clear_bit(MPTCP_CLEAN_UNA, &msk->cb_flags))
__mptcp_clean_una_wakeup(sk);
if (unlikely(msk->cb_flags)) {
- /* be sure to set the current sk state before tacking actions
- * depending on sk_state, that is processing MPTCP_ERROR_REPORT
+ /* be sure to set the current sk state before taking actions
+ * depending on sk_state (MPTCP_ERROR_REPORT)
+ * On sk release avoid actions depending on the first subflow
*/
- if (__test_and_clear_bit(MPTCP_CONNECTED, &msk->cb_flags))
+ if (__test_and_clear_bit(MPTCP_CONNECTED, &msk->cb_flags) && msk->first)
__mptcp_set_connected(sk);
if (__test_and_clear_bit(MPTCP_ERROR_REPORT, &msk->cb_flags))
__mptcp_error_report(sk);
val = READ_ONCE(inet_sk(sk)->tos);
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ bool slow;
+ slow = lock_sock_fast(ssk);
__ip_sock_set_tos(ssk, val);
+ unlock_sock_fast(ssk, slow);
}
release_sock(sk);
if ((had_link == has_link) || chained)
return 0;
- if (had_link)
- netif_carrier_off(ndp->ndev.dev);
- else
- netif_carrier_on(ndp->ndev.dev);
-
if (!ndp->multi_package && !nc->package->multi_channel) {
if (had_link) {
ndp->flags |= NCSI_DEV_RESHUFFLE;
ip_set_dereference((inst)->ip_set_list)[id]
#define ip_set_ref_netlink(inst,id) \
rcu_dereference_raw((inst)->ip_set_list)[id]
+#define ip_set_dereference_nfnl(p) \
+ rcu_dereference_check(p, lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))
/* The set types are implemented in modules and registered set types
* can be found in ip_set_type_list. Adding/deleting types is
static struct ip_set *
ip_set_rcu_get(struct net *net, ip_set_id_t index)
{
- struct ip_set *set;
struct ip_set_net *inst = ip_set_pernet(net);
- rcu_read_lock();
- /* ip_set_list itself needs to be protected */
- set = rcu_dereference(inst->ip_set_list)[index];
- rcu_read_unlock();
-
- return set;
+ /* ip_set_list and the set pointer need to be protected */
+ return ip_set_dereference_nfnl(inst->ip_set_list)[index];
}
static inline void
ip_set(inst, to_id) = from;
write_unlock_bh(&ip_set_ref_lock);
+ /* Make sure all readers of the old set pointers are completed. */
+ synchronize_rcu();
+
return 0;
}
if (err < 0) {
NL_SET_BAD_ATTR(extack, attr);
- break;
+ return err;
}
}
- return err;
+
+ return 0;
}
/*
call_rcu(&trans->rcu, nft_trans_gc_trans_free);
}
-static struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
- unsigned int gc_seq,
- bool sync)
+struct nft_trans_gc *nft_trans_gc_catchall_async(struct nft_trans_gc *gc,
+ unsigned int gc_seq)
{
- struct nft_set_elem_catchall *catchall, *next;
+ struct nft_set_elem_catchall *catchall;
const struct nft_set *set = gc->set;
- struct nft_elem_priv *elem_priv;
struct nft_set_ext *ext;
- list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
+ list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
ext = nft_set_elem_ext(set, catchall->elem);
if (!nft_set_elem_expired(ext))
nft_set_elem_dead(ext);
dead_elem:
- if (sync)
- gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
- else
- gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
-
+ gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
if (!gc)
return NULL;
- elem_priv = catchall->elem;
- if (sync) {
- nft_setelem_data_deactivate(gc->net, gc->set, elem_priv);
- nft_setelem_catchall_destroy(catchall);
- }
-
- nft_trans_gc_elem_add(gc, elem_priv);
+ nft_trans_gc_elem_add(gc, catchall->elem);
}
return gc;
}
-struct nft_trans_gc *nft_trans_gc_catchall_async(struct nft_trans_gc *gc,
- unsigned int gc_seq)
-{
- return nft_trans_gc_catchall(gc, gc_seq, false);
-}
-
struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc)
{
- return nft_trans_gc_catchall(gc, 0, true);
+ struct nft_set_elem_catchall *catchall, *next;
+ const struct nft_set *set = gc->set;
+ struct nft_elem_priv *elem_priv;
+ struct nft_set_ext *ext;
+
+ WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net));
+
+ list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
+ ext = nft_set_elem_ext(set, catchall->elem);
+
+ if (!nft_set_elem_expired(ext))
+ continue;
+
+ gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
+ if (!gc)
+ return NULL;
+
+ elem_priv = catchall->elem;
+ nft_setelem_data_deactivate(gc->net, gc->set, elem_priv);
+ nft_setelem_catchall_destroy(catchall);
+ nft_trans_gc_elem_add(gc, elem_priv);
+ }
+
+ return gc;
}
static void nf_tables_module_autoload_cleanup(struct net *net)
switch (priv->size) {
case 8: {
+ u64 *dst64 = (void *)dst;
u64 src64;
switch (priv->op) {
case NFT_BYTEORDER_NTOH:
for (i = 0; i < priv->len / 8; i++) {
src64 = nft_reg_load64(&src[i]);
- nft_reg_store64(&dst[i],
+ nft_reg_store64(&dst64[i],
be64_to_cpu((__force __be64)src64));
}
break;
for (i = 0; i < priv->len / 8; i++) {
src64 = (__force __u64)
cpu_to_be64(nft_reg_load64(&src[i]));
- nft_reg_store64(&dst[i], src64);
+ nft_reg_store64(&dst64[i], src64);
}
break;
}
{
switch (key) {
case NFT_META_TIME_NS:
- nft_reg_store64(dest, ktime_get_real_ns());
+ nft_reg_store64((u64 *)dest, ktime_get_real_ns());
break;
case NFT_META_TIME_DAY:
nft_reg_store8(dest, nft_meta_weekday());
{
struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe, *rbe_end = NULL;
- struct nftables_pernet *nft_net;
struct rb_node *node, *next;
struct nft_trans_gc *gc;
struct net *net;
set = nft_set_container_of(priv);
net = read_pnet(&set->net);
- nft_net = nft_pernet(net);
gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
if (!gc)
if (bind) {
struct flow_action_entry *entry = entry_data;
+ if (tcf_ct_helper(act))
+ return -EOPNOTSUPP;
+
entry->id = FLOW_ACTION_CT;
entry->ct.action = tcf_ct_action(act);
entry->ct.zone = tcf_ct_zone(act);
return -EMSGSIZE;
skb_put(skb, TLV_SPACE(len));
+ memset(tlv, 0, TLV_SPACE(len));
tlv->tlv_type = htons(type);
tlv->tlv_len = htons(TLV_LENGTH(len));
if (len && data)
if (!(state->flags & MSG_PEEK))
WRITE_ONCE(u->oob_skb, NULL);
-
+ else
+ skb_get(oob_skb);
unix_state_unlock(sk);
chunk = state->recv_actor(oob_skb, 0, chunk, state);
- if (!(state->flags & MSG_PEEK)) {
+ if (!(state->flags & MSG_PEEK))
UNIXCB(oob_skb).consumed += 1;
- kfree_skb(oob_skb);
- }
+
+ consume_skb(oob_skb);
mutex_unlock(&u->iolock);
* if (argc <= 1)
* printf("%s: no command arguments :(\n", *argv);
* else
- * printf("%s: %d command arguments!\n", *argv, args - 1);
+ * printf("%s: %d command arguments!\n", *argv, argc - 1);
* }
*
* after:
* // perturb_local_entropy()
* } else {
* local_entropy ^= 3896280633962944730;
- * printf("%s: %d command arguments!\n", *argv, args - 1);
+ * printf("%s: %d command arguments!\n", *argv, argc - 1);
* }
*
* // latent_entropy_execute() 4.
{
const_tree fieldtype;
const_tree typesize;
- const_tree elemtype;
- const_tree elemsize;
fieldtype = TREE_TYPE(field);
typesize = TYPE_SIZE(fieldtype);
if (TREE_CODE(fieldtype) != ARRAY_TYPE)
return false;
- elemtype = TREE_TYPE(fieldtype);
- elemsize = TYPE_SIZE(elemtype);
-
/* size of type is represented in bits */
if (typesize == NULL_TREE && TYPE_DOMAIN(fieldtype) != NULL_TREE &&
TYPE_MAX_VALUE(TYPE_DOMAIN(fieldtype)) == NULL_TREE)
return true;
- if (typesize != NULL_TREE &&
- (TREE_CONSTANT(typesize) && (!tree_to_uhwi(typesize) ||
- tree_to_uhwi(typesize) == tree_to_uhwi(elemsize))))
- return true;
-
return false;
}
return -ENOMEM;
cs35l56->base.dev = &clt->dev;
+
+#ifdef CS35L56_WAKE_HOLD_TIME_US
+ cs35l56->base.can_hibernate = true;
+#endif
cs35l56->base.regmap = devm_regmap_init_i2c(clt, &cs35l56_regmap_i2c);
if (IS_ERR(cs35l56->base.regmap)) {
ret = PTR_ERR(cs35l56->base.regmap);
if (chip->driver_caps & AZX_DCAPS_I915_COMPONENT) {
err = snd_hdac_i915_init(azx_bus(chip));
if (err < 0) {
+ if (err == -EPROBE_DEFER)
+ goto out_free;
+
/* if the controller is bound only with HDMI/DP
* (for HSW and BDW), we need to abort the probe;
* for other chips, still continue probing as other
SND_PCI_QUIRK(0x103c, 0x8898, "HP EliteBook 845 G8 Notebook PC", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST),
SND_PCI_QUIRK(0x103c, 0x88d0, "HP Pavilion 15-eh1xxx (mainboard 88D0)", ALC287_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8902, "HP OMEN 16", ALC285_FIXUP_HP_MUTE_LED),
+ SND_PCI_QUIRK(0x103c, 0x890e, "HP 255 G8 Notebook PC", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2),
SND_PCI_QUIRK(0x103c, 0x8919, "HP Pavilion Aero Laptop 13-be0xxx", ALC287_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x896d, "HP ZBook Firefly 16 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x896e, "HP EliteBook x360 830 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8abb, "HP ZBook Firefly 14 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8ad1, "HP EliteBook 840 14 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8ad2, "HP EliteBook 860 16 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8b2f, "HP 255 15.6 inch G10 Notebook PC", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2),
SND_PCI_QUIRK(0x103c, 0x8b42, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8b43, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8b44, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8c70, "HP EliteBook 835 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8c71, "HP EliteBook 845 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8c72, "HP EliteBook 865 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8ca4, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8ca7, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED),
SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
SND_PCI_QUIRK(0x1043, 0x10a1, "ASUS UX391UA", ALC294_FIXUP_ASUS_SPK),
SND_PCI_QUIRK(0x1043, 0x10c0, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
SND_PCI_QUIRK(0x1043, 0x10d0, "ASUS X540LA/X540LJ", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1043, 0x10d3, "ASUS K6500ZC", ALC294_FIXUP_ASUS_SPK),
SND_PCI_QUIRK(0x1043, 0x115d, "Asus 1015E", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
SND_PCI_QUIRK(0x1043, 0x11c0, "ASUS X556UR", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1043, 0x125e, "ASUS Q524UQK", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1043, 0x19e1, "ASUS UX581LV", ALC295_FIXUP_ASUS_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1043, 0x1a13, "Asus G73Jw", ALC269_FIXUP_ASUS_G73JW),
SND_PCI_QUIRK(0x1043, 0x1a30, "ASUS X705UD", ALC256_FIXUP_ASUS_MIC),
+ SND_PCI_QUIRK(0x1043, 0x1a63, "ASUS UX3405MA", ALC245_FIXUP_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1043, 0x1a83, "ASUS UM5302LA", ALC294_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x1043, 0x1a8f, "ASUS UX582ZS", ALC245_FIXUP_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1043, 0x1b11, "ASUS UX431DA", ALC294_FIXUP_ASUS_COEF_1B),
SND_PCI_QUIRK(0x1043, 0x1b13, "Asus U41SV", ALC269_FIXUP_INV_DMIC),
SND_PCI_QUIRK(0x1043, 0x1b93, "ASUS G614JVR/JIR", ALC245_FIXUP_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1043, 0x1bbd, "ASUS Z550MA", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1043, 0x1c03, "ASUS UM3406HA", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x1043, 0x1c23, "Asus X55U", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
+ SND_PCI_QUIRK(0x1043, 0x1c33, "ASUS UX5304MA", ALC245_FIXUP_CS35L41_SPI_2),
+ SND_PCI_QUIRK(0x1043, 0x1c43, "ASUS UX8406MA", ALC245_FIXUP_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1043, 0x1c62, "ASUS GU603", ALC289_FIXUP_ASUS_GA401),
SND_PCI_QUIRK(0x1043, 0x1c92, "ASUS ROG Strix G15", ALC285_FIXUP_ASUS_G533Z_PINS),
SND_PCI_QUIRK(0x1043, 0x1c9f, "ASUS G614JI", ALC285_FIXUP_ASUS_HEADSET_MIC),
{0x12, 0x90a60130},
{0x17, 0x90170110},
{0x21, 0x03211020}),
- SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE,
- {0x14, 0x90170110},
- {0x21, 0x04211020}),
- SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE,
- {0x14, 0x90170110},
- {0x21, 0x04211030}),
- SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
- ALC295_STANDARD_PINS,
- {0x17, 0x21014020},
- {0x18, 0x21a19030}),
- SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
- ALC295_STANDARD_PINS,
- {0x17, 0x21014040},
- {0x18, 0x21a19050}),
- SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
- ALC295_STANDARD_PINS),
SND_HDA_PIN_QUIRK(0x10ec0298, 0x1028, "Dell", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE,
ALC298_STANDARD_PINS,
{0x17, 0x90170110}),
SND_HDA_PIN_QUIRK(0x10ec0289, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE,
{0x19, 0x40000000},
{0x1b, 0x40000000}),
+ SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE,
+ {0x19, 0x40000000},
+ {0x1b, 0x40000000}),
SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
{0x19, 0x40000000},
{0x1a, 0x40000000}),
struct timeval interval_tv = { 5, 0 };
struct timespec interval_ts = { 5, 0 };
-/* Save original CPU model */
-unsigned int model_orig;
-
unsigned int num_iterations;
unsigned int header_iterations;
unsigned int debug;
unsigned int summary_only;
unsigned int list_header_only;
unsigned int dump_only;
-unsigned int do_snb_cstates;
-unsigned int do_knl_cstates;
-unsigned int do_slm_cstates;
-unsigned int use_c1_residency_msr;
unsigned int has_aperf;
unsigned int has_epb;
unsigned int has_turbo;
unsigned int is_hybrid;
-unsigned int do_irtl_snb;
-unsigned int do_irtl_hsw;
unsigned int units = 1000000; /* MHz etc */
unsigned int genuine_intel;
unsigned int authentic_amd;
unsigned int hygon_genuine;
unsigned int max_level, max_extended_level;
unsigned int has_invariant_tsc;
-unsigned int do_nhm_platform_info;
-unsigned int no_MSR_MISC_PWR_MGMT;
unsigned int aperf_mperf_multiplier = 1;
double bclk;
double base_hz;
unsigned int show_pkg_only;
unsigned int show_core_only;
char *output_buffer, *outp;
-unsigned int do_rapl;
unsigned int do_dts;
unsigned int do_ptm;
unsigned int do_ipc;
unsigned int gfx_act_mhz;
unsigned int tj_max;
unsigned int tj_max_override;
-int tcc_offset_bits;
double rapl_power_units, rapl_time_units;
double rapl_dram_energy_units, rapl_energy_units;
double rapl_joule_counter_range;
-unsigned int do_core_perf_limit_reasons;
-unsigned int has_automatic_cstate_conversion;
-unsigned int dis_cstate_prewake;
-unsigned int do_gfx_perf_limit_reasons;
-unsigned int do_ring_perf_limit_reasons;
unsigned int crystal_hz;
unsigned long long tsc_hz;
int base_cpu;
-double discover_bclk(unsigned int family, unsigned int model);
unsigned int has_hwp; /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
/* IA32_HWP_REQUEST, IA32_HWP_STATUS */
unsigned int has_hwp_notify; /* IA32_HWP_INTERRUPT */
unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */
unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */
unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */
-unsigned int has_misc_feature_control;
unsigned int first_counter_read = 1;
int ignore_stdin;
-#define RAPL_PKG (1 << 0)
- /* 0x610 MSR_PKG_POWER_LIMIT */
- /* 0x611 MSR_PKG_ENERGY_STATUS */
-#define RAPL_PKG_PERF_STATUS (1 << 1)
- /* 0x613 MSR_PKG_PERF_STATUS */
-#define RAPL_PKG_POWER_INFO (1 << 2)
- /* 0x614 MSR_PKG_POWER_INFO */
-
-#define RAPL_DRAM (1 << 3)
- /* 0x618 MSR_DRAM_POWER_LIMIT */
- /* 0x619 MSR_DRAM_ENERGY_STATUS */
-#define RAPL_DRAM_PERF_STATUS (1 << 4)
- /* 0x61b MSR_DRAM_PERF_STATUS */
-#define RAPL_DRAM_POWER_INFO (1 << 5)
- /* 0x61c MSR_DRAM_POWER_INFO */
-
-#define RAPL_CORES_POWER_LIMIT (1 << 6)
- /* 0x638 MSR_PP0_POWER_LIMIT */
-#define RAPL_CORE_POLICY (1 << 7)
- /* 0x63a MSR_PP0_POLICY */
-
-#define RAPL_GFX (1 << 8)
- /* 0x640 MSR_PP1_POWER_LIMIT */
- /* 0x641 MSR_PP1_ENERGY_STATUS */
- /* 0x642 MSR_PP1_POLICY */
-
-#define RAPL_CORES_ENERGY_STATUS (1 << 9)
- /* 0x639 MSR_PP0_ENERGY_STATUS */
-#define RAPL_PER_CORE_ENERGY (1 << 10)
- /* Indicates cores energy collection is per-core,
- * not per-package. */
-#define RAPL_AMD_F17H (1 << 11)
- /* 0xc0010299 MSR_RAPL_PWR_UNIT */
- /* 0xc001029a MSR_CORE_ENERGY_STAT */
- /* 0xc001029b MSR_PKG_ENERGY_STAT */
-#define RAPL_CORES (RAPL_CORES_ENERGY_STATUS | RAPL_CORES_POWER_LIMIT)
+int get_msr(int cpu, off_t offset, unsigned long long *msr);
+
+/* Model specific support Start */
+
+/* List of features that may diverge among different platforms */
+struct platform_features {
+ bool has_msr_misc_feature_control; /* MSR_MISC_FEATURE_CONTROL */
+ bool has_msr_misc_pwr_mgmt; /* MSR_MISC_PWR_MGMT */
+ bool has_nhm_msrs; /* MSR_PLATFORM_INFO, MSR_IA32_TEMPERATURE_TARGET, MSR_SMI_COUNT, MSR_PKG_CST_CONFIG_CONTROL, MSR_IA32_POWER_CTL, TRL MSRs */
+ bool has_config_tdp; /* MSR_CONFIG_TDP_NOMINAL/LEVEL_1/LEVEL_2/CONTROL, MSR_TURBO_ACTIVATION_RATIO */
+ int bclk_freq; /* CPU base clock */
+ int crystal_freq; /* Crystal clock to use when not available from CPUID.15 */
+ int supported_cstates; /* Core cstates and Package cstates supported */
+ int cst_limit; /* MSR_PKG_CST_CONFIG_CONTROL */
+ bool has_cst_auto_convension; /* AUTOMATIC_CSTATE_CONVERSION bit in MSR_PKG_CST_CONFIG_CONTROL */
+ bool has_irtl_msrs; /* MSR_PKGC3/PKGC6/PKGC7/PKGC8/PKGC9/PKGC10_IRTL */
+ bool has_msr_core_c1_res; /* MSR_CORE_C1_RES */
+ bool has_msr_module_c6_res_ms; /* MSR_MODULE_C6_RES_MS */
+ bool has_msr_c6_demotion_policy_config; /* MSR_CC6_DEMOTION_POLICY_CONFIG/MSR_MC6_DEMOTION_POLICY_CONFIG */
+ bool has_msr_atom_pkg_c6_residency; /* MSR_ATOM_PKG_C6_RESIDENCY */
+ bool has_msr_knl_core_c6_residency; /* MSR_KNL_CORE_C6_RESIDENCY */
+ bool has_ext_cst_msrs; /* MSR_PKG_WEIGHTED_CORE_C0_RES/MSR_PKG_ANY_CORE_C0_RES/MSR_PKG_ANY_GFXE_C0_RES/MSR_PKG_BOTH_CORE_GFXE_C0_RES */
+ bool has_cst_prewake_bit; /* Cstate prewake bit in MSR_IA32_POWER_CTL */
+ int trl_msrs; /* MSR_TURBO_RATIO_LIMIT/LIMIT1/LIMIT2/SECONDARY, Atom TRL MSRs */
+ int plr_msrs; /* MSR_CORE/GFX/RING_PERF_LIMIT_REASONS */
+ int rapl_msrs; /* RAPL PKG/DRAM/CORE/GFX MSRs, AMD RAPL MSRs */
+ bool has_per_core_rapl; /* Indicates cores energy collection is per-core, not per-package. AMD specific for now */
+ bool has_rapl_divisor; /* Divisor for Energy unit raw value from MSR_RAPL_POWER_UNIT */
+ bool has_fixed_rapl_unit; /* Fixed Energy Unit used for DRAM RAPL Domain */
+ int rapl_quirk_tdp; /* Hardcoded TDP value when cannot be retrieved from hardware */
+ int tcc_offset_bits; /* TCC Offset bits in MSR_IA32_TEMPERATURE_TARGET */
+ bool enable_tsc_tweak; /* Use CPU Base freq instead of TSC freq for aperf/mperf counter */
+ bool need_perf_multiplier; /* mperf/aperf multiplier */
+};
+
+struct platform_data {
+ unsigned int model;
+ const struct platform_features *features;
+};
+
+/* For BCLK */
+enum bclk_freq {
+ BCLK_100MHZ = 1,
+ BCLK_133MHZ,
+ BCLK_SLV,
+};
+
+#define SLM_BCLK_FREQS 5
+double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0 };
+
+double slm_bclk(void)
+{
+ unsigned long long msr = 3;
+ unsigned int i;
+ double freq;
+
+ if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
+ fprintf(outf, "SLM BCLK: unknown\n");
+
+ i = msr & 0xf;
+ if (i >= SLM_BCLK_FREQS) {
+ fprintf(outf, "SLM BCLK[%d] invalid\n", i);
+ i = 3;
+ }
+ freq = slm_freq_table[i];
+
+ if (!quiet)
+ fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
+
+ return freq;
+}
+
+/* For Package cstate limit */
+enum package_cstate_limit {
+ CST_LIMIT_NHM = 1,
+ CST_LIMIT_SNB,
+ CST_LIMIT_HSW,
+ CST_LIMIT_SKX,
+ CST_LIMIT_ICX,
+ CST_LIMIT_SLV,
+ CST_LIMIT_AMT,
+ CST_LIMIT_KNL,
+ CST_LIMIT_GMT,
+};
+
+/* For Turbo Ratio Limit MSRs */
+enum turbo_ratio_limit_msrs {
+ TRL_BASE = BIT(0),
+ TRL_LIMIT1 = BIT(1),
+ TRL_LIMIT2 = BIT(2),
+ TRL_ATOM = BIT(3),
+ TRL_KNL = BIT(4),
+ TRL_CORECOUNT = BIT(5),
+};
+
+/* For Perf Limit Reason MSRs */
+enum perf_limit_reason_msrs {
+ PLR_CORE = BIT(0),
+ PLR_GFX = BIT(1),
+ PLR_RING = BIT(2),
+};
+
+/* For RAPL MSRs */
+enum rapl_msrs {
+ RAPL_PKG_POWER_LIMIT = BIT(0), /* 0x610 MSR_PKG_POWER_LIMIT */
+ RAPL_PKG_ENERGY_STATUS = BIT(1), /* 0x611 MSR_PKG_ENERGY_STATUS */
+ RAPL_PKG_PERF_STATUS = BIT(2), /* 0x613 MSR_PKG_PERF_STATUS */
+ RAPL_PKG_POWER_INFO = BIT(3), /* 0x614 MSR_PKG_POWER_INFO */
+ RAPL_DRAM_POWER_LIMIT = BIT(4), /* 0x618 MSR_DRAM_POWER_LIMIT */
+ RAPL_DRAM_ENERGY_STATUS = BIT(5), /* 0x619 MSR_DRAM_ENERGY_STATUS */
+ RAPL_DRAM_PERF_STATUS = BIT(6), /* 0x61b MSR_DRAM_PERF_STATUS */
+ RAPL_DRAM_POWER_INFO = BIT(7), /* 0x61c MSR_DRAM_POWER_INFO */
+ RAPL_CORE_POWER_LIMIT = BIT(8), /* 0x638 MSR_PP0_POWER_LIMIT */
+ RAPL_CORE_ENERGY_STATUS = BIT(9), /* 0x639 MSR_PP0_ENERGY_STATUS */
+ RAPL_CORE_POLICY = BIT(10), /* 0x63a MSR_PP0_POLICY */
+ RAPL_GFX_POWER_LIMIT = BIT(11), /* 0x640 MSR_PP1_POWER_LIMIT */
+ RAPL_GFX_ENERGY_STATUS = BIT(12), /* 0x641 MSR_PP1_ENERGY_STATUS */
+ RAPL_GFX_POLICY = BIT(13), /* 0x642 MSR_PP1_POLICY */
+ RAPL_AMD_PWR_UNIT = BIT(14), /* 0xc0010299 MSR_AMD_RAPL_POWER_UNIT */
+ RAPL_AMD_CORE_ENERGY_STAT = BIT(15), /* 0xc001029a MSR_AMD_CORE_ENERGY_STATUS */
+ RAPL_AMD_PKG_ENERGY_STAT = BIT(16), /* 0xc001029b MSR_AMD_PKG_ENERGY_STATUS */
+};
+
+#define RAPL_PKG (RAPL_PKG_ENERGY_STATUS | RAPL_PKG_POWER_LIMIT)
+#define RAPL_DRAM (RAPL_DRAM_ENERGY_STATUS | RAPL_DRAM_POWER_LIMIT)
+#define RAPL_CORE (RAPL_CORE_ENERGY_STATUS | RAPL_CORE_POWER_LIMIT)
+#define RAPL_GFX (RAPL_GFX_POWER_LIMIT | RAPL_GFX_ENERGY_STATUS)
+
+#define RAPL_PKG_ALL (RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO)
+#define RAPL_DRAM_ALL (RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_DRAM_POWER_INFO)
+#define RAPL_CORE_ALL (RAPL_CORE | RAPL_CORE_POLICY)
+#define RAPL_GFX_ALL (RAPL_GFX | RAPL_GFX_POLIGY)
+
+#define RAPL_AMD_F17H (RAPL_AMD_PWR_UNIT | RAPL_AMD_CORE_ENERGY_STAT | RAPL_AMD_PKG_ENERGY_STAT)
+
+/* For Cstates */
+enum cstates {
+ CC1 = BIT(0),
+ CC3 = BIT(1),
+ CC6 = BIT(2),
+ CC7 = BIT(3),
+ PC2 = BIT(4),
+ PC3 = BIT(5),
+ PC6 = BIT(6),
+ PC7 = BIT(7),
+ PC8 = BIT(8),
+ PC9 = BIT(9),
+ PC10 = BIT(10),
+};
+
+static const struct platform_features nhm_features = {
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .bclk_freq = BCLK_133MHZ,
+ .supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6,
+ .cst_limit = CST_LIMIT_NHM,
+ .trl_msrs = TRL_BASE,
+};
+
+static const struct platform_features nhx_features = {
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .bclk_freq = BCLK_133MHZ,
+ .supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6,
+ .cst_limit = CST_LIMIT_NHM,
+};
+
+static const struct platform_features snb_features = {
+ .has_msr_misc_feature_control = 1,
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
+ .cst_limit = CST_LIMIT_SNB,
+ .has_irtl_msrs = 1,
+ .trl_msrs = TRL_BASE,
+ .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features snx_features = {
+ .has_msr_misc_feature_control = 1,
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
+ .cst_limit = CST_LIMIT_SNB,
+ .has_irtl_msrs = 1,
+ .trl_msrs = TRL_BASE,
+ .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL,
+};
+
+static const struct platform_features ivb_features = {
+ .has_msr_misc_feature_control = 1,
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .has_config_tdp = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
+ .cst_limit = CST_LIMIT_SNB,
+ .has_irtl_msrs = 1,
+ .trl_msrs = TRL_BASE,
+ .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features ivx_features = {
+ .has_msr_misc_feature_control = 1,
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
+ .cst_limit = CST_LIMIT_SNB,
+ .has_irtl_msrs = 1,
+ .trl_msrs = TRL_BASE | TRL_LIMIT1,
+ .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL,
+};
+
+static const struct platform_features hsw_features = {
+ .has_msr_misc_feature_control = 1,
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .has_config_tdp = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
+ .cst_limit = CST_LIMIT_HSW,
+ .has_irtl_msrs = 1,
+ .trl_msrs = TRL_BASE,
+ .plr_msrs = PLR_CORE | PLR_GFX | PLR_RING,
+ .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features hsx_features = {
+ .has_msr_misc_feature_control = 1,
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .has_config_tdp = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
+ .cst_limit = CST_LIMIT_HSW,
+ .has_irtl_msrs = 1,
+ .trl_msrs = TRL_BASE | TRL_LIMIT1 | TRL_LIMIT2,
+ .plr_msrs = PLR_CORE | PLR_RING,
+ .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+ .has_fixed_rapl_unit = 1,
+};
+
+static const struct platform_features hswl_features = {
+ .has_msr_misc_feature_control = 1,
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .has_config_tdp = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
+ .cst_limit = CST_LIMIT_HSW,
+ .has_irtl_msrs = 1,
+ .trl_msrs = TRL_BASE,
+ .plr_msrs = PLR_CORE | PLR_GFX | PLR_RING,
+ .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features hswg_features = {
+ .has_msr_misc_feature_control = 1,
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .has_config_tdp = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
+ .cst_limit = CST_LIMIT_HSW,
+ .has_irtl_msrs = 1,
+ .trl_msrs = TRL_BASE,
+ .plr_msrs = PLR_CORE | PLR_GFX | PLR_RING,
+ .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features bdw_features = {
+ .has_msr_misc_feature_control = 1,
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .has_config_tdp = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
+ .cst_limit = CST_LIMIT_HSW,
+ .has_irtl_msrs = 1,
+ .trl_msrs = TRL_BASE,
+ .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features bdwg_features = {
+ .has_msr_misc_feature_control = 1,
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .has_config_tdp = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
+ .cst_limit = CST_LIMIT_HSW,
+ .has_irtl_msrs = 1,
+ .trl_msrs = TRL_BASE,
+ .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features bdx_features = {
+ .has_msr_misc_feature_control = 1,
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .has_config_tdp = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .supported_cstates = CC1 | CC3 | CC6 | PC2 | PC3 | PC6,
+ .cst_limit = CST_LIMIT_HSW,
+ .has_irtl_msrs = 1,
+ .has_cst_auto_convension = 1,
+ .trl_msrs = TRL_BASE,
+ .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+ .has_fixed_rapl_unit = 1,
+};
+
+static const struct platform_features skl_features = {
+ .has_msr_misc_feature_control = 1,
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .has_config_tdp = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .crystal_freq = 24000000,
+ .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
+ .cst_limit = CST_LIMIT_HSW,
+ .has_irtl_msrs = 1,
+ .has_ext_cst_msrs = 1,
+ .trl_msrs = TRL_BASE,
+ .tcc_offset_bits = 6,
+ .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX,
+ .enable_tsc_tweak = 1,
+};
+
+static const struct platform_features cnl_features = {
+ .has_msr_misc_feature_control = 1,
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .has_config_tdp = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
+ .cst_limit = CST_LIMIT_HSW,
+ .has_irtl_msrs = 1,
+ .has_msr_core_c1_res = 1,
+ .has_ext_cst_msrs = 1,
+ .trl_msrs = TRL_BASE,
+ .tcc_offset_bits = 6,
+ .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX,
+ .enable_tsc_tweak = 1,
+};
+
+static const struct platform_features adl_features = {
+ .has_msr_misc_feature_control = 1,
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .has_config_tdp = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC8 | PC10,
+ .cst_limit = CST_LIMIT_HSW,
+ .has_irtl_msrs = 1,
+ .has_msr_core_c1_res = 1,
+ .has_ext_cst_msrs = 1,
+ .trl_msrs = TRL_BASE,
+ .tcc_offset_bits = 6,
+ .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX,
+ .enable_tsc_tweak = 1,
+};
+
+static const struct platform_features skx_features = {
+ .has_msr_misc_feature_control = 1,
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .has_config_tdp = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .supported_cstates = CC1 | CC6 | PC2 | PC6,
+ .cst_limit = CST_LIMIT_SKX,
+ .has_irtl_msrs = 1,
+ .has_cst_auto_convension = 1,
+ .trl_msrs = TRL_BASE | TRL_CORECOUNT,
+ .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+ .has_fixed_rapl_unit = 1,
+};
+
+static const struct platform_features icx_features = {
+ .has_msr_misc_feature_control = 1,
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .has_config_tdp = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .supported_cstates = CC1 | CC6 | PC2 | PC6,
+ .cst_limit = CST_LIMIT_ICX,
+ .has_irtl_msrs = 1,
+ .has_cst_prewake_bit = 1,
+ .trl_msrs = TRL_BASE | TRL_CORECOUNT,
+ .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+ .has_fixed_rapl_unit = 1,
+};
+
+static const struct platform_features spr_features = {
+ .has_msr_misc_feature_control = 1,
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .has_config_tdp = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .supported_cstates = CC1 | CC6 | PC2 | PC6,
+ .cst_limit = CST_LIMIT_SKX,
+ .has_msr_core_c1_res = 1,
+ .has_irtl_msrs = 1,
+ .has_cst_prewake_bit = 1,
+ .trl_msrs = TRL_BASE | TRL_CORECOUNT,
+ .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+};
+
+static const struct platform_features srf_features = {
+ .has_msr_misc_feature_control = 1,
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .has_config_tdp = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .supported_cstates = CC1 | CC6 | PC2 | PC6,
+ .cst_limit = CST_LIMIT_SKX,
+ .has_msr_core_c1_res = 1,
+ .has_msr_module_c6_res_ms = 1,
+ .has_irtl_msrs = 1,
+ .has_cst_prewake_bit = 1,
+ .trl_msrs = TRL_BASE | TRL_CORECOUNT,
+ .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+};
+
+static const struct platform_features grr_features = {
+ .has_msr_misc_feature_control = 1,
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .has_config_tdp = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .supported_cstates = CC1 | CC6,
+ .cst_limit = CST_LIMIT_SKX,
+ .has_msr_core_c1_res = 1,
+ .has_msr_module_c6_res_ms = 1,
+ .has_irtl_msrs = 1,
+ .has_cst_prewake_bit = 1,
+ .trl_msrs = TRL_BASE | TRL_CORECOUNT,
+ .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+};
+
+static const struct platform_features slv_features = {
+ .has_nhm_msrs = 1,
+ .bclk_freq = BCLK_SLV,
+ .supported_cstates = CC1 | CC6 | PC6,
+ .cst_limit = CST_LIMIT_SLV,
+ .has_msr_core_c1_res = 1,
+ .has_msr_module_c6_res_ms = 1,
+ .has_msr_c6_demotion_policy_config = 1,
+ .has_msr_atom_pkg_c6_residency = 1,
+ .trl_msrs = TRL_ATOM,
+ .rapl_msrs = RAPL_PKG | RAPL_CORE,
+ .has_rapl_divisor = 1,
+ .rapl_quirk_tdp = 30,
+};
+
+static const struct platform_features slvd_features = {
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .bclk_freq = BCLK_SLV,
+ .supported_cstates = CC1 | CC6 | PC3 | PC6,
+ .cst_limit = CST_LIMIT_SLV,
+ .has_msr_atom_pkg_c6_residency = 1,
+ .trl_msrs = TRL_BASE,
+ .rapl_msrs = RAPL_PKG | RAPL_CORE,
+ .rapl_quirk_tdp = 30,
+};
+
+static const struct platform_features amt_features = {
+ .has_nhm_msrs = 1,
+ .bclk_freq = BCLK_133MHZ,
+ .supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6,
+ .cst_limit = CST_LIMIT_AMT,
+ .trl_msrs = TRL_BASE,
+};
+
+static const struct platform_features gmt_features = {
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .crystal_freq = 19200000,
+ .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
+ .cst_limit = CST_LIMIT_GMT,
+ .has_irtl_msrs = 1,
+ .trl_msrs = TRL_BASE | TRL_CORECOUNT,
+ .rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features gmtd_features = {
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .crystal_freq = 25000000,
+ .supported_cstates = CC1 | CC6 | PC2 | PC6,
+ .cst_limit = CST_LIMIT_GMT,
+ .has_irtl_msrs = 1,
+ .has_msr_core_c1_res = 1,
+ .trl_msrs = TRL_BASE | TRL_CORECOUNT,
+ .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_CORE_ENERGY_STATUS,
+};
+
+static const struct platform_features gmtp_features = {
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .crystal_freq = 19200000,
+ .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
+ .cst_limit = CST_LIMIT_GMT,
+ .has_irtl_msrs = 1,
+ .trl_msrs = TRL_BASE,
+ .rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO,
+};
+
+static const struct platform_features tmt_features = {
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
+ .cst_limit = CST_LIMIT_GMT,
+ .has_irtl_msrs = 1,
+ .trl_msrs = TRL_BASE,
+ .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX,
+ .enable_tsc_tweak = 1,
+};
+
+static const struct platform_features tmtd_features = {
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .supported_cstates = CC1 | CC6,
+ .cst_limit = CST_LIMIT_GMT,
+ .has_irtl_msrs = 1,
+ .trl_msrs = TRL_BASE | TRL_CORECOUNT,
+ .rapl_msrs = RAPL_PKG_ALL,
+};
+
+static const struct platform_features knl_features = {
+ .has_msr_misc_pwr_mgmt = 1,
+ .has_nhm_msrs = 1,
+ .has_config_tdp = 1,
+ .bclk_freq = BCLK_100MHZ,
+ .supported_cstates = CC1 | CC6 | PC3 | PC6,
+ .cst_limit = CST_LIMIT_KNL,
+ .has_msr_knl_core_c6_residency = 1,
+ .trl_msrs = TRL_KNL,
+ .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
+ .has_fixed_rapl_unit = 1,
+ .need_perf_multiplier = 1,
+};
+
+static const struct platform_features default_features = {
+};
+
+static const struct platform_features amd_features_with_rapl = {
+ .rapl_msrs = RAPL_AMD_F17H,
+ .has_per_core_rapl = 1,
+ .rapl_quirk_tdp = 280, /* This is the max stock TDP of HEDT/Server Fam17h+ chips */
+};
+
+static const struct platform_data turbostat_pdata[] = {
+ { INTEL_FAM6_NEHALEM, &nhm_features },
+ { INTEL_FAM6_NEHALEM_G, &nhm_features },
+ { INTEL_FAM6_NEHALEM_EP, &nhm_features },
+ { INTEL_FAM6_NEHALEM_EX, &nhx_features },
+ { INTEL_FAM6_WESTMERE, &nhm_features },
+ { INTEL_FAM6_WESTMERE_EP, &nhm_features },
+ { INTEL_FAM6_WESTMERE_EX, &nhx_features },
+ { INTEL_FAM6_SANDYBRIDGE, &snb_features },
+ { INTEL_FAM6_SANDYBRIDGE_X, &snx_features },
+ { INTEL_FAM6_IVYBRIDGE, &ivb_features },
+ { INTEL_FAM6_IVYBRIDGE_X, &ivx_features },
+ { INTEL_FAM6_HASWELL, &hsw_features },
+ { INTEL_FAM6_HASWELL_X, &hsx_features },
+ { INTEL_FAM6_HASWELL_L, &hswl_features },
+ { INTEL_FAM6_HASWELL_G, &hswg_features },
+ { INTEL_FAM6_BROADWELL, &bdw_features },
+ { INTEL_FAM6_BROADWELL_G, &bdwg_features },
+ { INTEL_FAM6_BROADWELL_X, &bdx_features },
+ { INTEL_FAM6_BROADWELL_D, &bdx_features },
+ { INTEL_FAM6_SKYLAKE_L, &skl_features },
+ { INTEL_FAM6_SKYLAKE, &skl_features },
+ { INTEL_FAM6_SKYLAKE_X, &skx_features },
+ { INTEL_FAM6_KABYLAKE_L, &skl_features },
+ { INTEL_FAM6_KABYLAKE, &skl_features },
+ { INTEL_FAM6_COMETLAKE, &skl_features },
+ { INTEL_FAM6_COMETLAKE_L, &skl_features },
+ { INTEL_FAM6_CANNONLAKE_L, &cnl_features },
+ { INTEL_FAM6_ICELAKE_X, &icx_features },
+ { INTEL_FAM6_ICELAKE_D, &icx_features },
+ { INTEL_FAM6_ICELAKE_L, &cnl_features },
+ { INTEL_FAM6_ICELAKE_NNPI, &cnl_features },
+ { INTEL_FAM6_ROCKETLAKE, &cnl_features },
+ { INTEL_FAM6_TIGERLAKE_L, &cnl_features },
+ { INTEL_FAM6_TIGERLAKE, &cnl_features },
+ { INTEL_FAM6_SAPPHIRERAPIDS_X, &spr_features },
+ { INTEL_FAM6_EMERALDRAPIDS_X, &spr_features },
+ { INTEL_FAM6_GRANITERAPIDS_X, &spr_features },
+ { INTEL_FAM6_LAKEFIELD, &cnl_features },
+ { INTEL_FAM6_ALDERLAKE, &adl_features },
+ { INTEL_FAM6_ALDERLAKE_L, &adl_features },
+ { INTEL_FAM6_RAPTORLAKE, &adl_features },
+ { INTEL_FAM6_RAPTORLAKE_P, &adl_features },
+ { INTEL_FAM6_RAPTORLAKE_S, &adl_features },
+ { INTEL_FAM6_METEORLAKE, &cnl_features },
+ { INTEL_FAM6_METEORLAKE_L, &cnl_features },
+ { INTEL_FAM6_ARROWLAKE, &cnl_features },
+ { INTEL_FAM6_LUNARLAKE_M, &cnl_features },
+ { INTEL_FAM6_ATOM_SILVERMONT, &slv_features },
+ { INTEL_FAM6_ATOM_SILVERMONT_D, &slvd_features },
+ { INTEL_FAM6_ATOM_AIRMONT, &amt_features },
+ { INTEL_FAM6_ATOM_GOLDMONT, &gmt_features },
+ { INTEL_FAM6_ATOM_GOLDMONT_D, &gmtd_features },
+ { INTEL_FAM6_ATOM_GOLDMONT_PLUS, &gmtp_features },
+ { INTEL_FAM6_ATOM_TREMONT_D, &tmtd_features },
+ { INTEL_FAM6_ATOM_TREMONT, &tmt_features },
+ { INTEL_FAM6_ATOM_TREMONT_L, &tmt_features },
+ { INTEL_FAM6_ATOM_GRACEMONT, &adl_features },
+ { INTEL_FAM6_ATOM_CRESTMONT_X, &srf_features },
+ { INTEL_FAM6_ATOM_CRESTMONT, &grr_features },
+ { INTEL_FAM6_XEON_PHI_KNL, &knl_features },
+ { INTEL_FAM6_XEON_PHI_KNM, &knl_features },
+ /*
+ * Missing support for
+ * INTEL_FAM6_ICELAKE
+ * INTEL_FAM6_ATOM_SILVERMONT_MID
+ * INTEL_FAM6_ATOM_AIRMONT_MID
+ * INTEL_FAM6_ATOM_AIRMONT_NP
+ */
+ { 0, NULL },
+};
+
+static const struct platform_features *platform;
+
+void probe_platform_features(unsigned int family, unsigned int model)
+{
+ int i;
+
+ platform = &default_features;
+
+ if (authentic_amd || hygon_genuine) {
+ if (max_extended_level >= 0x80000007) {
+ unsigned int eax, ebx, ecx, edx;
+
+ __cpuid(0x80000007, eax, ebx, ecx, edx);
+ /* RAPL (Fam 17h+) */
+ if ((edx & (1 << 14)) && family >= 0x17)
+ platform = &amd_features_with_rapl;
+ }
+ return;
+ }
+
+ if (!genuine_intel || family != 6)
+ return;
+
+ for (i = 0; turbostat_pdata[i].features; i++) {
+ if (turbostat_pdata[i].model == model) {
+ platform = turbostat_pdata[i].features;
+ return;
+ }
+ }
+}
+
+/* Model specific support End */
+
#define TJMAX_DEFAULT 100
/* MSRs that are not yet in the kernel-provided header. */
char *progname;
#define CPU_SUBSET_MAXCPUS 1024 /* need to use before probe... */
-cpu_set_t *cpu_present_set, *cpu_affinity_set, *cpu_subset;
-size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size;
+cpu_set_t *cpu_present_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset;
+size_t cpu_present_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, cpu_subset_size;
#define MAX_ADDED_COUNTERS 8
#define MAX_ADDED_THREAD_COUNTERS 24
#define BITMASK_SIZE 32
unsigned int x2apic_id;
unsigned int flags;
bool is_atom;
-#define CPU_IS_FIRST_THREAD_IN_CORE 0x2
-#define CPU_IS_FIRST_CORE_IN_PACKAGE 0x4
unsigned long long counter[MAX_ADDED_THREAD_COUNTERS];
} *thread_even, *thread_odd;
struct core_data {
+ int base_cpu;
unsigned long long c3;
unsigned long long c6;
unsigned long long c7;
} *core_even, *core_odd;
struct pkg_data {
+ int base_cpu;
unsigned long long pc2;
unsigned long long pc3;
unsigned long long pc6;
switch (idx) {
case IDX_PKG_ENERGY:
- if (do_rapl & RAPL_AMD_F17H)
+ if (platform->rapl_msrs & RAPL_AMD_F17H)
offset = MSR_PKG_ENERGY_STAT;
else
offset = MSR_PKG_ENERGY_STATUS;
{
switch (idx) {
case IDX_PKG_ENERGY:
- return do_rapl & (RAPL_PKG | RAPL_AMD_F17H);
+ return platform->rapl_msrs & (RAPL_PKG | RAPL_AMD_F17H);
case IDX_DRAM_ENERGY:
- return do_rapl & RAPL_DRAM;
+ return platform->rapl_msrs & RAPL_DRAM;
case IDX_PP0_ENERGY:
- return do_rapl & RAPL_CORES_ENERGY_STATUS;
+ return platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS;
case IDX_PP1_ENERGY:
- return do_rapl & RAPL_GFX;
+ return platform->rapl_msrs & RAPL_GFX;
case IDX_PKG_PERF:
- return do_rapl & RAPL_PKG_PERF_STATUS;
+ return platform->rapl_msrs & RAPL_PKG_PERF_STATUS;
case IDX_DRAM_PERF:
- return do_rapl & RAPL_DRAM_PERF_STATUS;
+ return platform->rapl_msrs & RAPL_DRAM_PERF_STATUS;
default:
return 0;
}
int num_die;
int num_cpus;
int num_cores;
+ int allowed_packages;
+ int allowed_cpus;
+ int allowed_cores;
int max_cpu_num;
int max_node_num;
int nodes_per_pkg;
int *irq_column_2_cpu; /* /proc/interrupts column numbers */
int *irqs_per_cpu; /* indexed by cpu_num */
-void setup_all_buffers(void);
+void setup_all_buffers(bool startup);
char *sys_lpi_file;
char *sys_lpi_file_sysfs = "/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us";
return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
}
+int cpu_is_not_allowed(int cpu)
+{
+ return !CPU_ISSET_S(cpu, cpu_allowed_setsize, cpu_allowed_set);
+}
+
/*
* run func(thread, core, package) in topology order
* skip non-present cpus
struct thread_data *t;
struct core_data *c;
struct pkg_data *p;
-
t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no);
- if (cpu_is_not_present(t->cpu_id))
+ if (cpu_is_not_allowed(t->cpu_id))
continue;
c = GET_CORE(core_base, core_no, node_no, pkg_no);
return 0;
}
+int is_cpu_first_thread_in_core(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+ UNUSED(p);
+
+ return ((int)t->cpu_id == c->base_cpu || c->base_cpu < 0);
+}
+
+int is_cpu_first_core_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+ UNUSED(c);
+
+ return ((int)t->cpu_id == p->base_cpu || p->base_cpu < 0);
+}
+
+int is_cpu_first_thread_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+ return is_cpu_first_thread_in_core(t, c, p) && is_cpu_first_core_in_package(t, c, p);
+}
+
int cpu_migrate(int cpu)
{
CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
if (DO_BIC(BIC_CORE_THROT_CNT))
outp += sprintf(outp, "%sCoreThr", (printed++ ? delim : ""));
- if (do_rapl && !rapl_joules) {
- if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY))
+ if (platform->rapl_msrs && !rapl_joules) {
+ if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl)
outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
- } else if (do_rapl && rapl_joules) {
- if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY))
+ } else if (platform->rapl_msrs && rapl_joules) {
+ if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl)
outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
}
if (DO_BIC(BIC_SYS_LPI))
outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : ""));
- if (do_rapl && !rapl_joules) {
+ if (platform->rapl_msrs && !rapl_joules) {
if (DO_BIC(BIC_PkgWatt))
outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : ""));
- if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY))
+ if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl)
outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
if (DO_BIC(BIC_GFXWatt))
outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : ""));
outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
if (DO_BIC(BIC_RAM__))
outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
- } else if (do_rapl && rapl_joules) {
+ } else if (platform->rapl_msrs && rapl_joules) {
if (DO_BIC(BIC_Pkg_J))
outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : ""));
- if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY))
+ if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl)
outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
if (DO_BIC(BIC_GFX_J))
outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : ""));
int printed = 0;
/* if showing only 1st thread in core and this isn't one, bail out */
- if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
+ if (show_core_only && !is_cpu_first_thread_in_core(t, c, p))
return 0;
/* if showing only 1st thread in pkg and this isn't one, bail out */
- if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+ if (show_pkg_only && !is_cpu_first_core_in_package(t, c, p))
return 0;
/*if not summary line and --cpu is used */
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1 / tsc);
/* print per-core data only for 1st thread in core */
- if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
+ if (!is_cpu_first_thread_in_core(t, c, p))
goto done;
if (DO_BIC(BIC_CPU_c3))
fmt8 = "%s%.2f";
- if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY))
+ if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl)
outp +=
sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float);
- if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY))
+ if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl)
outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units);
/* print per-package data only for 1st core in package */
- if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+ if (!is_cpu_first_core_in_package(t, c, p))
goto done;
/* PkgTmp */
outp +=
sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float);
- if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY))
+ if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl)
outp +=
sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float);
if (DO_BIC(BIC_GFXWatt))
p->energy_dram * rapl_dram_energy_units / interval_float);
if (DO_BIC(BIC_Pkg_J))
outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units);
- if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY))
+ if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl)
outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units);
if (DO_BIC(BIC_GFX_J))
outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units);
int soft_c1_residency_display(int bic)
{
- if (!DO_BIC(BIC_CPU_c1) || use_c1_residency_msr)
+ if (!DO_BIC(BIC_CPU_c1) || platform->has_msr_core_c1_res)
return 0;
return DO_BIC_READ(bic);
old->c1 = new->c1 - old->c1;
- if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || soft_c1_residency_display(BIC_Avg_MHz)) {
+ if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC)
+ || soft_c1_residency_display(BIC_Avg_MHz)) {
if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
old->aperf = new->aperf - old->aperf;
old->mperf = new->mperf - old->mperf;
}
}
- if (use_c1_residency_msr) {
+ if (platform->has_msr_core_c1_res) {
/*
* Some models have a dedicated C1 residency MSR,
* which should be more accurate than the derivation below.
int retval = 0;
/* calculate core delta only for 1st thread in core */
- if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE)
+ if (is_cpu_first_thread_in_core(t, c, p))
delta_core(c, c2);
/* always calculate thread delta */
return retval;
/* calculate package delta only for 1st core in package */
- if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)
+ if (is_cpu_first_core_in_package(t, c, p))
retval = delta_package(p, p2);
return retval;
t->irq_count = 0;
t->smi_count = 0;
- /* tells format_counters to dump all fields from this set */
- t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE;
-
c->c3 = 0;
c->c6 = 0;
c->c7 = 0;
}
/* sum per-core values only for 1st thread in core */
- if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
+ if (!is_cpu_first_thread_in_core(t, c, p))
return 0;
average.cores.c3 += c->c3;
}
/* sum per-pkg values only for 1st core in pkg */
- if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+ if (!is_cpu_first_core_in_package(t, c, p))
return 0;
if (DO_BIC(BIC_Totl_c0))
/* Use the global time delta for the average. */
average.threads.tv_delta = tv_delta;
- average.threads.tsc /= topo.num_cpus;
- average.threads.aperf /= topo.num_cpus;
- average.threads.mperf /= topo.num_cpus;
- average.threads.instr_count /= topo.num_cpus;
- average.threads.c1 /= topo.num_cpus;
+ average.threads.tsc /= topo.allowed_cpus;
+ average.threads.aperf /= topo.allowed_cpus;
+ average.threads.mperf /= topo.allowed_cpus;
+ average.threads.instr_count /= topo.allowed_cpus;
+ average.threads.c1 /= topo.allowed_cpus;
if (average.threads.irq_count > 9999999)
sums_need_wide_columns = 1;
- average.cores.c3 /= topo.num_cores;
- average.cores.c6 /= topo.num_cores;
- average.cores.c7 /= topo.num_cores;
- average.cores.mc6_us /= topo.num_cores;
+ average.cores.c3 /= topo.allowed_cores;
+ average.cores.c6 /= topo.allowed_cores;
+ average.cores.c7 /= topo.allowed_cores;
+ average.cores.mc6_us /= topo.allowed_cores;
if (DO_BIC(BIC_Totl_c0))
- average.packages.pkg_wtd_core_c0 /= topo.num_packages;
+ average.packages.pkg_wtd_core_c0 /= topo.allowed_packages;
if (DO_BIC(BIC_Any_c0))
- average.packages.pkg_any_core_c0 /= topo.num_packages;
+ average.packages.pkg_any_core_c0 /= topo.allowed_packages;
if (DO_BIC(BIC_GFX_c0))
- average.packages.pkg_any_gfxe_c0 /= topo.num_packages;
+ average.packages.pkg_any_gfxe_c0 /= topo.allowed_packages;
if (DO_BIC(BIC_CPUGFX))
- average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages;
+ average.packages.pkg_both_core_gfxe_c0 /= topo.allowed_packages;
- average.packages.pc2 /= topo.num_packages;
+ average.packages.pc2 /= topo.allowed_packages;
if (DO_BIC(BIC_Pkgpc3))
- average.packages.pc3 /= topo.num_packages;
+ average.packages.pc3 /= topo.allowed_packages;
if (DO_BIC(BIC_Pkgpc6))
- average.packages.pc6 /= topo.num_packages;
+ average.packages.pc6 /= topo.allowed_packages;
if (DO_BIC(BIC_Pkgpc7))
- average.packages.pc7 /= topo.num_packages;
+ average.packages.pc7 /= topo.allowed_packages;
- average.packages.pc8 /= topo.num_packages;
- average.packages.pc9 /= topo.num_packages;
- average.packages.pc10 /= topo.num_packages;
+ average.packages.pc8 /= topo.allowed_packages;
+ average.packages.pc9 /= topo.allowed_packages;
+ average.packages.pc10 /= topo.allowed_packages;
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW)
sums_need_wide_columns = 1;
continue;
}
- average.threads.counter[i] /= topo.num_cpus;
+ average.threads.counter[i] /= topo.allowed_cpus;
}
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW)
if (average.cores.counter[i] > 9999999)
sums_need_wide_columns = 1;
}
- average.cores.counter[i] /= topo.num_cores;
+ average.cores.counter[i] /= topo.allowed_cores;
}
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW)
if (average.packages.counter[i] > 9999999)
sums_need_wide_columns = 1;
}
- average.packages.counter[i] /= topo.num_packages;
+ average.packages.counter[i] /= topo.allowed_packages;
}
}
retry:
t->tsc = rdtsc(); /* we are running on local CPU of interest */
- if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || soft_c1_residency_display(BIC_Avg_MHz)) {
+ if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC)
+ || soft_c1_residency_display(BIC_Avg_MHz)) {
unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
/*
return -5;
t->smi_count = msr & 0xFFFFFFFF;
}
- if (DO_BIC(BIC_CPU_c1) && use_c1_residency_msr) {
+ if (DO_BIC(BIC_CPU_c1) && platform->has_msr_core_c1_res) {
if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
return -6;
}
}
/* collect core counters only for 1st thread in core */
- if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
+ if (!is_cpu_first_thread_in_core(t, c, p))
goto done;
if (DO_BIC(BIC_CPU_c3) || soft_c1_residency_display(BIC_CPU_c3)) {
return -6;
}
- if ((DO_BIC(BIC_CPU_c6) || soft_c1_residency_display(BIC_CPU_c6)) && !do_knl_cstates) {
+ if ((DO_BIC(BIC_CPU_c6) || soft_c1_residency_display(BIC_CPU_c6)) && !platform->has_msr_knl_core_c6_residency) {
if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
return -7;
- } else if (do_knl_cstates || soft_c1_residency_display(BIC_CPU_c6)) {
+ } else if (platform->has_msr_knl_core_c6_residency && soft_c1_residency_display(BIC_CPU_c6)) {
if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
return -7;
}
if (DO_BIC(BIC_CORE_THROT_CNT))
get_core_throt_cnt(cpu, &c->core_throt_cnt);
- if (do_rapl & RAPL_AMD_F17H) {
+ if (platform->rapl_msrs & RAPL_AMD_F17H) {
if (get_msr(cpu, MSR_CORE_ENERGY_STAT, &msr))
return -14;
c->core_energy = msr & 0xFFFFFFFF;
}
/* collect package counters only for 1st core in package */
- if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+ if (!is_cpu_first_core_in_package(t, c, p))
goto done;
if (DO_BIC(BIC_Totl_c0)) {
if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
return -9;
if (DO_BIC(BIC_Pkgpc6)) {
- if (do_slm_cstates) {
+ if (platform->has_msr_atom_pkg_c6_residency) {
if (get_msr(cpu, MSR_ATOM_PKG_C6_RESIDENCY, &p->pc6))
return -10;
} else {
if (DO_BIC(BIC_SYS_LPI))
p->sys_lpi = cpuidle_cur_sys_lpi_us;
- if (do_rapl & RAPL_PKG) {
+ if (platform->rapl_msrs & RAPL_PKG) {
if (get_msr_sum(cpu, MSR_PKG_ENERGY_STATUS, &msr))
return -13;
p->energy_pkg = msr;
}
- if (do_rapl & RAPL_CORES_ENERGY_STATUS) {
+ if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS) {
if (get_msr_sum(cpu, MSR_PP0_ENERGY_STATUS, &msr))
return -14;
p->energy_cores = msr;
}
- if (do_rapl & RAPL_DRAM) {
+ if (platform->rapl_msrs & RAPL_DRAM) {
if (get_msr_sum(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
return -15;
p->energy_dram = msr;
}
- if (do_rapl & RAPL_GFX) {
+ if (platform->rapl_msrs & RAPL_GFX) {
if (get_msr_sum(cpu, MSR_PP1_ENERGY_STATUS, &msr))
return -16;
p->energy_gfx = msr;
}
- if (do_rapl & RAPL_PKG_PERF_STATUS) {
+ if (platform->rapl_msrs & RAPL_PKG_PERF_STATUS) {
if (get_msr_sum(cpu, MSR_PKG_PERF_STATUS, &msr))
return -16;
p->rapl_pkg_perf_status = msr;
}
- if (do_rapl & RAPL_DRAM_PERF_STATUS) {
+ if (platform->rapl_msrs & RAPL_DRAM_PERF_STATUS) {
if (get_msr_sum(cpu, MSR_DRAM_PERF_STATUS, &msr))
return -16;
p->rapl_dram_perf_status = msr;
}
- if (do_rapl & RAPL_AMD_F17H) {
+ if (platform->rapl_msrs & RAPL_AMD_F17H) {
if (get_msr_sum(cpu, MSR_PKG_ENERGY_STAT, &msr))
return -13;
p->energy_pkg = msr;
PCLRSV, PCLRSV
};
-static void calculate_tsc_tweak()
+void probe_cst_limit(void)
{
- tsc_tweak = base_hz / tsc_hz;
-}
+ unsigned long long msr;
+ int *pkg_cstate_limits;
+
+ if (!platform->has_nhm_msrs)
+ return;
+
+ switch (platform->cst_limit) {
+ case CST_LIMIT_NHM:
+ pkg_cstate_limits = nhm_pkg_cstate_limits;
+ break;
+ case CST_LIMIT_SNB:
+ pkg_cstate_limits = snb_pkg_cstate_limits;
+ break;
+ case CST_LIMIT_HSW:
+ pkg_cstate_limits = hsw_pkg_cstate_limits;
+ break;
+ case CST_LIMIT_SKX:
+ pkg_cstate_limits = skx_pkg_cstate_limits;
+ break;
+ case CST_LIMIT_ICX:
+ pkg_cstate_limits = icx_pkg_cstate_limits;
+ break;
+ case CST_LIMIT_SLV:
+ pkg_cstate_limits = slv_pkg_cstate_limits;
+ break;
+ case CST_LIMIT_AMT:
+ pkg_cstate_limits = amt_pkg_cstate_limits;
+ break;
+ case CST_LIMIT_KNL:
+ pkg_cstate_limits = phi_pkg_cstate_limits;
+ break;
+ case CST_LIMIT_GMT:
+ pkg_cstate_limits = glm_pkg_cstate_limits;
+ break;
+ default:
+ return;
+ }
-void prewake_cstate_probe(unsigned int family, unsigned int model);
+ get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
+ pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
+}
-static void dump_nhm_platform_info(void)
+static void dump_platform_info(void)
{
unsigned long long msr;
unsigned int ratio;
+ if (!platform->has_nhm_msrs)
+ return;
+
get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
ratio = (msr >> 8) & 0xFF;
fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk);
+}
+
+static void dump_power_ctl(void)
+{
+ unsigned long long msr;
+
+ if (!platform->has_nhm_msrs)
+ return;
get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
base_cpu, msr, msr & 0x2 ? "EN" : "DIS");
/* C-state Pre-wake Disable (CSTATE_PREWAKE_DISABLE) */
- if (dis_cstate_prewake)
+ if (platform->has_cst_prewake_bit)
fprintf(outf, "C-state Pre-wake: %sabled\n", msr & 0x40000000 ? "DIS" : "EN");
return;
}
-static void dump_hsw_turbo_ratio_limits(void)
+static void dump_turbo_ratio_limit2(void)
{
unsigned long long msr;
unsigned int ratio;
return;
}
-static void dump_ivt_turbo_ratio_limits(void)
+static void dump_turbo_ratio_limit1(void)
{
unsigned long long msr;
unsigned int ratio;
return;
}
-int has_turbo_ratio_group_limits(int family, int model)
-{
-
- if (!genuine_intel)
- return 0;
-
- if (family != 6)
- return 0;
-
- switch (model) {
- case INTEL_FAM6_ATOM_GOLDMONT:
- case INTEL_FAM6_SKYLAKE_X:
- case INTEL_FAM6_ICELAKE_X:
- case INTEL_FAM6_SAPPHIRERAPIDS_X:
- case INTEL_FAM6_ATOM_GOLDMONT_D:
- case INTEL_FAM6_ATOM_TREMONT_D:
- return 1;
- default:
- return 0;
- }
-}
-
-static void dump_turbo_ratio_limits(int trl_msr_offset, int family, int model)
+static void dump_turbo_ratio_limits(int trl_msr_offset)
{
unsigned long long msr, core_counts;
int shift;
fprintf(outf, "cpu%d: MSR_%sTURBO_RATIO_LIMIT: 0x%08llx\n",
base_cpu, trl_msr_offset == MSR_SECONDARY_TURBO_RATIO_LIMIT ? "SECONDARY_" : "", msr);
- if (has_turbo_ratio_group_limits(family, model)) {
+ if (platform->trl_msrs & TRL_CORECOUNT) {
get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts);
fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, core_counts);
} else {
ratio[i], bclk, ratio[i] * bclk, cores[i]);
}
-static void dump_nhm_cst_cfg(void)
+static void dump_cst_cfg(void)
{
unsigned long long msr;
+ if (!platform->has_nhm_msrs)
+ return;
+
get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr);
(msr & (1 << 15)) ? "" : "UN", (unsigned int)msr & 0xF, pkg_cstate_limit_strings[pkg_cstate_limit]);
#define AUTOMATIC_CSTATE_CONVERSION (1UL << 16)
- if (has_automatic_cstate_conversion) {
+ if (platform->has_cst_auto_convension) {
fprintf(outf, ", automatic c-state conversion=%s", (msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off");
}
{
unsigned long long msr;
- get_msr(base_cpu, MSR_PKGC3_IRTL, &msr);
- fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr);
- fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
- (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
-
- get_msr(base_cpu, MSR_PKGC6_IRTL, &msr);
- fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr);
- fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
- (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+ if (!platform->has_irtl_msrs)
+ return;
- get_msr(base_cpu, MSR_PKGC7_IRTL, &msr);
- fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr);
- fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
- (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+ if (platform->supported_cstates & PC3) {
+ get_msr(base_cpu, MSR_PKGC3_IRTL, &msr);
+ fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr);
+ fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
+ (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+ }
- if (!do_irtl_hsw)
- return;
+ if (platform->supported_cstates & PC6) {
+ get_msr(base_cpu, MSR_PKGC6_IRTL, &msr);
+ fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr);
+ fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
+ (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+ }
- get_msr(base_cpu, MSR_PKGC8_IRTL, &msr);
- fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr);
- fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
- (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+ if (platform->supported_cstates & PC7) {
+ get_msr(base_cpu, MSR_PKGC7_IRTL, &msr);
+ fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr);
+ fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
+ (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+ }
- get_msr(base_cpu, MSR_PKGC9_IRTL, &msr);
- fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr);
- fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
- (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+ if (platform->supported_cstates & PC8) {
+ get_msr(base_cpu, MSR_PKGC8_IRTL, &msr);
+ fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr);
+ fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
+ (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+ }
- get_msr(base_cpu, MSR_PKGC10_IRTL, &msr);
- fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr);
- fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
- (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+ if (platform->supported_cstates & PC9) {
+ get_msr(base_cpu, MSR_PKGC9_IRTL, &msr);
+ fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr);
+ fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
+ (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+ }
+ if (platform->supported_cstates & PC10) {
+ get_msr(base_cpu, MSR_PKGC10_IRTL, &msr);
+ fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr);
+ fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
+ (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+ }
}
void free_fd_percpu(void)
cpu_present_set = NULL;
cpu_present_setsize = 0;
+ CPU_FREE(cpu_effective_set);
+ cpu_effective_set = NULL;
+ cpu_effective_setsize = 0;
+
+ CPU_FREE(cpu_allowed_set);
+ cpu_allowed_set = NULL;
+ cpu_allowed_setsize = 0;
+
CPU_FREE(cpu_affinity_set);
cpu_affinity_set = NULL;
cpu_affinity_setsize = 0;
return -1;
}
-int get_thread_siblings(struct cpu_topology *thiscpu)
+static int parse_cpu_str(char *cpu_str, cpu_set_t *cpu_set, int cpu_set_size)
{
- char path[80], character;
- FILE *filep;
- unsigned long map;
- int so, shift, sib_core;
- int cpu = thiscpu->logical_cpu_id;
- int offset = topo.max_cpu_num + 1;
- size_t size;
- int thread_id = 0;
+ unsigned int start, end;
+ char *next = cpu_str;
- thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1));
- if (thiscpu->thread_id < 0)
- thiscpu->thread_id = thread_id++;
- if (!thiscpu->put_ids)
- return -1;
+ while (next && *next) {
- size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
- CPU_ZERO_S(size, thiscpu->put_ids);
+ if (*next == '-') /* no negative cpu numbers */
+ return 1;
- sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu);
- filep = fopen(path, "r");
+ start = strtoul(next, &next, 10);
- if (!filep) {
- warnx("%s: open failed", path);
- return -1;
- }
- do {
- offset -= BITMASK_SIZE;
- if (fscanf(filep, "%lx%c", &map, &character) != 2)
- err(1, "%s: failed to parse file", path);
- for (shift = 0; shift < BITMASK_SIZE; shift++) {
- if ((map >> shift) & 0x1) {
- so = shift + offset;
- sib_core = get_core_id(so);
- if (sib_core == thiscpu->physical_core_id) {
- CPU_SET_S(so, size, thiscpu->put_ids);
- if ((so != cpu) && (cpus[so].thread_id < 0))
- cpus[so].thread_id = thread_id++;
- }
- }
- }
- } while (character == ',');
+ if (start >= CPU_SUBSET_MAXCPUS)
+ return 1;
+ CPU_SET_S(start, cpu_set_size, cpu_set);
+
+ if (*next == '\0' || *next == '\n')
+ break;
+
+ if (*next == ',') {
+ next += 1;
+ continue;
+ }
+
+ if (*next == '-') {
+ next += 1; /* start range */
+ } else if (*next == '.') {
+ next += 1;
+ if (*next == '.')
+ next += 1; /* start range */
+ else
+ return 1;
+ }
+
+ end = strtoul(next, &next, 10);
+ if (end <= start)
+ return 1;
+
+ while (++start <= end) {
+ if (start >= CPU_SUBSET_MAXCPUS)
+ return 1;
+ CPU_SET_S(start, cpu_set_size, cpu_set);
+ }
+
+ if (*next == ',')
+ next += 1;
+ else if (*next != '\0' && *next != '\n')
+ return 1;
+ }
+
+ return 0;
+}
+
+int get_thread_siblings(struct cpu_topology *thiscpu)
+{
+ char path[80], character;
+ FILE *filep;
+ unsigned long map;
+ int so, shift, sib_core;
+ int cpu = thiscpu->logical_cpu_id;
+ int offset = topo.max_cpu_num + 1;
+ size_t size;
+ int thread_id = 0;
+
+ thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1));
+ if (thiscpu->thread_id < 0)
+ thiscpu->thread_id = thread_id++;
+ if (!thiscpu->put_ids)
+ return -1;
+
+ size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
+ CPU_ZERO_S(size, thiscpu->put_ids);
+
+ sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu);
+ filep = fopen(path, "r");
+
+ if (!filep) {
+ warnx("%s: open failed", path);
+ return -1;
+ }
+ do {
+ offset -= BITMASK_SIZE;
+ if (fscanf(filep, "%lx%c", &map, &character) != 2)
+ err(1, "%s: failed to parse file", path);
+ for (shift = 0; shift < BITMASK_SIZE; shift++) {
+ if ((map >> shift) & 0x1) {
+ so = shift + offset;
+ sib_core = get_core_id(so);
+ if (sib_core == thiscpu->physical_core_id) {
+ CPU_SET_S(so, size, thiscpu->put_ids);
+ if ((so != cpu) && (cpus[so].thread_id < 0))
+ cpus[so].thread_id = thread_id++;
+ }
+ }
+ }
+ } while (character == ',');
fclose(filep);
return CPU_COUNT_S(size, thiscpu->put_ids);
t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no);
- if (cpu_is_not_present(t->cpu_id))
+ if (cpu_is_not_allowed(t->cpu_id))
continue;
t2 = GET_THREAD(thread_base2, thread_no, core_no, node_no, pkg_no);
return 0;
}
+#define PATH_EFFECTIVE_CPUS "/sys/fs/cgroup/cpuset.cpus.effective"
+
+static char cpu_effective_str[1024];
+
+static int update_effective_str(bool startup)
+{
+ FILE *fp;
+ char *pos;
+ char buf[1024];
+ int ret;
+
+ if (cpu_effective_str[0] == '\0' && !startup)
+ return 0;
+
+ fp = fopen(PATH_EFFECTIVE_CPUS, "r");
+ if (!fp)
+ return 0;
+
+ pos = fgets(buf, 1024, fp);
+ if (!pos)
+ err(1, "%s: file read failed\n", PATH_EFFECTIVE_CPUS);
+
+ fclose(fp);
+
+ ret = strncmp(cpu_effective_str, buf, 1024);
+ if (!ret)
+ return 0;
+
+ strncpy(cpu_effective_str, buf, 1024);
+ return 1;
+}
+
+static void update_effective_set(bool startup)
+{
+ update_effective_str(startup);
+
+ if (parse_cpu_str(cpu_effective_str, cpu_effective_set, cpu_effective_setsize))
+ err(1, "%s: cpu str malformat %s\n", PATH_EFFECTIVE_CPUS, cpu_effective_str);
+}
+
void re_initialize(void)
{
free_all_buffers();
- setup_all_buffers();
- fprintf(outf, "turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
+ setup_all_buffers(false);
+ fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus, topo.allowed_cpus);
}
void set_max_cpu_num(void)
/*
* snapshot_gfx_mhz()
*
- * record snapshot of
- * /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
+ * fall back to /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
+ * when /sys/class/drm/card0/gt_cur_freq_mhz is not available.
*
* return 1 if config change requires a restart, else return 0
*/
static FILE *fp;
int retval;
- if (fp == NULL)
- fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
- else {
+ if (fp == NULL) {
+ fp = fopen("/sys/class/drm/card0/gt_cur_freq_mhz", "r");
+ if (!fp)
+ fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
+ } else {
rewind(fp);
fflush(fp);
}
/*
* snapshot_gfx_cur_mhz()
*
- * record snapshot of
- * /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz
+ * fall back to /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz
+ * when /sys/class/drm/card0/gt_act_freq_mhz is not available.
*
* return 1 if config change requires a restart, else return 0
*/
static FILE *fp;
int retval;
- if (fp == NULL)
- fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", "r");
- else {
+ if (fp == NULL) {
+ fp = fopen("/sys/class/drm/card0/gt_act_freq_mhz", "r");
+ if (!fp)
+ fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", "r");
+ } else {
rewind(fp);
fflush(fp);
}
re_initialize();
goto restart;
}
+ if (update_effective_str(false)) {
+ re_initialize();
+ goto restart;
+ }
do_sleep();
if (snapshot_proc_sysfs_files())
goto restart;
exit(-6);
}
-/*
- * NHM adds support for additional MSRs:
- *
- * MSR_SMI_COUNT 0x00000034
- *
- * MSR_PLATFORM_INFO 0x000000ce
- * MSR_PKG_CST_CONFIG_CONTROL 0x000000e2
- *
- * MSR_MISC_PWR_MGMT 0x000001aa
- *
- * MSR_PKG_C3_RESIDENCY 0x000003f8
- * MSR_PKG_C6_RESIDENCY 0x000003f9
- * MSR_CORE_C3_RESIDENCY 0x000003fc
- * MSR_CORE_C6_RESIDENCY 0x000003fd
- *
- * Side effect:
- * sets global pkg_cstate_limit to decode MSR_PKG_CST_CONFIG_CONTROL
- * sets has_misc_feature_control
- */
-int probe_nhm_msrs(unsigned int family, unsigned int model)
+void probe_bclk(void)
{
unsigned long long msr;
unsigned int base_ratio;
- int *pkg_cstate_limits;
- if (!genuine_intel)
- return 0;
-
- if (family != 6)
- return 0;
-
- bclk = discover_bclk(family, model);
+ if (!platform->has_nhm_msrs)
+ return;
- switch (model) {
- case INTEL_FAM6_NEHALEM: /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
- case INTEL_FAM6_NEHALEM_EX: /* Nehalem-EX Xeon - Beckton */
- pkg_cstate_limits = nhm_pkg_cstate_limits;
- break;
- case INTEL_FAM6_SANDYBRIDGE: /* SNB */
- case INTEL_FAM6_SANDYBRIDGE_X: /* SNB Xeon */
- case INTEL_FAM6_IVYBRIDGE: /* IVB */
- case INTEL_FAM6_IVYBRIDGE_X: /* IVB Xeon */
- pkg_cstate_limits = snb_pkg_cstate_limits;
- has_misc_feature_control = 1;
- break;
- case INTEL_FAM6_HASWELL: /* HSW */
- case INTEL_FAM6_HASWELL_G: /* HSW */
- case INTEL_FAM6_HASWELL_X: /* HSX */
- case INTEL_FAM6_HASWELL_L: /* HSW */
- case INTEL_FAM6_BROADWELL: /* BDW */
- case INTEL_FAM6_BROADWELL_G: /* BDW */
- case INTEL_FAM6_BROADWELL_X: /* BDX */
- case INTEL_FAM6_SKYLAKE_L: /* SKL */
- case INTEL_FAM6_CANNONLAKE_L: /* CNL */
- pkg_cstate_limits = hsw_pkg_cstate_limits;
- has_misc_feature_control = 1;
- break;
- case INTEL_FAM6_SKYLAKE_X: /* SKX */
- case INTEL_FAM6_SAPPHIRERAPIDS_X: /* SPR */
- pkg_cstate_limits = skx_pkg_cstate_limits;
- has_misc_feature_control = 1;
- break;
- case INTEL_FAM6_ICELAKE_X: /* ICX */
- pkg_cstate_limits = icx_pkg_cstate_limits;
- has_misc_feature_control = 1;
- break;
- case INTEL_FAM6_ATOM_SILVERMONT: /* BYT */
- no_MSR_MISC_PWR_MGMT = 1;
- /* FALLTHRU */
- case INTEL_FAM6_ATOM_SILVERMONT_D: /* AVN */
- pkg_cstate_limits = slv_pkg_cstate_limits;
- break;
- case INTEL_FAM6_ATOM_AIRMONT: /* AMT */
- pkg_cstate_limits = amt_pkg_cstate_limits;
- no_MSR_MISC_PWR_MGMT = 1;
- break;
- case INTEL_FAM6_XEON_PHI_KNL: /* PHI */
- pkg_cstate_limits = phi_pkg_cstate_limits;
- break;
- case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
- case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
- case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */
- case INTEL_FAM6_ATOM_TREMONT: /* EHL */
- case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */
- pkg_cstate_limits = glm_pkg_cstate_limits;
- break;
- default:
- return 0;
- }
- get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
- pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
+ if (platform->bclk_freq == BCLK_100MHZ)
+ bclk = 100.00;
+ else if (platform->bclk_freq == BCLK_133MHZ)
+ bclk = 133.33;
+ else if (platform->bclk_freq == BCLK_SLV)
+ bclk = slm_bclk();
+ else
+ return;
get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
base_ratio = (msr >> 8) & 0xFF;
base_hz = base_ratio * bclk * 1000000;
has_base_hz = 1;
- return 1;
-}
-/*
- * SLV client has support for unique MSRs:
- *
- * MSR_CC6_DEMOTION_POLICY_CONFIG
- * MSR_MC6_DEMOTION_POLICY_CONFIG
- */
+ if (platform->enable_tsc_tweak)
+ tsc_tweak = base_hz / tsc_hz;
+}
-int has_slv_msrs(unsigned int family, unsigned int model)
+static void remove_underbar(char *s)
{
- if (!genuine_intel)
- return 0;
-
- if (family != 6)
- return 0;
+ char *to = s;
- switch (model) {
- case INTEL_FAM6_ATOM_SILVERMONT:
- case INTEL_FAM6_ATOM_SILVERMONT_MID:
- case INTEL_FAM6_ATOM_AIRMONT_MID:
- return 1;
+ while (*s) {
+ if (*s != '_')
+ *to++ = *s;
+ s++;
}
- return 0;
+
+ *to = 0;
}
-int is_dnv(unsigned int family, unsigned int model)
+static void dump_turbo_ratio_info(void)
{
+ if (!has_turbo)
+ return;
- if (!genuine_intel)
- return 0;
+ if (!platform->has_nhm_msrs)
+ return;
- if (family != 6)
- return 0;
+ if (platform->trl_msrs & TRL_LIMIT2)
+ dump_turbo_ratio_limit2();
- switch (model) {
- case INTEL_FAM6_ATOM_GOLDMONT_D:
- return 1;
- }
- return 0;
-}
+ if (platform->trl_msrs & TRL_LIMIT1)
+ dump_turbo_ratio_limit1();
-int is_bdx(unsigned int family, unsigned int model)
-{
+ if (platform->trl_msrs & TRL_BASE) {
+ dump_turbo_ratio_limits(MSR_TURBO_RATIO_LIMIT);
- if (!genuine_intel)
- return 0;
+ if (is_hybrid)
+ dump_turbo_ratio_limits(MSR_SECONDARY_TURBO_RATIO_LIMIT);
+ }
- if (family != 6)
- return 0;
+ if (platform->trl_msrs & TRL_ATOM)
+ dump_atom_turbo_ratio_limits();
- switch (model) {
- case INTEL_FAM6_BROADWELL_X:
- return 1;
- }
- return 0;
+ if (platform->trl_msrs & TRL_KNL)
+ dump_knl_turbo_ratio_limits();
+
+ if (platform->has_config_tdp)
+ dump_config_tdp();
}
-int is_skx(unsigned int family, unsigned int model)
+static int read_sysfs_int(char *path)
{
+ FILE *input;
+ int retval = -1;
- if (!genuine_intel)
- return 0;
-
- if (family != 6)
- return 0;
-
- switch (model) {
- case INTEL_FAM6_SKYLAKE_X:
- return 1;
+ input = fopen(path, "r");
+ if (input == NULL) {
+ if (debug)
+ fprintf(outf, "NSFOD %s\n", path);
+ return (-1);
}
- return 0;
+ if (fscanf(input, "%d", &retval) != 1)
+ err(1, "%s: failed to read int from file", path);
+ fclose(input);
+
+ return (retval);
}
-int is_icx(unsigned int family, unsigned int model)
+static void dump_sysfs_file(char *path)
{
+ FILE *input;
+ char cpuidle_buf[64];
- if (!genuine_intel)
- return 0;
-
- if (family != 6)
- return 0;
-
- switch (model) {
- case INTEL_FAM6_ICELAKE_X:
- return 1;
+ input = fopen(path, "r");
+ if (input == NULL) {
+ if (debug)
+ fprintf(outf, "NSFOD %s\n", path);
+ return;
}
- return 0;
+ if (!fgets(cpuidle_buf, sizeof(cpuidle_buf), input))
+ err(1, "%s: failed to read file", path);
+ fclose(input);
+
+ fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf);
}
-int is_spr(unsigned int family, unsigned int model)
+static void probe_intel_uncore_frequency(void)
{
+ int i, j;
+ char path[128];
if (!genuine_intel)
- return 0;
+ return;
- if (family != 6)
- return 0;
+ if (access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00", R_OK))
+ return;
- switch (model) {
- case INTEL_FAM6_SAPPHIRERAPIDS_X:
- return 1;
- }
- return 0;
-}
+ /* Cluster level sysfs not supported yet. */
+ if (!access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00", R_OK))
+ return;
-int is_ehl(unsigned int family, unsigned int model)
-{
- if (!genuine_intel)
- return 0;
+ if (!access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/current_freq_khz", R_OK))
+ BIC_PRESENT(BIC_UNCORE_MHZ);
- if (family != 6)
- return 0;
-
- switch (model) {
- case INTEL_FAM6_ATOM_TREMONT:
- return 1;
- }
- return 0;
-}
-
-int is_jvl(unsigned int family, unsigned int model)
-{
- if (!genuine_intel)
- return 0;
-
- if (family != 6)
- return 0;
-
- switch (model) {
- case INTEL_FAM6_ATOM_TREMONT_D:
- return 1;
- }
- return 0;
-}
-
-int has_turbo_ratio_limit(unsigned int family, unsigned int model)
-{
- if (has_slv_msrs(family, model))
- return 0;
-
- if (family != 6)
- return 0;
-
- switch (model) {
- /* Nehalem compatible, but do not include turbo-ratio limit support */
- case INTEL_FAM6_NEHALEM_EX: /* Nehalem-EX Xeon - Beckton */
- case INTEL_FAM6_XEON_PHI_KNL: /* PHI - Knights Landing (different MSR definition) */
- return 0;
- default:
- return 1;
- }
-}
-
-int has_atom_turbo_ratio_limit(unsigned int family, unsigned int model)
-{
- if (has_slv_msrs(family, model))
- return 1;
-
- return 0;
-}
-
-int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
-{
- if (!genuine_intel)
- return 0;
-
- if (family != 6)
- return 0;
-
- switch (model) {
- case INTEL_FAM6_IVYBRIDGE_X: /* IVB Xeon */
- case INTEL_FAM6_HASWELL_X: /* HSW Xeon */
- return 1;
- default:
- return 0;
- }
-}
-
-int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model)
-{
- if (!genuine_intel)
- return 0;
-
- if (family != 6)
- return 0;
-
- switch (model) {
- case INTEL_FAM6_HASWELL_X: /* HSW Xeon */
- return 1;
- default:
- return 0;
- }
-}
-
-int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model)
-{
- if (!genuine_intel)
- return 0;
-
- if (family != 6)
- return 0;
-
- switch (model) {
- case INTEL_FAM6_XEON_PHI_KNL: /* Knights Landing */
- return 1;
- default:
- return 0;
- }
-}
-
-int has_glm_turbo_ratio_limit(unsigned int family, unsigned int model)
-{
- if (!genuine_intel)
- return 0;
-
- if (family != 6)
- return 0;
-
- switch (model) {
- case INTEL_FAM6_ATOM_GOLDMONT:
- case INTEL_FAM6_SKYLAKE_X:
- case INTEL_FAM6_ICELAKE_X:
- case INTEL_FAM6_SAPPHIRERAPIDS_X:
- return 1;
- default:
- return 0;
- }
-}
-
-int has_config_tdp(unsigned int family, unsigned int model)
-{
- if (!genuine_intel)
- return 0;
-
- if (family != 6)
- return 0;
-
- switch (model) {
- case INTEL_FAM6_IVYBRIDGE: /* IVB */
- case INTEL_FAM6_HASWELL: /* HSW */
- case INTEL_FAM6_HASWELL_X: /* HSX */
- case INTEL_FAM6_HASWELL_L: /* HSW */
- case INTEL_FAM6_HASWELL_G: /* HSW */
- case INTEL_FAM6_BROADWELL: /* BDW */
- case INTEL_FAM6_BROADWELL_G: /* BDW */
- case INTEL_FAM6_BROADWELL_X: /* BDX */
- case INTEL_FAM6_SKYLAKE_L: /* SKL */
- case INTEL_FAM6_CANNONLAKE_L: /* CNL */
- case INTEL_FAM6_SKYLAKE_X: /* SKX */
- case INTEL_FAM6_ICELAKE_X: /* ICX */
- case INTEL_FAM6_SAPPHIRERAPIDS_X: /* SPR */
- case INTEL_FAM6_XEON_PHI_KNL: /* Knights Landing */
- return 1;
- default:
- return 0;
- }
-}
-
-/*
- * tcc_offset_bits:
- * 0: Tcc Offset not supported (Default)
- * 6: Bit 29:24 of MSR_PLATFORM_INFO
- * 4: Bit 27:24 of MSR_PLATFORM_INFO
- */
-void check_tcc_offset(int model)
-{
- unsigned long long msr;
-
- if (!genuine_intel)
- return;
-
- switch (model) {
- case INTEL_FAM6_SKYLAKE_L:
- case INTEL_FAM6_SKYLAKE:
- case INTEL_FAM6_KABYLAKE_L:
- case INTEL_FAM6_KABYLAKE:
- case INTEL_FAM6_ICELAKE_L:
- case INTEL_FAM6_ICELAKE:
- case INTEL_FAM6_TIGERLAKE_L:
- case INTEL_FAM6_TIGERLAKE:
- case INTEL_FAM6_COMETLAKE:
- if (!get_msr(base_cpu, MSR_PLATFORM_INFO, &msr)) {
- msr = (msr >> 30) & 1;
- if (msr)
- tcc_offset_bits = 6;
- }
- return;
- default:
- return;
- }
-}
-
-static void remove_underbar(char *s)
-{
- char *to = s;
-
- while (*s) {
- if (*s != '_')
- *to++ = *s;
- s++;
- }
-
- *to = 0;
-}
-
-static void dump_turbo_ratio_info(unsigned int family, unsigned int model)
-{
- if (!has_turbo)
- return;
-
- if (has_hsw_turbo_ratio_limit(family, model))
- dump_hsw_turbo_ratio_limits();
-
- if (has_ivt_turbo_ratio_limit(family, model))
- dump_ivt_turbo_ratio_limits();
-
- if (has_turbo_ratio_limit(family, model)) {
- dump_turbo_ratio_limits(MSR_TURBO_RATIO_LIMIT, family, model);
-
- if (is_hybrid)
- dump_turbo_ratio_limits(MSR_SECONDARY_TURBO_RATIO_LIMIT, family, model);
- }
-
- if (has_atom_turbo_ratio_limit(family, model))
- dump_atom_turbo_ratio_limits();
-
- if (has_knl_turbo_ratio_limit(family, model))
- dump_knl_turbo_ratio_limits();
-
- if (has_config_tdp(family, model))
- dump_config_tdp();
-}
-
-static void dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
-{
- if (!do_nhm_platform_info)
- return;
-
- dump_nhm_platform_info();
- dump_turbo_ratio_info(family, model);
- dump_nhm_cst_cfg();
-}
-
-static int read_sysfs_int(char *path)
-{
- FILE *input;
- int retval = -1;
-
- input = fopen(path, "r");
- if (input == NULL) {
- if (debug)
- fprintf(outf, "NSFOD %s\n", path);
- return (-1);
- }
- if (fscanf(input, "%d", &retval) != 1)
- err(1, "%s: failed to read int from file", path);
- fclose(input);
-
- return (retval);
-}
-
-static void dump_sysfs_file(char *path)
-{
- FILE *input;
- char cpuidle_buf[64];
-
- input = fopen(path, "r");
- if (input == NULL) {
- if (debug)
- fprintf(outf, "NSFOD %s\n", path);
- return;
- }
- if (!fgets(cpuidle_buf, sizeof(cpuidle_buf), input))
- err(1, "%s: failed to read file", path);
- fclose(input);
-
- fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf);
-}
-
-static void intel_uncore_frequency_probe(void)
-{
- int i, j;
- char path[128];
-
- if (!genuine_intel)
- return;
-
- if (access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00", R_OK))
- return;
-
- if (!access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/current_freq_khz", R_OK))
- BIC_PRESENT(BIC_UNCORE_MHZ);
-
- if (quiet)
- return;
+ if (quiet)
+ return;
for (i = 0; i < topo.num_packages; ++i) {
for (j = 0; j < topo.num_die; ++j) {
}
}
+static void probe_graphics(void)
+{
+ if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
+ BIC_PRESENT(BIC_GFX_rc6);
+
+ if (!access("/sys/class/drm/card0/gt_cur_freq_mhz", R_OK) ||
+ !access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
+ BIC_PRESENT(BIC_GFXMHz);
+
+ if (!access("/sys/class/drm/card0/gt_act_freq_mhz", R_OK) ||
+ !access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK))
+ BIC_PRESENT(BIC_GFXACTMHz);
+}
+
static void dump_sysfs_cstate_config(void)
{
char path[64];
cpu = t->cpu_id;
/* EPB is per-package */
- if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+ if (!is_cpu_first_thread_in_package(t, c, p))
return 0;
if (cpu_migrate(cpu)) {
cpu = t->cpu_id;
/* MSR_HWP_CAPABILITIES is per-package */
- if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+ if (!is_cpu_first_thread_in_package(t, c, p))
return 0;
if (cpu_migrate(cpu)) {
cpu = t->cpu_id;
/* per-package */
- if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+ if (!is_cpu_first_thread_in_package(t, c, p))
return 0;
if (cpu_migrate(cpu)) {
return -1;
}
- if (do_core_perf_limit_reasons) {
+ if (platform->plr_msrs & PLR_CORE) {
get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
(msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 16) ? "PROCHOT, " : "");
}
- if (do_gfx_perf_limit_reasons) {
+ if (platform->plr_msrs & PLR_GFX) {
get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr);
fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)",
(msr & 1 << 25) ? "GFXPwr, " : "",
(msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : "");
}
- if (do_ring_perf_limit_reasons) {
+ if (platform->plr_msrs & PLR_RING) {
get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr);
fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
fprintf(outf, " (Active: %s%s%s%s%s%s)",
#define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */
#define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */
-double get_tdp_intel(unsigned int model)
+double get_quirk_tdp(void)
{
- unsigned long long msr;
-
- if (do_rapl & RAPL_PKG_POWER_INFO)
- if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
- return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
+ if (platform->rapl_quirk_tdp)
+ return platform->rapl_quirk_tdp;
- switch (model) {
- case INTEL_FAM6_ATOM_SILVERMONT:
- case INTEL_FAM6_ATOM_SILVERMONT_D:
- return 30.0;
- default:
- return 135.0;
- }
+ return 135.0;
}
-double get_tdp_amd(unsigned int family)
+double get_tdp_intel(void)
{
- UNUSED(family);
+ unsigned long long msr;
- /* This is the max stock TDP of HEDT/Server Fam17h+ chips */
- return 280.0;
+ if (platform->rapl_msrs & RAPL_PKG_POWER_INFO)
+ if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
+ return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
+ return get_quirk_tdp();
}
-/*
- * rapl_dram_energy_units_probe()
- * Energy units are either hard-coded, or come from RAPL Energy Unit MSR.
- */
-static double rapl_dram_energy_units_probe(int model, double rapl_energy_units)
+double get_tdp_amd(void)
{
- /* only called for genuine_intel, family 6 */
-
- switch (model) {
- case INTEL_FAM6_HASWELL_X: /* HSX */
- case INTEL_FAM6_BROADWELL_X: /* BDX */
- case INTEL_FAM6_SKYLAKE_X: /* SKX */
- case INTEL_FAM6_XEON_PHI_KNL: /* KNL */
- case INTEL_FAM6_ICELAKE_X: /* ICX */
- return (rapl_dram_energy_units = 15.3 / 1000000);
- default:
- return (rapl_energy_units);
- }
+ return get_quirk_tdp();
}
-void rapl_probe_intel(unsigned int family, unsigned int model)
+void rapl_probe_intel(void)
{
unsigned long long msr;
unsigned int time_unit;
double tdp;
- if (family != 6)
- return;
-
- switch (model) {
- case INTEL_FAM6_SANDYBRIDGE:
- case INTEL_FAM6_IVYBRIDGE:
- case INTEL_FAM6_HASWELL: /* HSW */
- case INTEL_FAM6_HASWELL_L: /* HSW */
- case INTEL_FAM6_HASWELL_G: /* HSW */
- case INTEL_FAM6_BROADWELL: /* BDW */
- case INTEL_FAM6_BROADWELL_G: /* BDW */
- do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
- if (rapl_joules) {
- BIC_PRESENT(BIC_Pkg_J);
- BIC_PRESENT(BIC_Cor_J);
- BIC_PRESENT(BIC_GFX_J);
- } else {
- BIC_PRESENT(BIC_PkgWatt);
- BIC_PRESENT(BIC_CorWatt);
- BIC_PRESENT(BIC_GFXWatt);
- }
- break;
- case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
- case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
- do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO;
- if (rapl_joules)
- BIC_PRESENT(BIC_Pkg_J);
- else
- BIC_PRESENT(BIC_PkgWatt);
- break;
- case INTEL_FAM6_ATOM_TREMONT: /* EHL */
- do_rapl =
- RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS
- | RAPL_GFX | RAPL_PKG_POWER_INFO;
- if (rapl_joules) {
- BIC_PRESENT(BIC_Pkg_J);
- BIC_PRESENT(BIC_Cor_J);
- BIC_PRESENT(BIC_RAM_J);
- BIC_PRESENT(BIC_GFX_J);
- } else {
- BIC_PRESENT(BIC_PkgWatt);
- BIC_PRESENT(BIC_CorWatt);
- BIC_PRESENT(BIC_RAMWatt);
- BIC_PRESENT(BIC_GFXWatt);
- }
- break;
- case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */
- do_rapl = RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
- BIC_PRESENT(BIC_PKG__);
- if (rapl_joules)
- BIC_PRESENT(BIC_Pkg_J);
- else
- BIC_PRESENT(BIC_PkgWatt);
- break;
- case INTEL_FAM6_SKYLAKE_L: /* SKL */
- case INTEL_FAM6_CANNONLAKE_L: /* CNL */
- do_rapl =
- RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS
- | RAPL_GFX | RAPL_PKG_POWER_INFO;
- BIC_PRESENT(BIC_PKG__);
- BIC_PRESENT(BIC_RAM__);
- if (rapl_joules) {
+ if (rapl_joules) {
+ if (platform->rapl_msrs & RAPL_PKG_ENERGY_STATUS)
BIC_PRESENT(BIC_Pkg_J);
+ if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS)
BIC_PRESENT(BIC_Cor_J);
+ if (platform->rapl_msrs & RAPL_DRAM_ENERGY_STATUS)
BIC_PRESENT(BIC_RAM_J);
+ if (platform->rapl_msrs & RAPL_GFX_ENERGY_STATUS)
BIC_PRESENT(BIC_GFX_J);
- } else {
+ } else {
+ if (platform->rapl_msrs & RAPL_PKG_ENERGY_STATUS)
BIC_PRESENT(BIC_PkgWatt);
+ if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS)
BIC_PRESENT(BIC_CorWatt);
+ if (platform->rapl_msrs & RAPL_DRAM_ENERGY_STATUS)
BIC_PRESENT(BIC_RAMWatt);
+ if (platform->rapl_msrs & RAPL_GFX_ENERGY_STATUS)
BIC_PRESENT(BIC_GFXWatt);
- }
- break;
- case INTEL_FAM6_HASWELL_X: /* HSX */
- case INTEL_FAM6_BROADWELL_X: /* BDX */
- case INTEL_FAM6_SKYLAKE_X: /* SKX */
- case INTEL_FAM6_ICELAKE_X: /* ICX */
- case INTEL_FAM6_SAPPHIRERAPIDS_X: /* SPR */
- case INTEL_FAM6_XEON_PHI_KNL: /* KNL */
- do_rapl =
- RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS |
- RAPL_PKG_POWER_INFO;
+ }
+
+ if (platform->rapl_msrs & RAPL_PKG_PERF_STATUS)
BIC_PRESENT(BIC_PKG__);
+ if (platform->rapl_msrs & RAPL_DRAM_PERF_STATUS)
BIC_PRESENT(BIC_RAM__);
- if (rapl_joules) {
- BIC_PRESENT(BIC_Pkg_J);
- BIC_PRESENT(BIC_RAM_J);
- } else {
- BIC_PRESENT(BIC_PkgWatt);
- BIC_PRESENT(BIC_RAMWatt);
- }
- break;
- case INTEL_FAM6_SANDYBRIDGE_X:
- case INTEL_FAM6_IVYBRIDGE_X:
- do_rapl =
- RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS |
- RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
- BIC_PRESENT(BIC_PKG__);
- BIC_PRESENT(BIC_RAM__);
- if (rapl_joules) {
- BIC_PRESENT(BIC_Pkg_J);
- BIC_PRESENT(BIC_Cor_J);
- BIC_PRESENT(BIC_RAM_J);
- } else {
- BIC_PRESENT(BIC_PkgWatt);
- BIC_PRESENT(BIC_CorWatt);
- BIC_PRESENT(BIC_RAMWatt);
- }
- break;
- case INTEL_FAM6_ATOM_SILVERMONT: /* BYT */
- case INTEL_FAM6_ATOM_SILVERMONT_D: /* AVN */
- do_rapl = RAPL_PKG | RAPL_CORES;
- if (rapl_joules) {
- BIC_PRESENT(BIC_Pkg_J);
- BIC_PRESENT(BIC_Cor_J);
- } else {
- BIC_PRESENT(BIC_PkgWatt);
- BIC_PRESENT(BIC_CorWatt);
- }
- break;
- case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */
- do_rapl =
- RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS |
- RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS;
- BIC_PRESENT(BIC_PKG__);
- BIC_PRESENT(BIC_RAM__);
- if (rapl_joules) {
- BIC_PRESENT(BIC_Pkg_J);
- BIC_PRESENT(BIC_Cor_J);
- BIC_PRESENT(BIC_RAM_J);
- } else {
- BIC_PRESENT(BIC_PkgWatt);
- BIC_PRESENT(BIC_CorWatt);
- BIC_PRESENT(BIC_RAMWatt);
- }
- break;
- default:
- return;
- }
/* units on package 0, verify later other packages match */
if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
return;
rapl_power_units = 1.0 / (1 << (msr & 0xF));
- if (model == INTEL_FAM6_ATOM_SILVERMONT)
+ if (platform->has_rapl_divisor)
rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
else
rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
- rapl_dram_energy_units = rapl_dram_energy_units_probe(model, rapl_energy_units);
+ if (platform->has_fixed_rapl_unit)
+ rapl_dram_energy_units = (15.3 / 1000000);
+ else
+ rapl_dram_energy_units = rapl_energy_units;
time_unit = msr >> 16 & 0xF;
if (time_unit == 0)
rapl_time_units = 1.0 / (1 << (time_unit));
- tdp = get_tdp_intel(model);
+ tdp = get_tdp_intel();
rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
if (!quiet)
fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
}
-void rapl_probe_amd(unsigned int family, unsigned int model)
+void rapl_probe_amd(void)
{
unsigned long long msr;
- unsigned int eax, ebx, ecx, edx;
- unsigned int has_rapl = 0;
double tdp;
- UNUSED(model);
-
- if (max_extended_level >= 0x80000007) {
- __cpuid(0x80000007, eax, ebx, ecx, edx);
- /* RAPL (Fam 17h+) */
- has_rapl = edx & (1 << 14);
- }
-
- if (!has_rapl || family < 0x17)
- return;
-
- do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY;
if (rapl_joules) {
BIC_PRESENT(BIC_Pkg_J);
BIC_PRESENT(BIC_Cor_J);
rapl_energy_units = ldexp(1.0, -(msr >> 8 & 0x1f));
rapl_power_units = ldexp(1.0, -(msr & 0xf));
- tdp = get_tdp_amd(family);
+ tdp = get_tdp_amd();
rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
if (!quiet)
fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
}
-/*
- * rapl_probe()
- *
- * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units
- */
-void rapl_probe(unsigned int family, unsigned int model)
-{
- if (genuine_intel)
- rapl_probe_intel(family, model);
- if (authentic_amd || hygon_genuine)
- rapl_probe_amd(family, model);
-}
-
-void perf_limit_reasons_probe(unsigned int family, unsigned int model)
-{
- if (!genuine_intel)
- return;
-
- if (family != 6)
- return;
-
- switch (model) {
- case INTEL_FAM6_HASWELL: /* HSW */
- case INTEL_FAM6_HASWELL_L: /* HSW */
- case INTEL_FAM6_HASWELL_G: /* HSW */
- do_gfx_perf_limit_reasons = 1;
- /* FALLTHRU */
- case INTEL_FAM6_HASWELL_X: /* HSX */
- do_core_perf_limit_reasons = 1;
- do_ring_perf_limit_reasons = 1;
- default:
- return;
- }
-}
-
-void automatic_cstate_conversion_probe(unsigned int family, unsigned int model)
-{
- if (family != 6)
- return;
-
- switch (model) {
- case INTEL_FAM6_BROADWELL_X:
- case INTEL_FAM6_SKYLAKE_X:
- has_automatic_cstate_conversion = 1;
- }
-}
-
-void prewake_cstate_probe(unsigned int family, unsigned int model)
-{
- if (is_icx(family, model) || is_spr(family, model))
- dis_cstate_prewake = 1;
-}
-
-int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
-{
- unsigned long long msr;
- unsigned int dts, dts2;
- int cpu;
-
- UNUSED(c);
- UNUSED(p);
-
- if (!(do_dts || do_ptm))
- return 0;
-
- cpu = t->cpu_id;
-
- /* DTS is per-core, no need to print for each thread */
- if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
- return 0;
-
- if (cpu_migrate(cpu)) {
- fprintf(outf, "print_thermal: Could not migrate to CPU %d\n", cpu);
- return -1;
- }
-
- if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) {
- if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
- return 0;
-
- dts = (msr >> 16) & 0x7F;
- fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", cpu, msr, tj_max - dts);
-
- if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
- return 0;
-
- dts = (msr >> 16) & 0x7F;
- dts2 = (msr >> 8) & 0x7F;
- fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
- cpu, msr, tj_max - dts, tj_max - dts2);
- }
-
- if (do_dts && debug) {
- unsigned int resolution;
-
- if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
- return 0;
-
- dts = (msr >> 16) & 0x7F;
- resolution = (msr >> 27) & 0xF;
- fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
- cpu, msr, tj_max - dts, resolution);
-
- if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
- return 0;
-
- dts = (msr >> 16) & 0x7F;
- dts2 = (msr >> 8) & 0x7F;
- fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
- cpu, msr, tj_max - dts, tj_max - dts2);
- }
-
- return 0;
-}
-
void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
{
fprintf(outf, "cpu%d: %s: %sabled (%0.3f Watts, %f sec, clamp %sabled)\n",
UNUSED(c);
UNUSED(p);
- if (!do_rapl)
+ if (!platform->rapl_msrs)
return 0;
/* RAPL counters are per package, so print only for 1st thread/package */
- if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+ if (!is_cpu_first_thread_in_package(t, c, p))
return 0;
cpu = t->cpu_id;
return -1;
}
- if (do_rapl & RAPL_AMD_F17H) {
+ if (platform->rapl_msrs & RAPL_AMD_F17H) {
msr_name = "MSR_RAPL_PWR_UNIT";
if (get_msr(cpu, MSR_RAPL_PWR_UNIT, &msr))
return -1;
fprintf(outf, "cpu%d: %s: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr_name, msr,
rapl_power_units, rapl_energy_units, rapl_time_units);
- if (do_rapl & RAPL_PKG_POWER_INFO) {
+ if (platform->rapl_msrs & RAPL_PKG_POWER_INFO) {
if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
return -5;
((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
}
- if (do_rapl & RAPL_PKG) {
+ if (platform->rapl_msrs & RAPL_PKG) {
if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
return -9;
cpu, ((msr >> 0) & 0x1FFF) * rapl_power_units, (msr >> 31) & 1 ? "" : "UN");
}
- if (do_rapl & RAPL_DRAM_POWER_INFO) {
+ if (platform->rapl_msrs & RAPL_DRAM_POWER_INFO) {
if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
return -6;
((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
}
- if (do_rapl & RAPL_DRAM) {
+ if (platform->rapl_msrs & RAPL_DRAM) {
if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
return -9;
fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
print_power_limit_msr(cpu, msr, "DRAM Limit");
}
- if (do_rapl & RAPL_CORE_POLICY) {
+ if (platform->rapl_msrs & RAPL_CORE_POLICY) {
if (get_msr(cpu, MSR_PP0_POLICY, &msr))
return -7;
fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
}
- if (do_rapl & RAPL_CORES_POWER_LIMIT) {
+ if (platform->rapl_msrs & RAPL_CORE_POWER_LIMIT) {
if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
return -9;
fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
cpu, msr, (msr >> 31) & 1 ? "" : "UN");
print_power_limit_msr(cpu, msr, "Cores Limit");
}
- if (do_rapl & RAPL_GFX) {
+ if (platform->rapl_msrs & RAPL_GFX) {
if (get_msr(cpu, MSR_PP1_POLICY, &msr))
return -8;
}
/*
- * SNB adds support for additional MSRs:
- *
- * MSR_PKG_C7_RESIDENCY 0x000003fa
- * MSR_CORE_C7_RESIDENCY 0x000003fe
- * MSR_PKG_C2_RESIDENCY 0x0000060d
- */
-
-int has_snb_msrs(unsigned int family, unsigned int model)
-{
- if (!genuine_intel)
- return 0;
-
- if (family != 6)
- return 0;
-
- switch (model) {
- case INTEL_FAM6_SANDYBRIDGE:
- case INTEL_FAM6_SANDYBRIDGE_X:
- case INTEL_FAM6_IVYBRIDGE: /* IVB */
- case INTEL_FAM6_IVYBRIDGE_X: /* IVB Xeon */
- case INTEL_FAM6_HASWELL: /* HSW */
- case INTEL_FAM6_HASWELL_X: /* HSW */
- case INTEL_FAM6_HASWELL_L: /* HSW */
- case INTEL_FAM6_HASWELL_G: /* HSW */
- case INTEL_FAM6_BROADWELL: /* BDW */
- case INTEL_FAM6_BROADWELL_G: /* BDW */
- case INTEL_FAM6_BROADWELL_X: /* BDX */
- case INTEL_FAM6_SKYLAKE_L: /* SKL */
- case INTEL_FAM6_CANNONLAKE_L: /* CNL */
- case INTEL_FAM6_SKYLAKE_X: /* SKX */
- case INTEL_FAM6_ICELAKE_X: /* ICX */
- case INTEL_FAM6_SAPPHIRERAPIDS_X: /* SPR */
- case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
- case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
- case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */
- case INTEL_FAM6_ATOM_TREMONT: /* EHL */
- case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */
- return 1;
- }
- return 0;
-}
-
-/*
- * HSW ULT added support for C8/C9/C10 MSRs:
- *
- * MSR_PKG_C8_RESIDENCY 0x00000630
- * MSR_PKG_C9_RESIDENCY 0x00000631
- * MSR_PKG_C10_RESIDENCY 0x00000632
- *
- * MSR_PKGC8_IRTL 0x00000633
- * MSR_PKGC9_IRTL 0x00000634
- * MSR_PKGC10_IRTL 0x00000635
- *
- */
-int has_c8910_msrs(unsigned int family, unsigned int model)
-{
- if (!genuine_intel)
- return 0;
-
- if (family != 6)
- return 0;
-
- switch (model) {
- case INTEL_FAM6_HASWELL_L: /* HSW */
- case INTEL_FAM6_BROADWELL: /* BDW */
- case INTEL_FAM6_SKYLAKE_L: /* SKL */
- case INTEL_FAM6_CANNONLAKE_L: /* CNL */
- case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
- case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
- case INTEL_FAM6_ATOM_TREMONT: /* EHL */
- return 1;
- }
- return 0;
-}
-
-/*
- * SKL adds support for additional MSRS:
+ * probe_rapl()
*
- * MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658
- * MSR_PKG_ANY_CORE_C0_RES 0x00000659
- * MSR_PKG_ANY_GFXE_C0_RES 0x0000065A
- * MSR_PKG_BOTH_CORE_GFXE_C0_RES 0x0000065B
+ * sets rapl_power_units, rapl_energy_units, rapl_time_units
*/
-int has_skl_msrs(unsigned int family, unsigned int model)
-{
- if (!genuine_intel)
- return 0;
-
- if (family != 6)
- return 0;
-
- switch (model) {
- case INTEL_FAM6_SKYLAKE_L: /* SKL */
- case INTEL_FAM6_CANNONLAKE_L: /* CNL */
- return 1;
- }
- return 0;
-}
-
-int is_slm(unsigned int family, unsigned int model)
-{
- if (!genuine_intel)
- return 0;
-
- if (family != 6)
- return 0;
-
- switch (model) {
- case INTEL_FAM6_ATOM_SILVERMONT: /* BYT */
- case INTEL_FAM6_ATOM_SILVERMONT_D: /* AVN */
- return 1;
- }
- return 0;
-}
-
-int is_knl(unsigned int family, unsigned int model)
-{
- if (!genuine_intel)
- return 0;
-
- if (family != 6)
- return 0;
-
- switch (model) {
- case INTEL_FAM6_XEON_PHI_KNL: /* KNL */
- return 1;
- }
- return 0;
-}
-
-int is_cnl(unsigned int family, unsigned int model)
-{
- if (!genuine_intel)
- return 0;
-
- if (family != 6)
- return 0;
-
- switch (model) {
- case INTEL_FAM6_CANNONLAKE_L: /* CNL */
- return 1;
- }
-
- return 0;
-}
-
-unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model)
-{
- if (is_knl(family, model))
- return 1024;
- return 1;
-}
-
-#define SLM_BCLK_FREQS 5
-double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0 };
-
-double slm_bclk(void)
-{
- unsigned long long msr = 3;
- unsigned int i;
- double freq;
-
- if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
- fprintf(outf, "SLM BCLK: unknown\n");
-
- i = msr & 0xf;
- if (i >= SLM_BCLK_FREQS) {
- fprintf(outf, "SLM BCLK[%d] invalid\n", i);
- i = 3;
- }
- freq = slm_freq_table[i];
-
- if (!quiet)
- fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
-
- return freq;
-}
-
-double discover_bclk(unsigned int family, unsigned int model)
+void probe_rapl(void)
{
- if (has_snb_msrs(family, model) || is_knl(family, model))
- return 100.00;
- else if (is_slm(family, model))
- return slm_bclk();
- else
- return 133.33;
-}
-
-int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p)
-{
- unsigned int eax, ebx, ecx, edx;
-
- UNUSED(c);
- UNUSED(p);
-
- if (!genuine_intel)
- return 0;
+ if (!platform->rapl_msrs)
+ return;
- if (cpu_migrate(t->cpu_id)) {
- fprintf(outf, "Could not migrate to CPU %d\n", t->cpu_id);
- return -1;
- }
+ if (genuine_intel)
+ rapl_probe_intel();
+ if (authentic_amd || hygon_genuine)
+ rapl_probe_amd();
- if (max_level < 0x1a)
- return 0;
+ if (quiet)
+ return;
- __cpuid(0x1a, eax, ebx, ecx, edx);
- eax = (eax >> 24) & 0xFF;
- if (eax == 0x20)
- t->is_atom = true;
- return 0;
+ for_all_cpus(print_rapl, ODD_COUNTERS);
}
/*
return 0;
/* this is a per-package concept */
- if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+ if (!is_cpu_first_thread_in_package(t, c, p))
return 0;
cpu = t->cpu_id;
}
/* Temperature Target MSR is Nehalem and newer only */
- if (!do_nhm_platform_info)
+ if (!platform->has_nhm_msrs)
goto guess;
if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
tcc_default = (msr >> 16) & 0xFF;
if (!quiet) {
- switch (tcc_offset_bits) {
- case 4:
- tcc_offset = (msr >> 24) & 0xF;
- fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n",
- cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset);
- break;
- case 6:
- tcc_offset = (msr >> 24) & 0x3F;
+ int bits = platform->tcc_offset_bits;
+ unsigned long long enabled = 0;
+
+ if (bits && !get_msr(base_cpu, MSR_PLATFORM_INFO, &enabled))
+ enabled = (enabled >> 30) & 1;
+
+ if (bits && enabled) {
+ tcc_offset = (msr >> 24) & GENMASK(bits - 1, 0);
fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n",
cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset);
- break;
- default:
+ } else {
fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", cpu, msr, tcc_default);
- break;
}
}
- if (!tcc_default)
- goto guess;
+ if (!tcc_default)
+ goto guess;
+
+ tj_max = tcc_default;
+
+ return 0;
+
+guess:
+ tj_max = TJMAX_DEFAULT;
+ fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", cpu, tj_max);
+
+ return 0;
+}
+
+int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+ unsigned long long msr;
+ unsigned int dts, dts2;
+ int cpu;
+
+ UNUSED(c);
+ UNUSED(p);
+
+ if (!(do_dts || do_ptm))
+ return 0;
+
+ cpu = t->cpu_id;
+
+ /* DTS is per-core, no need to print for each thread */
+ if (!is_cpu_first_thread_in_core(t, c, p))
+ return 0;
+
+ if (cpu_migrate(cpu)) {
+ fprintf(outf, "print_thermal: Could not migrate to CPU %d\n", cpu);
+ return -1;
+ }
+
+ if (do_ptm && is_cpu_first_core_in_package(t, c, p)) {
+ if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
+ return 0;
+
+ dts = (msr >> 16) & 0x7F;
+ fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", cpu, msr, tj_max - dts);
+
+ if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
+ return 0;
+
+ dts = (msr >> 16) & 0x7F;
+ dts2 = (msr >> 8) & 0x7F;
+ fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
+ cpu, msr, tj_max - dts, tj_max - dts2);
+ }
+
+ if (do_dts && debug) {
+ unsigned int resolution;
+
+ if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
+ return 0;
+
+ dts = (msr >> 16) & 0x7F;
+ resolution = (msr >> 27) & 0xF;
+ fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
+ cpu, msr, tj_max - dts, resolution);
+
+ if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
+ return 0;
+
+ dts = (msr >> 16) & 0x7F;
+ dts2 = (msr >> 8) & 0x7F;
+ fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
+ cpu, msr, tj_max - dts, tj_max - dts2);
+ }
+
+ return 0;
+}
+
+void probe_thermal(void)
+{
+ if (!access("/sys/devices/system/cpu/cpu0/thermal_throttle/core_throttle_count", R_OK))
+ BIC_PRESENT(BIC_CORE_THROT_CNT);
+ else
+ BIC_NOT_PRESENT(BIC_CORE_THROT_CNT);
+
+ for_all_cpus(set_temperature_target, ODD_COUNTERS);
+
+ if (quiet)
+ return;
+
+ for_all_cpus(print_thermal, ODD_COUNTERS);
+}
+
+int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ UNUSED(c);
+ UNUSED(p);
- tj_max = tcc_default;
+ if (!genuine_intel)
+ return 0;
- return 0;
+ if (cpu_migrate(t->cpu_id)) {
+ fprintf(outf, "Could not migrate to CPU %d\n", t->cpu_id);
+ return -1;
+ }
-guess:
- tj_max = TJMAX_DEFAULT;
- fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", cpu, tj_max);
+ if (max_level < 0x1a)
+ return 0;
+ __cpuid(0x1a, eax, ebx, ecx, edx);
+ eax = (eax >> 24) & 0xFF;
+ if (eax == 0x20)
+ t->is_atom = true;
return 0;
}
{
unsigned long long msr;
- if (!has_misc_feature_control)
+ if (!platform->has_msr_misc_feature_control)
return;
if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr))
{
unsigned long long msr;
- if (!do_nhm_platform_info)
- return;
-
- if (no_MSR_MISC_PWR_MGMT)
+ if (!platform->has_msr_misc_pwr_mgmt)
return;
if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr))
{
unsigned long long msr;
+ if (!platform->has_msr_c6_demotion_policy_config)
+ return;
+
if (!get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr))
fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n",
base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
}
-/*
- * When models are the same, for the purpose of turbostat, reuse
- */
-unsigned int intel_model_duplicates(unsigned int model)
-{
-
- switch (model) {
- case INTEL_FAM6_NEHALEM_EP: /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
- case INTEL_FAM6_NEHALEM: /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
- case 0x1F: /* Core i7 and i5 Processor - Nehalem */
- case INTEL_FAM6_WESTMERE: /* Westmere Client - Clarkdale, Arrandale */
- case INTEL_FAM6_WESTMERE_EP: /* Westmere EP - Gulftown */
- return INTEL_FAM6_NEHALEM;
-
- case INTEL_FAM6_NEHALEM_EX: /* Nehalem-EX Xeon - Beckton */
- case INTEL_FAM6_WESTMERE_EX: /* Westmere-EX Xeon - Eagleton */
- return INTEL_FAM6_NEHALEM_EX;
-
- case INTEL_FAM6_XEON_PHI_KNM:
- return INTEL_FAM6_XEON_PHI_KNL;
-
- case INTEL_FAM6_BROADWELL_X:
- case INTEL_FAM6_BROADWELL_D: /* BDX-DE */
- return INTEL_FAM6_BROADWELL_X;
-
- case INTEL_FAM6_SKYLAKE_L:
- case INTEL_FAM6_SKYLAKE:
- case INTEL_FAM6_KABYLAKE_L:
- case INTEL_FAM6_KABYLAKE:
- case INTEL_FAM6_COMETLAKE_L:
- case INTEL_FAM6_COMETLAKE:
- return INTEL_FAM6_SKYLAKE_L;
-
- case INTEL_FAM6_ICELAKE_L:
- case INTEL_FAM6_ICELAKE_NNPI:
- case INTEL_FAM6_TIGERLAKE_L:
- case INTEL_FAM6_TIGERLAKE:
- case INTEL_FAM6_ROCKETLAKE:
- case INTEL_FAM6_LAKEFIELD:
- case INTEL_FAM6_ALDERLAKE:
- case INTEL_FAM6_ALDERLAKE_L:
- case INTEL_FAM6_ATOM_GRACEMONT:
- case INTEL_FAM6_RAPTORLAKE:
- case INTEL_FAM6_RAPTORLAKE_P:
- case INTEL_FAM6_RAPTORLAKE_S:
- case INTEL_FAM6_METEORLAKE:
- case INTEL_FAM6_METEORLAKE_L:
- return INTEL_FAM6_CANNONLAKE_L;
-
- case INTEL_FAM6_ATOM_TREMONT_L:
- return INTEL_FAM6_ATOM_TREMONT;
-
- case INTEL_FAM6_ICELAKE_D:
- return INTEL_FAM6_ICELAKE_X;
-
- case INTEL_FAM6_EMERALDRAPIDS_X:
- return INTEL_FAM6_SAPPHIRERAPIDS_X;
- }
- return model;
-}
-
void print_dev_latency(void)
{
char *path = "/dev/cpu_dma_latency";
BIC_PRESENT(BIC_IPC);
}
+void probe_cstates(void)
+{
+ probe_cst_limit();
+
+ if (platform->supported_cstates & CC1)
+ BIC_PRESENT(BIC_CPU_c1);
+
+ if (platform->supported_cstates & CC3)
+ BIC_PRESENT(BIC_CPU_c3);
+
+ if (platform->supported_cstates & CC6)
+ BIC_PRESENT(BIC_CPU_c6);
+
+ if (platform->supported_cstates & CC7)
+ BIC_PRESENT(BIC_CPU_c7);
+
+ if (platform->supported_cstates & PC2 && (pkg_cstate_limit >= PCL__2))
+ BIC_PRESENT(BIC_Pkgpc2);
+
+ if (platform->supported_cstates & PC3 && (pkg_cstate_limit >= PCL__3))
+ BIC_PRESENT(BIC_Pkgpc3);
+
+ if (platform->supported_cstates & PC6 && (pkg_cstate_limit >= PCL__6))
+ BIC_PRESENT(BIC_Pkgpc6);
+
+ if (platform->supported_cstates & PC7 && (pkg_cstate_limit >= PCL__7))
+ BIC_PRESENT(BIC_Pkgpc7);
+
+ if (platform->supported_cstates & PC8 && (pkg_cstate_limit >= PCL__8))
+ BIC_PRESENT(BIC_Pkgpc8);
+
+ if (platform->supported_cstates & PC9 && (pkg_cstate_limit >= PCL__9))
+ BIC_PRESENT(BIC_Pkgpc9);
+
+ if (platform->supported_cstates & PC10 && (pkg_cstate_limit >= PCL_10))
+ BIC_PRESENT(BIC_Pkgpc10);
+
+ if (platform->has_msr_module_c6_res_ms)
+ BIC_PRESENT(BIC_Mod_c6);
+
+ if (platform->has_ext_cst_msrs) {
+ BIC_PRESENT(BIC_Totl_c0);
+ BIC_PRESENT(BIC_Any_c0);
+ BIC_PRESENT(BIC_GFX_c0);
+ BIC_PRESENT(BIC_CPUGFX);
+ }
+
+ if (quiet)
+ return;
+
+ dump_power_ctl();
+ dump_cst_cfg();
+ decode_c6_demotion_policy_msr();
+ print_dev_latency();
+ dump_sysfs_cstate_config();
+ print_irtl();
+}
+
+void probe_lpi(void)
+{
+ if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK))
+ BIC_PRESENT(BIC_CPU_LPI);
+ else
+ BIC_NOT_PRESENT(BIC_CPU_LPI);
+
+ if (!access(sys_lpi_file_sysfs, R_OK)) {
+ sys_lpi_file = sys_lpi_file_sysfs;
+ BIC_PRESENT(BIC_SYS_LPI);
+ } else if (!access(sys_lpi_file_debugfs, R_OK)) {
+ sys_lpi_file = sys_lpi_file_debugfs;
+ BIC_PRESENT(BIC_SYS_LPI);
+ } else {
+ sys_lpi_file_sysfs = NULL;
+ BIC_NOT_PRESENT(BIC_SYS_LPI);
+ }
+
+}
+
+void probe_pstates(void)
+{
+ probe_bclk();
+
+ if (quiet)
+ return;
+
+ dump_platform_info();
+ dump_turbo_ratio_info();
+ dump_sysfs_pstate_config();
+ decode_misc_pwr_mgmt_msr();
+
+ for_all_cpus(print_hwp, ODD_COUNTERS);
+ for_all_cpus(print_epb, ODD_COUNTERS);
+ for_all_cpus(print_perf_limit, ODD_COUNTERS);
+}
+
void process_cpuid()
{
unsigned int eax, ebx, ecx, edx;
edx_flags & (1 << 22) ? "ACPI-TM" : "-",
edx_flags & (1 << 28) ? "HT" : "-", edx_flags & (1 << 29) ? "TM" : "-");
}
- if (genuine_intel) {
- model_orig = model;
- model = intel_model_duplicates(model);
- }
+
+ probe_platform_features(family, model);
if (!(edx_flags & (1 << 5)))
errx(1, "CPUID: no MSR");
__cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx);
if (ebx_tsc != 0) {
-
if (!quiet && (ebx != 0))
fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
eax_crystal, ebx_tsc, crystal_hz);
if (crystal_hz == 0)
- switch (model) {
- case INTEL_FAM6_SKYLAKE_L: /* SKL */
- crystal_hz = 24000000; /* 24.0 MHz */
- break;
- case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */
- crystal_hz = 25000000; /* 25.0 MHz */
- break;
- case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
- case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
- crystal_hz = 19200000; /* 19.2 MHz */
- break;
- default:
- crystal_hz = 0;
- }
+ crystal_hz = platform->crystal_freq;
if (crystal_hz) {
tsc_hz = (unsigned long long)crystal_hz *ebx_tsc / eax_crystal;
}
if (has_aperf)
- aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model);
+ aperf_mperf_multiplier = platform->need_perf_multiplier ? 1024 : 1;
BIC_PRESENT(BIC_IRQ);
BIC_PRESENT(BIC_TSC_MHz);
+}
- if (probe_nhm_msrs(family, model)) {
- do_nhm_platform_info = 1;
- BIC_PRESENT(BIC_CPU_c1);
- BIC_PRESENT(BIC_CPU_c3);
- BIC_PRESENT(BIC_CPU_c6);
- BIC_PRESENT(BIC_SMI);
- }
- do_snb_cstates = has_snb_msrs(family, model);
-
- if (do_snb_cstates)
- BIC_PRESENT(BIC_CPU_c7);
-
- do_irtl_snb = has_snb_msrs(family, model);
- if (do_snb_cstates && (pkg_cstate_limit >= PCL__2))
- BIC_PRESENT(BIC_Pkgpc2);
- if (pkg_cstate_limit >= PCL__3)
- BIC_PRESENT(BIC_Pkgpc3);
- if (pkg_cstate_limit >= PCL__6)
- BIC_PRESENT(BIC_Pkgpc6);
- if (do_snb_cstates && (pkg_cstate_limit >= PCL__7))
- BIC_PRESENT(BIC_Pkgpc7);
- if (has_slv_msrs(family, model)) {
- BIC_NOT_PRESENT(BIC_Pkgpc2);
- BIC_NOT_PRESENT(BIC_Pkgpc3);
- BIC_PRESENT(BIC_Pkgpc6);
- BIC_NOT_PRESENT(BIC_Pkgpc7);
- BIC_PRESENT(BIC_Mod_c6);
- use_c1_residency_msr = 1;
- }
- if (is_jvl(family, model)) {
- BIC_NOT_PRESENT(BIC_CPU_c3);
- BIC_NOT_PRESENT(BIC_CPU_c7);
- BIC_NOT_PRESENT(BIC_Pkgpc2);
- BIC_NOT_PRESENT(BIC_Pkgpc3);
- BIC_NOT_PRESENT(BIC_Pkgpc6);
- BIC_NOT_PRESENT(BIC_Pkgpc7);
- }
- if (is_dnv(family, model)) {
- BIC_PRESENT(BIC_CPU_c1);
- BIC_NOT_PRESENT(BIC_CPU_c3);
- BIC_NOT_PRESENT(BIC_Pkgpc3);
- BIC_NOT_PRESENT(BIC_CPU_c7);
- BIC_NOT_PRESENT(BIC_Pkgpc7);
- use_c1_residency_msr = 1;
- }
- if (is_skx(family, model) || is_icx(family, model) || is_spr(family, model)) {
- BIC_NOT_PRESENT(BIC_CPU_c3);
- BIC_NOT_PRESENT(BIC_Pkgpc3);
- BIC_NOT_PRESENT(BIC_CPU_c7);
- BIC_NOT_PRESENT(BIC_Pkgpc7);
- }
- if (is_bdx(family, model)) {
- BIC_NOT_PRESENT(BIC_CPU_c7);
- BIC_NOT_PRESENT(BIC_Pkgpc7);
- }
- if (has_c8910_msrs(family, model)) {
- if (pkg_cstate_limit >= PCL__8)
- BIC_PRESENT(BIC_Pkgpc8);
- if (pkg_cstate_limit >= PCL__9)
- BIC_PRESENT(BIC_Pkgpc9);
- if (pkg_cstate_limit >= PCL_10)
- BIC_PRESENT(BIC_Pkgpc10);
- }
- do_irtl_hsw = has_c8910_msrs(family, model);
- if (has_skl_msrs(family, model)) {
- BIC_PRESENT(BIC_Totl_c0);
- BIC_PRESENT(BIC_Any_c0);
- BIC_PRESENT(BIC_GFX_c0);
- BIC_PRESENT(BIC_CPUGFX);
- }
- do_slm_cstates = is_slm(family, model);
- do_knl_cstates = is_knl(family, model);
-
- if (do_slm_cstates || do_knl_cstates || is_cnl(family, model) || is_ehl(family, model))
- BIC_NOT_PRESENT(BIC_CPU_c3);
-
- if (!quiet)
- decode_misc_pwr_mgmt_msr();
-
- if (!quiet && has_slv_msrs(family, model))
- decode_c6_demotion_policy_msr();
-
- rapl_probe(family, model);
- perf_limit_reasons_probe(family, model);
- automatic_cstate_conversion_probe(family, model);
-
- check_tcc_offset(model_orig);
-
- if (!quiet)
- dump_cstate_pstate_config_info(family, model);
- intel_uncore_frequency_probe();
-
- if (!quiet)
- print_dev_latency();
- if (!quiet)
- dump_sysfs_cstate_config();
- if (!quiet)
- dump_sysfs_pstate_config();
+void probe_pm_features(void)
+{
+ probe_pstates();
- if (has_skl_msrs(family, model) || is_ehl(family, model))
- calculate_tsc_tweak();
+ probe_cstates();
- if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
- BIC_PRESENT(BIC_GFX_rc6);
+ probe_lpi();
- if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
- BIC_PRESENT(BIC_GFXMHz);
+ probe_intel_uncore_frequency();
- if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK))
- BIC_PRESENT(BIC_GFXACTMHz);
+ probe_graphics();
- if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK))
- BIC_PRESENT(BIC_CPU_LPI);
- else
- BIC_NOT_PRESENT(BIC_CPU_LPI);
+ probe_rapl();
- if (!access("/sys/devices/system/cpu/cpu0/thermal_throttle/core_throttle_count", R_OK))
- BIC_PRESENT(BIC_CORE_THROT_CNT);
- else
- BIC_NOT_PRESENT(BIC_CORE_THROT_CNT);
+ probe_thermal();
- if (!access(sys_lpi_file_sysfs, R_OK)) {
- sys_lpi_file = sys_lpi_file_sysfs;
- BIC_PRESENT(BIC_SYS_LPI);
- } else if (!access(sys_lpi_file_debugfs, R_OK)) {
- sys_lpi_file = sys_lpi_file_debugfs;
- BIC_PRESENT(BIC_SYS_LPI);
- } else {
- sys_lpi_file_sysfs = NULL;
- BIC_NOT_PRESENT(BIC_SYS_LPI);
- }
+ if (platform->has_nhm_msrs)
+ BIC_PRESENT(BIC_SMI);
if (!quiet)
decode_misc_feature_control();
-
- return;
}
/*
return 0;
}
-void topology_probe()
+void topology_probe(bool startup)
{
int i;
int max_core_id = 0;
for_all_proc_cpus(mark_cpu_present);
/*
- * Validate that all cpus in cpu_subset are also in cpu_present_set
+ * Allocate and initialize cpu_effective_set
+ */
+ cpu_effective_set = CPU_ALLOC((topo.max_cpu_num + 1));
+ if (cpu_effective_set == NULL)
+ err(3, "CPU_ALLOC");
+ cpu_effective_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
+ CPU_ZERO_S(cpu_effective_setsize, cpu_effective_set);
+ update_effective_set(startup);
+
+ /*
+ * Allocate and initialize cpu_allowed_set
+ */
+ cpu_allowed_set = CPU_ALLOC((topo.max_cpu_num + 1));
+ if (cpu_allowed_set == NULL)
+ err(3, "CPU_ALLOC");
+ cpu_allowed_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
+ CPU_ZERO_S(cpu_allowed_setsize, cpu_allowed_set);
+
+ /*
+ * Validate and update cpu_allowed_set.
+ *
+ * Make sure all cpus in cpu_subset are also in cpu_present_set during startup.
+ * Give a warning when cpus in cpu_subset become unavailable at runtime.
+ * Give a warning when cpus are not effective because of cgroup setting.
+ *
+ * cpu_allowed_set is the intersection of cpu_present_set/cpu_effective_set/cpu_subset.
*/
for (i = 0; i < CPU_SUBSET_MAXCPUS; ++i) {
- if (CPU_ISSET_S(i, cpu_subset_size, cpu_subset))
- if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set))
- err(1, "cpu%d not present", i);
+ if (cpu_subset && !CPU_ISSET_S(i, cpu_subset_size, cpu_subset))
+ continue;
+
+ if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set)) {
+ if (cpu_subset) {
+ /* cpus in cpu_subset must be in cpu_present_set during startup */
+ if (startup)
+ err(1, "cpu%d not present", i);
+ else
+ fprintf(stderr, "cpu%d not present\n", i);
+ }
+ continue;
+ }
+
+ if (CPU_COUNT_S(cpu_effective_setsize, cpu_effective_set)) {
+ if (!CPU_ISSET_S(i, cpu_effective_setsize, cpu_effective_set)) {
+ fprintf(stderr, "cpu%d not effective\n", i);
+ continue;
+ }
+ }
+
+ CPU_SET_S(i, cpu_allowed_setsize, cpu_allowed_set);
}
+ if (!CPU_COUNT_S(cpu_allowed_setsize, cpu_allowed_set))
+ err(-ENODEV, "No valid cpus found");
+ sched_setaffinity(0, cpu_allowed_setsize, cpu_allowed_set);
+
/*
* Allocate and initialize cpu_affinity_set
*/
if (*c == NULL)
goto error;
- for (i = 0; i < num_cores; i++)
+ for (i = 0; i < num_cores; i++) {
(*c)[i].core_id = -1;
+ (*c)[i].base_cpu = -1;
+ }
*p = calloc(topo.num_packages, sizeof(struct pkg_data));
if (*p == NULL)
goto error;
- for (i = 0; i < topo.num_packages; i++)
+ for (i = 0; i < topo.num_packages; i++) {
(*p)[i].package_id = i;
+ (*p)[i].base_cpu = -1;
+ }
return;
error:
p = GET_PKG(pkg_base, pkg_id);
t->cpu_id = cpu_id;
- if (thread_id == 0) {
- t->flags |= CPU_IS_FIRST_THREAD_IN_CORE;
- if (cpu_is_first_core_in_package(cpu_id))
- t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE;
+ if (!cpu_is_not_allowed(cpu_id)) {
+ if (c->base_cpu < 0)
+ c->base_cpu = t->cpu_id;
+ if (p->base_cpu < 0)
+ p->base_cpu = t->cpu_id;
}
c->core_id = core_id;
err(-1, "calloc %d", topo.max_cpu_num + 1);
}
-void setup_all_buffers(void)
+int update_topo(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+ topo.allowed_cpus++;
+ if ((int)t->cpu_id == c->base_cpu)
+ topo.allowed_cores++;
+ if ((int)t->cpu_id == p->base_cpu)
+ topo.allowed_packages++;
+
+ return 0;
+}
+
+void topology_update(void)
+{
+ topo.allowed_cpus = 0;
+ topo.allowed_cores = 0;
+ topo.allowed_packages = 0;
+ for_all_cpus(update_topo, ODD_COUNTERS);
+}
+void setup_all_buffers(bool startup)
{
- topology_probe();
+ topology_probe(startup);
allocate_irq_buffers();
allocate_fd_percpu();
allocate_counters(&thread_even, &core_even, &package_even);
allocate_counters(&thread_odd, &core_odd, &package_odd);
allocate_output_buffer();
for_all_proc_cpus(initialize_counters);
+ topology_update();
}
void set_base_cpu(void)
{
- base_cpu = sched_getcpu();
- if (base_cpu < 0)
- err(-ENODEV, "No valid cpus found");
+ int i;
- if (debug > 1)
- fprintf(outf, "base_cpu = %d\n", base_cpu);
+ for (i = 0; i < topo.max_cpu_num + 1; ++i) {
+ if (cpu_is_not_allowed(i))
+ continue;
+ base_cpu = i;
+ if (debug > 1)
+ fprintf(outf, "base_cpu = %d\n", base_cpu);
+ return;
+ }
+ err(-ENODEV, "No valid cpus found");
}
void turbostat_init()
{
- setup_all_buffers();
+ setup_all_buffers(true);
set_base_cpu();
check_dev_msr();
check_permissions();
process_cpuid();
+ probe_pm_features();
linux_perf_init();
- if (!quiet)
- for_all_cpus(print_hwp, ODD_COUNTERS);
-
- if (!quiet)
- for_all_cpus(print_epb, ODD_COUNTERS);
-
- if (!quiet)
- for_all_cpus(print_perf_limit, ODD_COUNTERS);
-
- if (!quiet)
- for_all_cpus(print_rapl, ODD_COUNTERS);
-
- for_all_cpus(set_temperature_target, ODD_COUNTERS);
-
for_all_cpus(get_cpu_type, ODD_COUNTERS);
for_all_cpus(get_cpu_type, EVEN_COUNTERS);
- if (!quiet)
- for_all_cpus(print_thermal, ODD_COUNTERS);
-
- if (!quiet && do_irtl_snb)
- print_irtl();
-
if (DO_BIC(BIC_IPC))
(void)get_instr_count_fd(base_cpu);
}
first_counter_read = 0;
if (status)
exit(status);
- /* clear affinity side-effect of get_counters() */
- sched_setaffinity(0, cpu_present_setsize, cpu_present_set);
gettimeofday(&tv_even, (struct timezone *)NULL);
child_pid = fork();
void print_version()
{
- fprintf(outf, "turbostat version 2023.03.17 - Len Brown <lenb@kernel.org>\n");
+ fprintf(outf, "turbostat version 2023.11.07 - Len Brown <lenb@kernel.org>\n");
}
#define COMMAND_LINE_SIZE 2048
*/
void parse_cpu_command(char *optarg)
{
- unsigned int start, end;
- char *next;
-
if (!strcmp(optarg, "core")) {
if (cpu_subset)
goto error;
CPU_ZERO_S(cpu_subset_size, cpu_subset);
- next = optarg;
-
- while (next && *next) {
-
- if (*next == '-') /* no negative cpu numbers */
- goto error;
-
- start = strtoul(next, &next, 10);
-
- if (start >= CPU_SUBSET_MAXCPUS)
- goto error;
- CPU_SET_S(start, cpu_subset_size, cpu_subset);
-
- if (*next == '\0')
- break;
-
- if (*next == ',') {
- next += 1;
- continue;
- }
-
- if (*next == '-') {
- next += 1; /* start range */
- } else if (*next == '.') {
- next += 1;
- if (*next == '.')
- next += 1; /* start range */
- else
- goto error;
- }
-
- end = strtoul(next, &next, 10);
- if (end <= start)
- goto error;
-
- while (++start <= end) {
- if (start >= CPU_SUBSET_MAXCPUS)
- goto error;
- CPU_SET_S(start, cpu_subset_size, cpu_subset);
- }
-
- if (*next == ',')
- next += 1;
- else if (*next != '\0')
- goto error;
- }
+ if (parse_cpu_str(optarg, cpu_subset, cpu_subset_size))
+ goto error;
return;
int main(int argc, char **argv)
{
+ int fd, ret;
+
+ fd = open("/sys/fs/cgroup/cgroup.procs", O_WRONLY);
+ if (fd < 0)
+ goto skip_cgroup_setting;
+
+ ret = write(fd, "0\n", 2);
+ if (ret == -1)
+ perror("Can't update cgroup\n");
+
+ close(fd);
+
+skip_cgroup_setting:
outf = stderr;
cmdline(argc, argv);
" ::: __clobber_all);
}
+SEC("socket")
+__description("conditional loop (2)")
+__success
+__failure_unpriv __msg_unpriv("back-edge from insn 10 to 11")
+__naked void conditional_loop2(void)
+{
+ asm volatile (" \
+ r9 = 2 ll; \
+ r3 = 0x20 ll; \
+ r4 = 0x35 ll; \
+ r8 = r4; \
+ goto l1_%=; \
+l0_%=: r9 -= r3; \
+ r9 -= r4; \
+ r9 -= r8; \
+l1_%=: r8 += r4; \
+ if r8 < 0x64 goto l0_%=; \
+ r0 = r9; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unconditional loop after conditional jump")
+__failure __msg("infinite loop detected")
+__failure_unpriv __msg_unpriv("back-edge from insn 3 to 2")
+__naked void uncond_loop_after_cond_jmp(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ if r0 > 0 goto l1_%=; \
+l0_%=: r0 = 1; \
+ goto l0_%=; \
+l1_%=: exit; \
+" ::: __clobber_all);
+}
+
+
+__naked __noinline __used
+static unsigned long never_ending_subprog()
+{
+ asm volatile (" \
+ r0 = r1; \
+ goto -1; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unconditional loop after conditional jump")
+/* infinite loop is detected *after* check_cfg() */
+__failure __msg("infinite loop detected")
+__naked void uncond_loop_in_subprog_after_cond_jmp(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ if r0 > 0 goto l1_%=; \
+l0_%=: r0 += 1; \
+ call never_ending_subprog; \
+l1_%=: exit; \
+" ::: __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
" ::: __clobber_all);
}
-SEC("tracepoint")
+SEC("socket")
__description("bounded loop, start in the middle")
-__failure __msg("back-edge")
+__success
+__failure_unpriv __msg_unpriv("back-edge")
__naked void loop_start_in_the_middle(void)
{
asm volatile (" \
SEC("tracepoint")
__description("bounded recursion")
-__failure __msg("back-edge")
+__failure
+/* verifier limitation in detecting max stack depth */
+__msg("the call stack of 8 frames is too deep !")
__naked void bounded_recursion(void)
{
asm volatile (" \
}
#endif /* v4 instruction */
+
+SEC("?raw_tp")
+__success __log_level(2)
+/*
+ * Without the bug fix there will be no history between "last_idx 3 first_idx 3"
+ * and "parent state regs=" lines. "R0_w=6" parts are here to help anchor
+ * expected log messages to the one specific mark_chain_precision operation.
+ *
+ * This is quite fragile: if verifier checkpointing heuristic changes, this
+ * might need adjusting.
+ */
+__msg("2: (07) r0 += 1 ; R0_w=6")
+__msg("3: (35) if r0 >= 0xa goto pc+1")
+__msg("mark_precise: frame0: last_idx 3 first_idx 3 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r0 stack= before 2: (07) r0 += 1")
+__msg("mark_precise: frame0: regs=r0 stack= before 1: (07) r0 += 1")
+__msg("mark_precise: frame0: regs=r0 stack= before 4: (05) goto pc-4")
+__msg("mark_precise: frame0: regs=r0 stack= before 3: (35) if r0 >= 0xa goto pc+1")
+__msg("mark_precise: frame0: parent state regs= stack=: R0_rw=P4")
+__msg("3: R0_w=6")
+__naked int state_loop_first_last_equal(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "l0_%=:"
+ "r0 += 1;"
+ "r0 += 1;"
+ /* every few iterations we'll have a checkpoint here with
+ * first_idx == last_idx, potentially confusing precision
+ * backtracking logic
+ */
+ "if r0 >= 10 goto l1_%=;" /* checkpoint + mark_precise */
+ "goto l0_%=;"
+ "l1_%=:"
+ "exit;"
+ ::: __clobber_common
+ );
+}
+
+char _license[] SEC("license") = "GPL";
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
- .errstr = "back-edge from insn 0 to 0",
+ .errstr = "the call stack of 9 frames is too deep",
.result = REJECT,
},
{
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
- .errstr = "back-edge",
+ .errstr = "the call stack of 9 frames is too deep",
.result = REJECT,
},
{
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
- .errstr = "back-edge",
+ .errstr = "the call stack of 9 frames is too deep",
.result = REJECT,
},
{
BPF_MOV64_IMM(BPF_REG_0, 2),
BPF_EXIT_INSN(),
},
- .errstr = "invalid BPF_LD_IMM insn",
- .errstr_unpriv = "R1 pointer comparison",
+ .errstr = "jump into the middle of ldimm64 insn 1",
+ .errstr_unpriv = "jump into the middle of ldimm64 insn 1",
.result = REJECT,
},
{
BPF_LD_IMM64(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- .errstr = "invalid BPF_LD_IMM insn",
- .errstr_unpriv = "R1 pointer comparison",
+ .errstr = "jump into the middle of ldimm64 insn 1",
+ .errstr_unpriv = "jump into the middle of ldimm64 insn 1",
.result = REJECT,
},
{
struct xdp_info *meta = data - sizeof(struct xdp_info);
if (meta->count != pkt->pkt_nb) {
- ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%d]\n",
- __func__, pkt->pkt_nb, meta->count);
+ ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%llu]\n",
+ __func__, pkt->pkt_nb,
+ (unsigned long long)meta->count);
return false;
}
if (addr >= umem->num_frames * umem->frame_size ||
addr + len > umem->num_frames * umem->frame_size) {
- ksft_print_msg("Frag invalid addr: %llx len: %u\n", addr, len);
+ ksft_print_msg("Frag invalid addr: %llx len: %u\n",
+ (unsigned long long)addr, len);
return false;
}
if (!umem->unaligned_mode && addr % umem->frame_size + len > umem->frame_size) {
- ksft_print_msg("Frag crosses frame boundary addr: %llx len: %u\n", addr, len);
+ ksft_print_msg("Frag crosses frame boundary addr: %llx len: %u\n",
+ (unsigned long long)addr, len);
return false;
}
u64 addr = *xsk_ring_cons__comp_addr(&xsk->umem->cq, idx + rcvd - 1);
ksft_print_msg("[%s] Too many packets completed\n", __func__);
- ksft_print_msg("Last completion address: %llx\n", addr);
+ ksft_print_msg("Last completion address: %llx\n",
+ (unsigned long long)addr);
return TEST_FAILURE;
}
}
if (stats.tx_invalid_descs != ifobject->xsk->pkt_stream->nb_pkts / 2) {
- ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%u] expected [%u]\n",
- __func__, stats.tx_invalid_descs,
+ ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%llu] expected [%u]\n",
+ __func__,
+ (unsigned long long)stats.tx_invalid_descs,
ifobject->xsk->pkt_stream->nb_pkts);
return TEST_FAILURE;
}
gup_longterm
mkdirty
va_high_addr_switch
+hugetlb_fault_after_madv
uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY);
if (uffd == -1)
- ksft_exit_fail_msg("uffd syscall failed\n");
+ return uffd;
uffdio_api.api = UFFD_API;
uffdio_api.features = UFFD_FEATURE_WP_UNPOPULATED | UFFD_FEATURE_WP_ASYNC |
UFFD_FEATURE_WP_HUGETLBFS_SHMEM;
if (ioctl(uffd, UFFDIO_API, &uffdio_api))
- ksft_exit_fail_msg("UFFDIO_API\n");
+ return -1;
if (!(uffdio_api.api & UFFDIO_REGISTER_MODE_WP) ||
!(uffdio_api.features & UFFD_FEATURE_WP_UNPOPULATED) ||
!(uffdio_api.features & UFFD_FEATURE_WP_ASYNC) ||
!(uffdio_api.features & UFFD_FEATURE_WP_HUGETLBFS_SHMEM))
- ksft_exit_fail_msg("UFFDIO_API error %llu\n", uffdio_api.api);
+ return -1;
return 0;
}
/* 9. Memory mapped file */
fd = open(__FILE__, O_RDONLY);
if (fd < 0)
- ksft_exit_fail_msg("%s Memory mapped file\n");
+ ksft_exit_fail_msg("%s Memory mapped file\n", __func__);
ret = stat(__FILE__, &sbuf);
if (ret < 0)
fmem = mmap(NULL, sbuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
if (fmem == MAP_FAILED)
- ksft_exit_fail_msg("error nomem %ld %s\n", errno, strerror(errno));
+ ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno));
tmp_buf = malloc(sbuf.st_size);
memcpy(tmp_buf, fmem, sbuf.st_size);
fmem = mmap(NULL, buf_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
if (fmem == MAP_FAILED)
- ksft_exit_fail_msg("error nomem %ld %s\n", errno, strerror(errno));
+ ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno));
wp_init(fmem, buf_size);
wp_addr_range(fmem, buf_size);
struct stat sbuf;
ksft_print_header();
+
+ if (init_uffd())
+ return ksft_exit_pass();
+
ksft_set_plan(115);
page_size = getpagesize();
if (pagemap_fd < 0)
return -EINVAL;
- if (init_uffd())
- ksft_exit_fail_msg("uffd init failed\n");
-
/* 1. Sanity testing */
sanity_tests_sd();
fmem = mmap(NULL, sbuf.st_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
if (fmem == MAP_FAILED)
- ksft_exit_fail_msg("error nomem %ld %s\n", errno, strerror(errno));
+ ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno));
wp_init(fmem, sbuf.st_size);
wp_addr_range(fmem, sbuf.st_size);
fmem = mmap(NULL, buf_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
if (fmem == MAP_FAILED)
- ksft_exit_fail_msg("error nomem %ld %s\n", errno, strerror(errno));
+ ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno));
wp_init(fmem, buf_size);
wp_addr_range(fmem, buf_size);
CATEGORY="hugetlb" run_test ./hugepage-vmemmap
CATEGORY="hugetlb" run_test ./hugetlb-madvise
+nr_hugepages_tmp=$(cat /proc/sys/vm/nr_hugepages)
# For this test, we need one and just one huge page
echo 1 > /proc/sys/vm/nr_hugepages
CATEGORY="hugetlb" run_test ./hugetlb_fault_after_madv
+# Restore the previous number of huge pages, since further tests rely on it
+echo "$nr_hugepages_tmp" > /proc/sys/vm/nr_hugepages
if test_selected "hugetlb"; then
echo "NOTE: These hugetlb tests provide minimal coverage. Use"
if reset_check_counter "fastclose server test" "MPTcpExtMPFastcloseRx"; then
test_linkfail=1024 fastclose=server \
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 0 0 0
+ chk_join_nr 0 0 0 0 0 0 1
chk_fclose_nr 1 1 invert
chk_rst_nr 1 1
fi