Merge branch 'kvm-amd-fixes' into HEAD
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index c2e6d4b..389ff1b 100644
@@ -2,7 +2,7 @@
 /*
  * hosting IBM Z kernel virtual machines (s390x)
  *
- * Copyright IBM Corp. 2008, 2018
+ * Copyright IBM Corp. 2008, 2020
  *
  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  *               Christian Borntraeger <borntraeger@de.ibm.com>
@@ -44,6 +44,7 @@
 #include <asm/cpacf.h>
 #include <asm/timex.h>
 #include <asm/ap.h>
+#include <asm/uv.h>
 #include "kvm-s390.h"
 #include "gaccess.h"
 
 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
                           (KVM_MAX_VCPUS + LOCAL_IRQS))
 
-#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
-#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
-
 struct kvm_stats_debugfs_item debugfs_entries[] = {
-       { "userspace_handled", VCPU_STAT(exit_userspace) },
-       { "exit_null", VCPU_STAT(exit_null) },
-       { "exit_validity", VCPU_STAT(exit_validity) },
-       { "exit_stop_request", VCPU_STAT(exit_stop_request) },
-       { "exit_external_request", VCPU_STAT(exit_external_request) },
-       { "exit_io_request", VCPU_STAT(exit_io_request) },
-       { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
-       { "exit_instruction", VCPU_STAT(exit_instruction) },
-       { "exit_pei", VCPU_STAT(exit_pei) },
-       { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
-       { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
-       { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
-       { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
-       { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
-       { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
-       { "halt_no_poll_steal", VCPU_STAT(halt_no_poll_steal) },
-       { "halt_wakeup", VCPU_STAT(halt_wakeup) },
-       { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
-       { "instruction_lctl", VCPU_STAT(instruction_lctl) },
-       { "instruction_stctl", VCPU_STAT(instruction_stctl) },
-       { "instruction_stctg", VCPU_STAT(instruction_stctg) },
-       { "deliver_ckc", VCPU_STAT(deliver_ckc) },
-       { "deliver_cputm", VCPU_STAT(deliver_cputm) },
-       { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
-       { "deliver_external_call", VCPU_STAT(deliver_external_call) },
-       { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
-       { "deliver_virtio", VCPU_STAT(deliver_virtio) },
-       { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
-       { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
-       { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
-       { "deliver_program", VCPU_STAT(deliver_program) },
-       { "deliver_io", VCPU_STAT(deliver_io) },
-       { "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
-       { "exit_wait_state", VCPU_STAT(exit_wait_state) },
-       { "inject_ckc", VCPU_STAT(inject_ckc) },
-       { "inject_cputm", VCPU_STAT(inject_cputm) },
-       { "inject_external_call", VCPU_STAT(inject_external_call) },
-       { "inject_float_mchk", VM_STAT(inject_float_mchk) },
-       { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
-       { "inject_io", VM_STAT(inject_io) },
-       { "inject_mchk", VCPU_STAT(inject_mchk) },
-       { "inject_pfault_done", VM_STAT(inject_pfault_done) },
-       { "inject_program", VCPU_STAT(inject_program) },
-       { "inject_restart", VCPU_STAT(inject_restart) },
-       { "inject_service_signal", VM_STAT(inject_service_signal) },
-       { "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
-       { "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
-       { "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
-       { "inject_virtio", VM_STAT(inject_virtio) },
-       { "instruction_epsw", VCPU_STAT(instruction_epsw) },
-       { "instruction_gs", VCPU_STAT(instruction_gs) },
-       { "instruction_io_other", VCPU_STAT(instruction_io_other) },
-       { "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
-       { "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
-       { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
-       { "instruction_ptff", VCPU_STAT(instruction_ptff) },
-       { "instruction_stidp", VCPU_STAT(instruction_stidp) },
-       { "instruction_sck", VCPU_STAT(instruction_sck) },
-       { "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
-       { "instruction_spx", VCPU_STAT(instruction_spx) },
-       { "instruction_stpx", VCPU_STAT(instruction_stpx) },
-       { "instruction_stap", VCPU_STAT(instruction_stap) },
-       { "instruction_iske", VCPU_STAT(instruction_iske) },
-       { "instruction_ri", VCPU_STAT(instruction_ri) },
-       { "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
-       { "instruction_sske", VCPU_STAT(instruction_sske) },
-       { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
-       { "instruction_essa", VCPU_STAT(instruction_essa) },
-       { "instruction_stsi", VCPU_STAT(instruction_stsi) },
-       { "instruction_stfl", VCPU_STAT(instruction_stfl) },
-       { "instruction_tb", VCPU_STAT(instruction_tb) },
-       { "instruction_tpi", VCPU_STAT(instruction_tpi) },
-       { "instruction_tprot", VCPU_STAT(instruction_tprot) },
-       { "instruction_tsch", VCPU_STAT(instruction_tsch) },
-       { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
-       { "instruction_sie", VCPU_STAT(instruction_sie) },
-       { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
-       { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
-       { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
-       { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
-       { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
-       { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
-       { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
-       { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
-       { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
-       { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
-       { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
-       { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
-       { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
-       { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
-       { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
-       { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
-       { "instruction_diag_10", VCPU_STAT(diagnose_10) },
-       { "instruction_diag_44", VCPU_STAT(diagnose_44) },
-       { "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
-       { "diag_9c_ignored", VCPU_STAT(diagnose_9c_ignored) },
-       { "instruction_diag_258", VCPU_STAT(diagnose_258) },
-       { "instruction_diag_308", VCPU_STAT(diagnose_308) },
-       { "instruction_diag_500", VCPU_STAT(diagnose_500) },
-       { "instruction_diag_other", VCPU_STAT(diagnose_other) },
+       VCPU_STAT("userspace_handled", exit_userspace),
+       VCPU_STAT("exit_null", exit_null),
+       VCPU_STAT("exit_validity", exit_validity),
+       VCPU_STAT("exit_stop_request", exit_stop_request),
+       VCPU_STAT("exit_external_request", exit_external_request),
+       VCPU_STAT("exit_io_request", exit_io_request),
+       VCPU_STAT("exit_external_interrupt", exit_external_interrupt),
+       VCPU_STAT("exit_instruction", exit_instruction),
+       VCPU_STAT("exit_pei", exit_pei),
+       VCPU_STAT("exit_program_interruption", exit_program_interruption),
+       VCPU_STAT("exit_instr_and_program_int", exit_instr_and_program),
+       VCPU_STAT("exit_operation_exception", exit_operation_exception),
+       VCPU_STAT("halt_successful_poll", halt_successful_poll),
+       VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
+       VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
+       VCPU_STAT("halt_no_poll_steal", halt_no_poll_steal),
+       VCPU_STAT("halt_wakeup", halt_wakeup),
+       VCPU_STAT("instruction_lctlg", instruction_lctlg),
+       VCPU_STAT("instruction_lctl", instruction_lctl),
+       VCPU_STAT("instruction_stctl", instruction_stctl),
+       VCPU_STAT("instruction_stctg", instruction_stctg),
+       VCPU_STAT("deliver_ckc", deliver_ckc),
+       VCPU_STAT("deliver_cputm", deliver_cputm),
+       VCPU_STAT("deliver_emergency_signal", deliver_emergency_signal),
+       VCPU_STAT("deliver_external_call", deliver_external_call),
+       VCPU_STAT("deliver_service_signal", deliver_service_signal),
+       VCPU_STAT("deliver_virtio", deliver_virtio),
+       VCPU_STAT("deliver_stop_signal", deliver_stop_signal),
+       VCPU_STAT("deliver_prefix_signal", deliver_prefix_signal),
+       VCPU_STAT("deliver_restart_signal", deliver_restart_signal),
+       VCPU_STAT("deliver_program", deliver_program),
+       VCPU_STAT("deliver_io", deliver_io),
+       VCPU_STAT("deliver_machine_check", deliver_machine_check),
+       VCPU_STAT("exit_wait_state", exit_wait_state),
+       VCPU_STAT("inject_ckc", inject_ckc),
+       VCPU_STAT("inject_cputm", inject_cputm),
+       VCPU_STAT("inject_external_call", inject_external_call),
+       VM_STAT("inject_float_mchk", inject_float_mchk),
+       VCPU_STAT("inject_emergency_signal", inject_emergency_signal),
+       VM_STAT("inject_io", inject_io),
+       VCPU_STAT("inject_mchk", inject_mchk),
+       VM_STAT("inject_pfault_done", inject_pfault_done),
+       VCPU_STAT("inject_program", inject_program),
+       VCPU_STAT("inject_restart", inject_restart),
+       VM_STAT("inject_service_signal", inject_service_signal),
+       VCPU_STAT("inject_set_prefix", inject_set_prefix),
+       VCPU_STAT("inject_stop_signal", inject_stop_signal),
+       VCPU_STAT("inject_pfault_init", inject_pfault_init),
+       VM_STAT("inject_virtio", inject_virtio),
+       VCPU_STAT("instruction_epsw", instruction_epsw),
+       VCPU_STAT("instruction_gs", instruction_gs),
+       VCPU_STAT("instruction_io_other", instruction_io_other),
+       VCPU_STAT("instruction_lpsw", instruction_lpsw),
+       VCPU_STAT("instruction_lpswe", instruction_lpswe),
+       VCPU_STAT("instruction_pfmf", instruction_pfmf),
+       VCPU_STAT("instruction_ptff", instruction_ptff),
+       VCPU_STAT("instruction_stidp", instruction_stidp),
+       VCPU_STAT("instruction_sck", instruction_sck),
+       VCPU_STAT("instruction_sckpf", instruction_sckpf),
+       VCPU_STAT("instruction_spx", instruction_spx),
+       VCPU_STAT("instruction_stpx", instruction_stpx),
+       VCPU_STAT("instruction_stap", instruction_stap),
+       VCPU_STAT("instruction_iske", instruction_iske),
+       VCPU_STAT("instruction_ri", instruction_ri),
+       VCPU_STAT("instruction_rrbe", instruction_rrbe),
+       VCPU_STAT("instruction_sske", instruction_sske),
+       VCPU_STAT("instruction_ipte_interlock", instruction_ipte_interlock),
+       VCPU_STAT("instruction_essa", instruction_essa),
+       VCPU_STAT("instruction_stsi", instruction_stsi),
+       VCPU_STAT("instruction_stfl", instruction_stfl),
+       VCPU_STAT("instruction_tb", instruction_tb),
+       VCPU_STAT("instruction_tpi", instruction_tpi),
+       VCPU_STAT("instruction_tprot", instruction_tprot),
+       VCPU_STAT("instruction_tsch", instruction_tsch),
+       VCPU_STAT("instruction_sthyi", instruction_sthyi),
+       VCPU_STAT("instruction_sie", instruction_sie),
+       VCPU_STAT("instruction_sigp_sense", instruction_sigp_sense),
+       VCPU_STAT("instruction_sigp_sense_running", instruction_sigp_sense_running),
+       VCPU_STAT("instruction_sigp_external_call", instruction_sigp_external_call),
+       VCPU_STAT("instruction_sigp_emergency", instruction_sigp_emergency),
+       VCPU_STAT("instruction_sigp_cond_emergency", instruction_sigp_cond_emergency),
+       VCPU_STAT("instruction_sigp_start", instruction_sigp_start),
+       VCPU_STAT("instruction_sigp_stop", instruction_sigp_stop),
+       VCPU_STAT("instruction_sigp_stop_store_status", instruction_sigp_stop_store_status),
+       VCPU_STAT("instruction_sigp_store_status", instruction_sigp_store_status),
+       VCPU_STAT("instruction_sigp_store_adtl_status", instruction_sigp_store_adtl_status),
+       VCPU_STAT("instruction_sigp_set_arch", instruction_sigp_arch),
+       VCPU_STAT("instruction_sigp_set_prefix", instruction_sigp_prefix),
+       VCPU_STAT("instruction_sigp_restart", instruction_sigp_restart),
+       VCPU_STAT("instruction_sigp_cpu_reset", instruction_sigp_cpu_reset),
+       VCPU_STAT("instruction_sigp_init_cpu_reset", instruction_sigp_init_cpu_reset),
+       VCPU_STAT("instruction_sigp_unknown", instruction_sigp_unknown),
+       VCPU_STAT("instruction_diag_10", diagnose_10),
+       VCPU_STAT("instruction_diag_44", diagnose_44),
+       VCPU_STAT("instruction_diag_9c", diagnose_9c),
+       VCPU_STAT("diag_9c_ignored", diagnose_9c_ignored),
+       VCPU_STAT("instruction_diag_258", diagnose_258),
+       VCPU_STAT("instruction_diag_308", diagnose_308),
+       VCPU_STAT("instruction_diag_500", diagnose_500),
+       VCPU_STAT("instruction_diag_other", diagnose_other),
        { NULL }
 };
 
@@ -184,6 +182,11 @@ static u8 halt_poll_max_steal = 10;
 module_param(halt_poll_max_steal, byte, 0644);
 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
 
+/* if set to true, the GISA will be initialized and used if available */
+static bool use_gisa = true;
+module_param(use_gisa, bool, 0644);
+MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
+
 /*
  * For now we handle at most 16 double words as this is what the s390 base
  * kernel handles and stores in the prefix page. If we ever need to go beyond
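
On the use_gisa knob added above: module_param(..., 0644) makes it writable at
runtime through sysfs, and since kvm_s390_gisa_init() runs at VM creation, a
change only affects VMs created afterwards. A minimal sketch (the
/sys/module/kvm/parameters/use_gisa path is an assumption based on the usual
module_param layout for s390's kvm.ko):

    #include <stdio.h>

    /* Sketch: turn off GISA usage for subsequently created VMs. */
    static int disable_gisa_for_new_vms(void)
    {
        FILE *f = fopen("/sys/module/kvm/parameters/use_gisa", "w");

        if (!f)
            return -1;
        fputs("0\n", f);    /* existing VMs keep their GISA */
        return fclose(f) ? -1 : 0;
    }
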
@@ -220,6 +223,7 @@ static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
 static struct gmap_notifier gmap_notifier;
 static struct gmap_notifier vsie_gmap_notifier;
 debug_info_t *kvm_s390_dbf;
+debug_info_t *kvm_s390_dbf_uv;
 
 /* Section: not file related */
 int kvm_arch_hardware_enable(void)
@@ -228,13 +232,15 @@ int kvm_arch_hardware_enable(void)
        return 0;
 }
 
-int kvm_arch_check_processor_compat(void)
+int kvm_arch_check_processor_compat(void *opaque)
 {
        return 0;
 }
 
+/* forward declarations */
 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
                              unsigned long end);
+static int sca_switch_to_extended(struct kvm *kvm);
 
 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
 {
@@ -293,7 +299,7 @@ static struct notifier_block kvm_clock_notifier = {
        .notifier_call = kvm_clock_sync,
 };
 
-int kvm_arch_hardware_setup(void)
+int kvm_arch_hardware_setup(void *opaque)
 {
        gmap_notifier.notifier_call = kvm_gmap_notifier;
        gmap_register_pte_notifier(&gmap_notifier);
@@ -460,7 +466,12 @@ int kvm_arch_init(void *opaque)
        if (!kvm_s390_dbf)
                return -ENOMEM;
 
-       if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view))
+       kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
+       if (!kvm_s390_dbf_uv)
+               goto out;
+
+       if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
+           debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
                goto out;
 
        kvm_s390_cpu_feat_init();
@@ -487,6 +498,7 @@ void kvm_arch_exit(void)
 {
        kvm_s390_gib_destroy();
        debug_unregister(kvm_s390_dbf);
+       debug_unregister(kvm_s390_dbf_uv);
 }
 
 /* Section: device related */
@@ -530,6 +542,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_S390_AIS:
        case KVM_CAP_S390_AIS_MIGRATION:
        case KVM_CAP_S390_VCPU_RESETS:
+       case KVM_CAP_SET_GUEST_DEBUG:
                r = 1;
                break;
        case KVM_CAP_S390_HPAGE_1M:
@@ -564,14 +577,16 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_S390_BPB:
                r = test_facility(82);
                break;
+       case KVM_CAP_S390_PROTECTED:
+               r = is_prot_virt_host();
+               break;
        default:
                r = 0;
        }
        return r;
 }
 
-static void kvm_s390_sync_dirty_log(struct kvm *kvm,
-                                   struct kvm_memory_slot *memslot)
+void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
 {
        int i;
        gfn_t cur_gfn, last_gfn;
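
The KVM_CAP_S390_PROTECTED extension added in the hunk above is how userspace
should probe for protected-virtualization support before issuing any PV
command. A minimal sketch (assumes a <linux/kvm.h> that already exports the
capability number):

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Returns nonzero iff the kernel reported is_prot_virt_host() as true. */
    static int host_supports_prot_virt(int kvm_fd)
    {
        return ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_PROTECTED) > 0;
    }
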
@@ -612,9 +627,8 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 {
        int r;
        unsigned long n;
-       struct kvm_memslots *slots;
        struct kvm_memory_slot *memslot;
-       int is_dirty = 0;
+       int is_dirty;
 
        if (kvm_is_ucontrol(kvm))
                return -EINVAL;
@@ -625,14 +639,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
        if (log->slot >= KVM_USER_MEM_SLOTS)
                goto out;
 
-       slots = kvm_memslots(kvm);
-       memslot = id_to_memslot(slots, log->slot);
-       r = -ENOENT;
-       if (!memslot->dirty_bitmap)
-               goto out;
-
-       kvm_s390_sync_dirty_log(kvm, memslot);
-       r = kvm_get_dirty_log(kvm, log, &is_dirty);
+       r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
        if (r)
                goto out;
 
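
For reference, the userspace side of this path is unchanged: KVM_GET_DIRTY_LOG
still takes a slot id and a bitmap buffer; only the in-kernel helper now looks
up the memslot itself. A hedged sketch (bitmap sizing follows the usual
one-bit-per-guest-page rule; error handling trimmed):

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Fetch and reset the dirty bitmap of one memory slot. */
    static int get_dirty_log(int vm_fd, __u32 slot, void *bitmap)
    {
        struct kvm_dirty_log log = {
            .slot = slot,
            .dirty_bitmap = bitmap,    /* one bit per guest page */
        };

        return ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);
    }
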
@@ -1930,6 +1937,9 @@ static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
                        start = slot + 1;
        }
 
+       if (start >= slots->used_slots)
+               return slots->used_slots - 1;
+
        if (gfn >= memslots[start].base_gfn &&
            gfn < memslots[start].base_gfn + memslots[start].npages) {
                atomic_set(&slots->lru_slot, start);
@@ -1993,6 +2003,9 @@ static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
        struct kvm_memslots *slots = kvm_memslots(kvm);
        struct kvm_memory_slot *ms;
 
+       if (unlikely(!slots->used_slots))
+               return 0;
+
        cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
        ms = gfn_to_memslot(kvm, cur_gfn);
        args->count = 0;
@@ -2158,6 +2171,194 @@ out:
        return r;
 }
 
+static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
+{
+       struct kvm_vcpu *vcpu;
+       u16 rc, rrc;
+       int ret = 0;
+       int i;
+
+       /*
+        * We ignore failures and try to destroy as many CPUs as possible.
+        * At the same time we must not free the assigned resources when
+        * this fails, as the ultravisor still has access to that memory.
+        * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
+        * behind.
+        * We want to return the first failure rc and rrc, though.
+        */
+       kvm_for_each_vcpu(i, vcpu, kvm) {
+               mutex_lock(&vcpu->mutex);
+               if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
+                       *rcp = rc;
+                       *rrcp = rrc;
+                       ret = -EIO;
+               }
+               mutex_unlock(&vcpu->mutex);
+       }
+       return ret;
+}
+
+static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+       int i, r = 0;
+       u16 dummy;
+
+       struct kvm_vcpu *vcpu;
+
+       kvm_for_each_vcpu(i, vcpu, kvm) {
+               mutex_lock(&vcpu->mutex);
+               r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
+               mutex_unlock(&vcpu->mutex);
+               if (r)
+                       break;
+       }
+       if (r)
+               kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
+       return r;
+}
+
+static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
+{
+       int r = 0;
+       u16 dummy;
+       void __user *argp = (void __user *)cmd->data;
+
+       switch (cmd->cmd) {
+       case KVM_PV_ENABLE: {
+               r = -EINVAL;
+               if (kvm_s390_pv_is_protected(kvm))
+                       break;
+
+               /*
+                * FMT 4 SIE needs esca. As we never switch back to bsca from
+                *  esca, we need no cleanup in the error cases below
+                */
+               r = sca_switch_to_extended(kvm);
+               if (r)
+                       break;
+
+               down_write(&current->mm->mmap_sem);
+               r = gmap_mark_unmergeable();
+               up_write(&current->mm->mmap_sem);
+               if (r)
+                       break;
+
+               r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
+               if (r)
+                       break;
+
+               r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
+               if (r)
+                       kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
+
+               /* we need to block service interrupts from now on */
+               set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
+               break;
+       }
+       case KVM_PV_DISABLE: {
+               r = -EINVAL;
+               if (!kvm_s390_pv_is_protected(kvm))
+                       break;
+
+               r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
+               /*
+                * If a CPU could not be destroyed, destroy VM will also fail.
+                * There is no point in trying to destroy it. Instead return
+                * the rc and rrc from the first CPU that failed destroying.
+                */
+               if (r)
+                       break;
+               r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
+
+               /* no need to block service interrupts any more */
+               clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
+               break;
+       }
+       case KVM_PV_SET_SEC_PARMS: {
+               struct kvm_s390_pv_sec_parm parms = {};
+               void *hdr;
+
+               r = -EINVAL;
+               if (!kvm_s390_pv_is_protected(kvm))
+                       break;
+
+               r = -EFAULT;
+               if (copy_from_user(&parms, argp, sizeof(parms)))
+                       break;
+
+               /* Currently restricted to 8KB */
+               r = -EINVAL;
+               if (parms.length > PAGE_SIZE * 2)
+                       break;
+
+               r = -ENOMEM;
+               hdr = vmalloc(parms.length);
+               if (!hdr)
+                       break;
+
+               r = -EFAULT;
+               if (!copy_from_user(hdr, (void __user *)parms.origin,
+                                   parms.length))
+                       r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
+                                                     &cmd->rc, &cmd->rrc);
+
+               vfree(hdr);
+               break;
+       }
+       case KVM_PV_UNPACK: {
+               struct kvm_s390_pv_unp unp = {};
+
+               r = -EINVAL;
+               if (!kvm_s390_pv_is_protected(kvm))
+                       break;
+
+               r = -EFAULT;
+               if (copy_from_user(&unp, argp, sizeof(unp)))
+                       break;
+
+               r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
+                                      &cmd->rc, &cmd->rrc);
+               break;
+       }
+       case KVM_PV_VERIFY: {
+               r = -EINVAL;
+               if (!kvm_s390_pv_is_protected(kvm))
+                       break;
+
+               r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
+                                 UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
+               KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
+                            cmd->rrc);
+               break;
+       }
+       case KVM_PV_PREP_RESET: {
+               r = -EINVAL;
+               if (!kvm_s390_pv_is_protected(kvm))
+                       break;
+
+               r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
+                                 UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
+               KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
+                            cmd->rc, cmd->rrc);
+               break;
+       }
+       case KVM_PV_UNSHARE_ALL: {
+               r = -EINVAL;
+               if (!kvm_s390_pv_is_protected(kvm))
+                       break;
+
+               r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
+                                 UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
+               KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
+                            cmd->rc, cmd->rrc);
+               break;
+       }
+       default:
+               r = -ENOTTY;
+       }
+       return r;
+}
+
 long kvm_arch_vm_ioctl(struct file *filp,
                       unsigned int ioctl, unsigned long arg)
 {
@@ -2255,6 +2456,33 @@ long kvm_arch_vm_ioctl(struct file *filp,
                mutex_unlock(&kvm->slots_lock);
                break;
        }
+       case KVM_S390_PV_COMMAND: {
+               struct kvm_pv_cmd args;
+
+               /* protvirt means user sigp */
+               kvm->arch.user_cpu_state_ctrl = 1;
+               r = 0;
+               if (!is_prot_virt_host()) {
+                       r = -EINVAL;
+                       break;
+               }
+               if (copy_from_user(&args, argp, sizeof(args))) {
+                       r = -EFAULT;
+                       break;
+               }
+               if (args.flags) {
+                       r = -EINVAL;
+                       break;
+               }
+               mutex_lock(&kvm->lock);
+               r = kvm_s390_handle_pv(kvm, &args);
+               mutex_unlock(&kvm->lock);
+               if (copy_to_user(argp, &args, sizeof(args))) {
+                       r = -EFAULT;
+                       break;
+               }
+               break;
+       }
        default:
                r = -ENOTTY;
        }
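
Putting the new ioctl together, the expected userspace flow is: check
KVM_CAP_S390_PROTECTED, then drive the protected-VM lifecycle with
KVM_S390_PV_COMMAND. A minimal, hedged sketch of the enable step (struct
kvm_pv_cmd fields are taken from the UAPI; flags must be zero as checked
above, and rc/rrc carry the ultravisor return codes on failure):

    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Convert an already configured VM into a protected VM. */
    static int pv_enable(int vm_fd)
    {
        struct kvm_pv_cmd cmd = {
            .cmd = KVM_PV_ENABLE,
            .flags = 0,    /* must be zero for now */
        };

        if (ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd)) {
            fprintf(stderr, "KVM_PV_ENABLE: rc=0x%x rrc=0x%x\n",
                    cmd.rc, cmd.rrc);
            return -1;
        }
        return 0;
    }

The KVM_PV_SET_SEC_PARMS and KVM_PV_UNPACK steps follow the same pattern,
with cmd.data pointing at the respective parameter structs.
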
@@ -2504,7 +2732,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
        kvm->arch.use_skf = sclp.has_skey;
        spin_lock_init(&kvm->arch.start_stop_lock);
        kvm_s390_vsie_init(kvm);
-       kvm_s390_gisa_init(kvm);
+       if (use_gisa)
+               kvm_s390_gisa_init(kvm);
        KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
 
        return 0;
@@ -2518,6 +2747,8 @@ out_err:
 
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
+       u16 rc, rrc;
+
        VCPU_EVENT(vcpu, 3, "%s", "free cpu");
        trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
        kvm_s390_clear_local_irqs(vcpu);
@@ -2530,6 +2761,9 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 
        if (vcpu->kvm->arch.use_cmma)
                kvm_s390_vcpu_unsetup_cmma(vcpu);
+       /* We cannot hold the vcpu mutex here; we are already dying */
+       if (kvm_s390_pv_cpu_get_handle(vcpu))
+               kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
        free_page((unsigned long)(vcpu->arch.sie_block));
 }
 
@@ -2551,10 +2785,20 @@ static void kvm_free_vcpus(struct kvm *kvm)
 
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
+       u16 rc, rrc;
+
        kvm_free_vcpus(kvm);
        sca_dispose(kvm);
-       debug_unregister(kvm->arch.dbf);
        kvm_s390_gisa_destroy(kvm);
+       /*
+        * We are already at the end of life and kvm->lock is not taken.
+        * This is ok as the file descriptor is closed by now and nobody
+        * can mess with the pv state. To avoid lockdep_assert_held from
+        * complaining we do not use kvm_s390_pv_is_protected.
+        */
+       if (kvm_s390_pv_get_handle(kvm))
+               kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
+       debug_unregister(kvm->arch.dbf);
        free_page((unsigned long)kvm->arch.sie_page2);
        if (!kvm_is_ucontrol(kvm))
                gmap_remove(kvm->arch.gmap);
@@ -2650,6 +2894,9 @@ static int sca_switch_to_extended(struct kvm *kvm)
        unsigned int vcpu_idx;
        u32 scaol, scaoh;
 
+       if (kvm->arch.use_esca)
+               return 0;
+
        new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
        if (!new_sca)
                return -ENOMEM;
@@ -2901,6 +3148,7 @@ static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
 {
        int rc = 0;
+       u16 uvrc, uvrrc;
 
        atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
                                                    CPUSTAT_SM |
@@ -2968,6 +3216,14 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
 
        kvm_s390_vcpu_crypto_setup(vcpu);
 
+       mutex_lock(&vcpu->kvm->lock);
+       if (kvm_s390_pv_is_protected(vcpu->kvm)) {
+               rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
+               if (rc)
+                       kvm_s390_vcpu_unsetup_cmma(vcpu);
+       }
+       mutex_unlock(&vcpu->kvm->lock);
+
        return rc;
 }
 
@@ -3277,7 +3533,6 @@ static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
        kvm_s390_set_prefix(vcpu, 0);
        kvm_s390_set_cpu_timer(vcpu, 0);
        vcpu->arch.sie_block->ckc = 0;
-       vcpu->arch.sie_block->todpr = 0;
        memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
        vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
        vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
@@ -3295,9 +3550,17 @@ static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
        vcpu->run->s.regs.pp = 0;
        vcpu->run->s.regs.gbea = 1;
        vcpu->run->s.regs.fpc = 0;
-       vcpu->arch.sie_block->gbea = 1;
-       vcpu->arch.sie_block->pp = 0;
-       vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
+       /*
+        * Do not reset these registers in the protected case, as some of
+        * them are overlaid and they are not accessible in this case
+        * anyway.
+        */
+       if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
+               vcpu->arch.sie_block->gbea = 1;
+               vcpu->arch.sie_block->pp = 0;
+               vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
+               vcpu->arch.sie_block->todpr = 0;
+       }
 }
 
 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
@@ -3487,14 +3750,20 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 
        switch (mp_state->mp_state) {
        case KVM_MP_STATE_STOPPED:
-               kvm_s390_vcpu_stop(vcpu);
+               rc = kvm_s390_vcpu_stop(vcpu);
                break;
        case KVM_MP_STATE_OPERATING:
-               kvm_s390_vcpu_start(vcpu);
+               rc = kvm_s390_vcpu_start(vcpu);
                break;
        case KVM_MP_STATE_LOAD:
+               if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
+                       rc = -ENXIO;
+                       break;
+               }
+               rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
+               break;
        case KVM_MP_STATE_CHECK_STOP:
-               /* fall through - CHECK_STOP and LOAD are not supported yet */
+               fallthrough;    /* CHECK_STOP is not supported yet */
        default:
                rc = -ENXIO;
        }
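
The new KVM_MP_STATE_LOAD handling is only reachable through KVM_SET_MP_STATE
and, per the code above, only succeeds for protected vcpus. A small sketch:

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Request the "load" state; fails with ENXIO for non-protected vcpus. */
    static int set_mp_state_load(int vcpu_fd)
    {
        struct kvm_mp_state st = { .mp_state = KVM_MP_STATE_LOAD };

        return ioctl(vcpu_fd, KVM_SET_MP_STATE, &st);
    }
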
@@ -3844,9 +4113,11 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
        return vcpu_post_run_fault_in_sie(vcpu);
 }
 
+#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
 static int __vcpu_run(struct kvm_vcpu *vcpu)
 {
        int rc, exit_reason;
+       struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
 
        /*
         * We try to hold kvm->srcu during most of vcpu_run (except when run-
@@ -3868,8 +4139,28 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
                guest_enter_irqoff();
                __disable_cpu_timer_accounting(vcpu);
                local_irq_enable();
+               if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+                       memcpy(sie_page->pv_grregs,
+                              vcpu->run->s.regs.gprs,
+                              sizeof(sie_page->pv_grregs));
+               }
                exit_reason = sie64a(vcpu->arch.sie_block,
                                     vcpu->run->s.regs.gprs);
+               if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+                       memcpy(vcpu->run->s.regs.gprs,
+                              sie_page->pv_grregs,
+                              sizeof(sie_page->pv_grregs));
+                       /*
+                        * We're not allowed to inject interrupts on intercepts
+                        * that leave the guest state in an "in-between" state
+                        * where the next SIE entry will do a continuation.
+                        * Fence interrupts in our "internal" PSW.
+                        */
+                       if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
+                           vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
+                               vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
+                       }
+               }
                local_irq_disable();
                __enable_cpu_timer_accounting(vcpu);
                guest_exit_irqoff();
@@ -3883,7 +4174,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
        return rc;
 }
 
-static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static void sync_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
        struct runtime_instr_cb *riccb;
        struct gs_cb *gscb;
@@ -3892,16 +4183,7 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
        vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
        vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
-       if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
-               kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
-       if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
-               memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
-               /* some control register changes require a tlb flush */
-               kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
-       }
        if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
-               kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
-               vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
                vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
                vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
                vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
@@ -3942,6 +4224,36 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
                vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
        }
+       if (MACHINE_HAS_GS) {
+               preempt_disable();
+               __ctl_set_bit(2, 4);
+               if (current->thread.gs_cb) {
+                       vcpu->arch.host_gscb = current->thread.gs_cb;
+                       save_gs_cb(vcpu->arch.host_gscb);
+               }
+               if (vcpu->arch.gs_enabled) {
+                       current->thread.gs_cb = (struct gs_cb *)
+                                               &vcpu->run->s.regs.gscb;
+                       restore_gs_cb(current->thread.gs_cb);
+               }
+               preempt_enable();
+       }
+       /* SIE will load etoken directly from SDNX and therefore kvm_run */
+}
+
+static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+       if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
+               kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
+       if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
+               memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
+               /* some control register changes require a tlb flush */
+               kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+       }
+       if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
+               kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
+               vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
+       }
        save_access_regs(vcpu->arch.host_acrs);
        restore_access_regs(vcpu->run->s.regs.acrs);
        /* save host (userspace) fprs/vrs */
@@ -3956,23 +4268,47 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        if (test_fp_ctl(current->thread.fpu.fpc))
                /* User space provided an invalid FPC, let's clear it */
                current->thread.fpu.fpc = 0;
+
+       /* Sync fmt2 only data */
+       if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
+               sync_regs_fmt2(vcpu, kvm_run);
+       } else {
+               /*
+                * In several places we have to modify our internal view to
+                * not do things that are disallowed by the ultravisor. For
+                * example we must not inject interrupts after specific exits
+                * (e.g. 112 prefix page not secure). We do this by turning
+                * off the machine check, external and I/O interrupt bits
+                * of our PSW copy. To avoid getting validity intercepts,
+                * we only accept the condition code from userspace.
+                */
+               vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
+               vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
+                                                  PSW_MASK_CC;
+       }
+
+       kvm_run->kvm_dirty_regs = 0;
+}
+
+static void store_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+       kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
+       kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
+       kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
+       kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
        if (MACHINE_HAS_GS) {
-               preempt_disable();
                __ctl_set_bit(2, 4);
-               if (current->thread.gs_cb) {
-                       vcpu->arch.host_gscb = current->thread.gs_cb;
-                       save_gs_cb(vcpu->arch.host_gscb);
-               }
-               if (vcpu->arch.gs_enabled) {
-                       current->thread.gs_cb = (struct gs_cb *)
-                                               &vcpu->run->s.regs.gscb;
-                       restore_gs_cb(current->thread.gs_cb);
-               }
+               if (vcpu->arch.gs_enabled)
+                       save_gs_cb(current->thread.gs_cb);
+               preempt_disable();
+               current->thread.gs_cb = vcpu->arch.host_gscb;
+               restore_gs_cb(vcpu->arch.host_gscb);
                preempt_enable();
+               if (!vcpu->arch.host_gscb)
+                       __ctl_clear_bit(2, 4);
+               vcpu->arch.host_gscb = NULL;
        }
-       /* SIE will load etoken directly from SDNX and therefore kvm_run */
-
-       kvm_run->kvm_dirty_regs = 0;
+       /* SIE will save etoken directly into SDNX and therefore kvm_run */
 }
 
 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
@@ -3983,13 +4319,9 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
        kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
        kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
-       kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
-       kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
-       kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
        kvm_run->s.regs.pft = vcpu->arch.pfault_token;
        kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
        kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
-       kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
        save_access_regs(vcpu->run->s.regs.acrs);
        restore_access_regs(vcpu->arch.host_acrs);
        /* Save guest register state */
@@ -3998,23 +4330,13 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        /* Restore will be done lazily at return */
        current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
        current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
-       if (MACHINE_HAS_GS) {
-               __ctl_set_bit(2, 4);
-               if (vcpu->arch.gs_enabled)
-                       save_gs_cb(current->thread.gs_cb);
-               preempt_disable();
-               current->thread.gs_cb = vcpu->arch.host_gscb;
-               restore_gs_cb(vcpu->arch.host_gscb);
-               preempt_enable();
-               if (!vcpu->arch.host_gscb)
-                       __ctl_clear_bit(2, 4);
-               vcpu->arch.host_gscb = NULL;
-       }
-       /* SIE will save etoken directly into SDNX and therefore kvm_run */
+       if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
+               store_regs_fmt2(vcpu, kvm_run);
 }
 
-int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 {
+       struct kvm_run *kvm_run = vcpu->run;
        int rc;
 
        if (kvm_run->immediate_exit)
@@ -4034,6 +4356,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
        kvm_sigset_activate(vcpu);
 
+       /*
+        * no need to check the return value of vcpu_start as it can only have
+        * an error for protvirt, but protvirt means user cpu state
+        */
        if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
                kvm_s390_vcpu_start(vcpu);
        } else if (is_vcpu_stopped(vcpu)) {
@@ -4171,18 +4497,27 @@ static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
        kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
 }
 
-void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
+int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
 {
-       int i, online_vcpus, started_vcpus = 0;
+       int i, online_vcpus, r = 0, started_vcpus = 0;
 
        if (!is_vcpu_stopped(vcpu))
-               return;
+               return 0;
 
        trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
        /* Only one cpu at a time may enter/leave the STOPPED state. */
        spin_lock(&vcpu->kvm->arch.start_stop_lock);
        online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
 
+       /* Let's tell the UV that we want to change into the operating state */
+       if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+               r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
+               if (r) {
+                       spin_unlock(&vcpu->kvm->arch.start_stop_lock);
+                       return r;
+               }
+       }
+
        for (i = 0; i < online_vcpus; i++) {
                if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
                        started_vcpus++;
@@ -4201,28 +4536,44 @@ void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
        }
 
        kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
+       /*
+        * The real PSW might have changed due to a RESTART interpreted by the
+        * ultravisor. We block all interrupts and let the next sie exit
+        * refresh our view.
+        */
+       if (kvm_s390_pv_cpu_is_protected(vcpu))
+               vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
        /*
         * Another VCPU might have used IBS while we were offline.
         * Let's play safe and flush the VCPU at startup.
         */
        kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
        spin_unlock(&vcpu->kvm->arch.start_stop_lock);
-       return;
+       return 0;
 }
 
-void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
+int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
 {
-       int i, online_vcpus, started_vcpus = 0;
+       int i, online_vcpus, r = 0, started_vcpus = 0;
        struct kvm_vcpu *started_vcpu = NULL;
 
        if (is_vcpu_stopped(vcpu))
-               return;
+               return 0;
 
        trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
        /* Only one cpu at a time may enter/leave the STOPPED state. */
        spin_lock(&vcpu->kvm->arch.start_stop_lock);
        online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
 
+       /* Let's tell the UV that we want to change into the stopped state */
+       if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+               r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
+               if (r) {
+                       spin_unlock(&vcpu->kvm->arch.start_stop_lock);
+                       return r;
+               }
+       }
+
        /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
        kvm_s390_clear_stop_irq(vcpu);
 
@@ -4245,7 +4596,7 @@ void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
        }
 
        spin_unlock(&vcpu->kvm->arch.start_stop_lock);
-       return;
+       return 0;
 }
 
 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
@@ -4272,12 +4623,40 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
        return r;
 }
 
+static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
+                                  struct kvm_s390_mem_op *mop)
+{
+       void __user *uaddr = (void __user *)mop->buf;
+       int r = 0;
+
+       if (mop->flags || !mop->size)
+               return -EINVAL;
+       if (mop->size + mop->sida_offset < mop->size)
+               return -EINVAL;
+       if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
+               return -E2BIG;
+
+       switch (mop->op) {
+       case KVM_S390_MEMOP_SIDA_READ:
+               if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
+                                mop->sida_offset), mop->size))
+                       r = -EFAULT;
+
+               break;
+       case KVM_S390_MEMOP_SIDA_WRITE:
+               if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
+                                  mop->sida_offset), uaddr, mop->size))
+                       r = -EFAULT;
+               break;
+       }
+       return r;
+}
 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
                                  struct kvm_s390_mem_op *mop)
 {
        void __user *uaddr = (void __user *)mop->buf;
        void *tmpbuf = NULL;
-       int r, srcu_idx;
+       int r = 0;
        const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
                                    | KVM_S390_MEMOP_F_CHECK_ONLY;
 
@@ -4287,14 +4666,15 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
        if (mop->size > MEM_OP_MAX_SIZE)
                return -E2BIG;
 
+       if (kvm_s390_pv_cpu_is_protected(vcpu))
+               return -EINVAL;
+
        if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
                tmpbuf = vmalloc(mop->size);
                if (!tmpbuf)
                        return -ENOMEM;
        }
 
-       srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
-
        switch (mop->op) {
        case KVM_S390_MEMOP_LOGICAL_READ:
                if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
@@ -4320,12 +4700,8 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
                }
                r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
                break;
-       default:
-               r = -EINVAL;
        }
 
-       srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
-
        if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
                kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
 
@@ -4333,6 +4709,31 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
        return r;
 }
 
+static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
+                                     struct kvm_s390_mem_op *mop)
+{
+       int r, srcu_idx;
+
+       srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+       switch (mop->op) {
+       case KVM_S390_MEMOP_LOGICAL_READ:
+       case KVM_S390_MEMOP_LOGICAL_WRITE:
+               r = kvm_s390_guest_mem_op(vcpu, mop);
+               break;
+       case KVM_S390_MEMOP_SIDA_READ:
+       case KVM_S390_MEMOP_SIDA_WRITE:
+               /* we are locked against sida going away by the vcpu->mutex */
+               r = kvm_s390_guest_sida_op(vcpu, mop);
+               break;
+       default:
+               r = -EINVAL;
+       }
+
+       srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
+       return r;
+}
+
 long kvm_arch_vcpu_async_ioctl(struct file *filp,
                               unsigned int ioctl, unsigned long arg)
 {
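
Since logical reads and writes now return -EINVAL for protected vcpus,
userspace reaches the satellite block only through the two new SIDA sub-ops of
KVM_S390_MEMOP. A hedged sketch of a read (field names from the UAPI; size
must be nonzero, flags must be zero, and size + sida_offset is bounds-checked
against the SIDA in the kernel):

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Copy len bytes out of the vcpu's SIDA, starting at offset. */
    static int sida_read(int vcpu_fd, void *buf, __u32 len, __u32 offset)
    {
        struct kvm_s390_mem_op mop = {
            .op = KVM_S390_MEMOP_SIDA_READ,
            .buf = (__u64)(unsigned long)buf,
            .size = len,
            .sida_offset = offset,
            .flags = 0,    /* no flags are accepted for SIDA ops */
        };

        return ioctl(vcpu_fd, KVM_S390_MEMOP, &mop);
    }
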
@@ -4368,6 +4769,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
        void __user *argp = (void __user *)arg;
        int idx;
        long r;
+       u16 rc, rrc;
 
        vcpu_load(vcpu);
 
@@ -4389,18 +4791,40 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
        case KVM_S390_CLEAR_RESET:
                r = 0;
                kvm_arch_vcpu_ioctl_clear_reset(vcpu);
+               if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+                       r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
+                                         UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
+                       VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
+                                  rc, rrc);
+               }
                break;
        case KVM_S390_INITIAL_RESET:
                r = 0;
                kvm_arch_vcpu_ioctl_initial_reset(vcpu);
+               if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+                       r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
+                                         UVC_CMD_CPU_RESET_INITIAL,
+                                         &rc, &rrc);
+                       VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
+                                  rc, rrc);
+               }
                break;
        case KVM_S390_NORMAL_RESET:
                r = 0;
                kvm_arch_vcpu_ioctl_normal_reset(vcpu);
+               if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+                       r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
+                                         UVC_CMD_CPU_RESET, &rc, &rrc);
+                       VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
+                                  rc, rrc);
+               }
                break;
        case KVM_SET_ONE_REG:
        case KVM_GET_ONE_REG: {
                struct kvm_one_reg reg;
+               r = -EINVAL;
+               if (kvm_s390_pv_cpu_is_protected(vcpu))
+                       break;
                r = -EFAULT;
                if (copy_from_user(&reg, argp, sizeof(reg)))
                        break;
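
All three reset ioctls keep their argument-less UAPI; for protected vcpus the
kernel now additionally forwards each one to the matching ultravisor reset
command, as shown above. Sketch:

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Issue the deepest reset; for a protected vcpu this also triggers
     * UVC_CMD_CPU_RESET_CLEAR inside the kernel.
     */
    static int vcpu_clear_reset(int vcpu_fd)
    {
        return ioctl(vcpu_fd, KVM_S390_CLEAR_RESET, 0);
    }
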
@@ -4463,7 +4887,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                struct kvm_s390_mem_op mem_op;
 
                if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
-                       r = kvm_s390_guest_mem_op(vcpu, &mem_op);
+                       r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
                else
                        r = -EFAULT;
                break;
@@ -4523,12 +4947,6 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
        return VM_FAULT_SIGBUS;
 }
 
-int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
-                           unsigned long npages)
-{
-       return 0;
-}
-
 /* Section: memory related */
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
                                   struct kvm_memory_slot *memslot,
@@ -4549,12 +4967,15 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
        if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
                return -EINVAL;
 
+       /* When we are protected, we should not change the memory slots */
+       if (kvm_s390_pv_get_handle(kvm))
+               return -EINVAL;
        return 0;
 }
 
 void kvm_arch_commit_memory_region(struct kvm *kvm,
                                const struct kvm_userspace_memory_region *mem,
-                               const struct kvm_memory_slot *old,
+                               struct kvm_memory_slot *old,
                                const struct kvm_memory_slot *new,
                                enum kvm_mr_change change)
 {
@@ -4570,7 +4991,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
                                        old->npages * PAGE_SIZE);
                if (rc)
                        break;
-               /* FALLTHROUGH */
+               fallthrough;
        case KVM_MR_CREATE:
                rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
                                      mem->guest_phys_addr, mem->memory_size);