arch/x86/kvm/xen.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Copyright © 2019 Oracle and/or its affiliates. All rights reserved.
   4  * Copyright © 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
   5  *
   6  * KVM Xen emulation
   7  */
   8
   9 #include "x86.h"
  10 #include "xen.h"
  11 #include "hyperv.h"
  12
  13 #include <linux/kvm_host.h>
  14 #include <linux/sched/stat.h>
  15
  16 #include <trace/events/kvm.h>
  17 #include <xen/interface/xen.h>
  18 #include <xen/interface/vcpu.h>
  19
  20 #include "trace.h"
  21
/*
 * Deferred static key, initially false, declared with a timeout of HZ.
 * Within this file it is incremented when a VM's xen_hvm_config.msr goes
 * from zero to non-zero, and decremented (deferred) when that field is
 * cleared again or when a VM that still has it set is destroyed.
 */
DEFINE_STATIC_KEY_DEFERRED_FALSE(kvm_xen_enabled, HZ);
  23
  24 static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
  25 {
  26         gpa_t gpa = gfn_to_gpa(gfn);
  27         int wc_ofs, sec_hi_ofs;
  28         int ret;
  29         int idx = srcu_read_lock(&kvm->srcu);
  30
  31         ret = kvm_gfn_to_hva_cache_init(kvm, &kvm->arch.xen.shinfo_cache,
  32                                         gpa, PAGE_SIZE);
  33         if (ret)
  34                 goto out;
  35
  36         kvm->arch.xen.shinfo_set = true;
  37
  38         /* Paranoia checks on the 32-bit struct layout */
  39         BUILD_BUG_ON(offsetof(struct compat_shared_info, wc) != 0x900);
  40         BUILD_BUG_ON(offsetof(struct compat_shared_info, arch.wc_sec_hi) != 0x924);
  41         BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
  42
  43         /* 32-bit location by default */
  44         wc_ofs = offsetof(struct compat_shared_info, wc);
  45         sec_hi_ofs = offsetof(struct compat_shared_info, arch.wc_sec_hi);
  46
  47 #ifdef CONFIG_X86_64
  48         /* Paranoia checks on the 64-bit struct layout */
  49         BUILD_BUG_ON(offsetof(struct shared_info, wc) != 0xc00);
  50         BUILD_BUG_ON(offsetof(struct shared_info, wc_sec_hi) != 0xc0c);
  51
  52         if (kvm->arch.xen.long_mode) {
  53                 wc_ofs = offsetof(struct shared_info, wc);
  54                 sec_hi_ofs = offsetof(struct shared_info, wc_sec_hi);
  55         }
  56 #endif
  57
  58         kvm_write_wall_clock(kvm, gpa + wc_ofs, sec_hi_ofs - wc_ofs);
  59         kvm_make_all_cpus_request(kvm, KVM_REQ_MASTERCLOCK_UPDATE);
  60
  61 out:
  62         srcu_read_unlock(&kvm->srcu, idx);
  63         return ret;
  64 }
  65
  66 static void kvm_xen_update_runstate(struct kvm_vcpu *v, int state)
  67 {
  68         struct kvm_vcpu_xen *vx = &v->arch.xen;
  69         u64 now = get_kvmclock_ns(v->kvm);
  70         u64 delta_ns = now - vx->runstate_entry_time;
  71         u64 run_delay = current->sched_info.run_delay;
  72
  73         if (unlikely(!vx->runstate_entry_time))
  74                 vx->current_runstate = RUNSTATE_offline;
  75
  76         /*
  77          * Time waiting for the scheduler isn't "stolen" if the
  78          * vCPU wasn't running anyway.
  79          */
  80         if (vx->current_runstate == RUNSTATE_running) {
  81                 u64 steal_ns = run_delay - vx->last_steal;
  82
  83                 delta_ns -= steal_ns;
  84
  85                 vx->runstate_times[RUNSTATE_runnable] += steal_ns;
  86         }
  87         vx->last_steal = run_delay;
  88
  89         vx->runstate_times[vx->current_runstate] += delta_ns;
  90         vx->current_runstate = state;
  91         vx->runstate_entry_time = now;
  92 }
  93
  94 void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
  95 {
  96         struct kvm_vcpu_xen *vx = &v->arch.xen;
  97         uint64_t state_entry_time;
  98         unsigned int offset;
  99
 100         kvm_xen_update_runstate(v, state);
 101
 102         if (!vx->runstate_set)
 103                 return;
 104
 105         BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);
 106
 107         offset = offsetof(struct compat_vcpu_runstate_info, state_entry_time);
 108 #ifdef CONFIG_X86_64
 109         /*
 110          * The only difference is alignment of uint64_t in 32-bit.
 111          * So the first field 'state' is accessed directly using
 112          * offsetof() (where its offset happens to be zero), while the
 113          * remaining fields which are all uint64_t, start at 'offset'
 114          * which we tweak here by adding 4.
 115          */
 116         BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
 117                      offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4);
 118         BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) !=
 119                      offsetof(struct compat_vcpu_runstate_info, time) + 4);
 120
 121         if (v->kvm->arch.xen.long_mode)
 122                 offset = offsetof(struct vcpu_runstate_info, state_entry_time);
 123 #endif
 124         /*
 125          * First write the updated state_entry_time at the appropriate
 126          * location determined by 'offset'.
 127          */
 128         state_entry_time = vx->runstate_entry_time;
 129         state_entry_time |= XEN_RUNSTATE_UPDATE;
 130
 131         BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state_entry_time) !=
 132                      sizeof(state_entry_time));
 133         BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state_entry_time) !=
 134                      sizeof(state_entry_time));
 135
 136         if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
 137                                           &state_entry_time, offset,
 138                                           sizeof(state_entry_time)))
 139                 return;
 140         smp_wmb();
 141
 142         /*
 143          * Next, write the new runstate. This is in the *same* place
 144          * for 32-bit and 64-bit guests, asserted here for paranoia.
 145          */
 146         BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) !=
 147                      offsetof(struct compat_vcpu_runstate_info, state));
 148         BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state) !=
 149                      sizeof(vx->current_runstate));
 150         BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state) !=
 151                      sizeof(vx->current_runstate));
 152
 153         if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
 154                                           &vx->current_runstate,
 155                                           offsetof(struct vcpu_runstate_info, state),
 156                                           sizeof(vx->current_runstate)))
 157                 return;
 158
 159         /*
 160          * Write the actual runstate times immediately after the
 161          * runstate_entry_time.
 162          */
 163         BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
 164                      offsetof(struct vcpu_runstate_info, time) - sizeof(u64));
 165         BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state_entry_time) !=
 166                      offsetof(struct compat_vcpu_runstate_info, time) - sizeof(u64));
 167         BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) !=
 168                      sizeof(((struct compat_vcpu_runstate_info *)0)->time));
 169         BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) !=
 170                      sizeof(vx->runstate_times));
 171
 172         if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
 173                                           &vx->runstate_times[0],
 174                                           offset + sizeof(u64),
 175                                           sizeof(vx->runstate_times)))
 176                 return;
 177
 178         smp_wmb();
 179
 180         /*
 181          * Finally, clear the XEN_RUNSTATE_UPDATE bit in the guest's
 182          * runstate_entry_time field.
 183          */
 184
 185         state_entry_time &= ~XEN_RUNSTATE_UPDATE;
 186         if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
 187                                           &state_entry_time, offset,
 188                                           sizeof(state_entry_time)))
 189                 return;
 190 }
 191
 192 int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
 193 {
 194         u8 rc = 0;
 195
 196         /*
 197          * If the global upcall vector (HVMIRQ_callback_vector) is set and
 198          * the vCPU's evtchn_upcall_pending flag is set, the IRQ is pending.
 199          */
 200         struct gfn_to_hva_cache *ghc = &v->arch.xen.vcpu_info_cache;
 201         struct kvm_memslots *slots = kvm_memslots(v->kvm);
 202         unsigned int offset = offsetof(struct vcpu_info, evtchn_upcall_pending);
 203
 204         /* No need for compat handling here */
 205         BUILD_BUG_ON(offsetof(struct vcpu_info, evtchn_upcall_pending) !=
 206                      offsetof(struct compat_vcpu_info, evtchn_upcall_pending));
 207         BUILD_BUG_ON(sizeof(rc) !=
 208                      sizeof(((struct vcpu_info *)0)->evtchn_upcall_pending));
 209         BUILD_BUG_ON(sizeof(rc) !=
 210                      sizeof(((struct compat_vcpu_info *)0)->evtchn_upcall_pending));
 211
 212         /*
 213          * For efficiency, this mirrors the checks for using the valid
 214          * cache in kvm_read_guest_offset_cached(), but just uses
 215          * __get_user() instead. And falls back to the slow path.
 216          */
 217         if (likely(slots->generation == ghc->generation &&
 218                    !kvm_is_error_hva(ghc->hva) && ghc->memslot)) {
 219                 /* Fast path */
 220                 __get_user(rc, (u8 __user *)ghc->hva + offset);
 221         } else {
 222                 /* Slow path */
 223                 kvm_read_guest_offset_cached(v->kvm, ghc, &rc, offset,
 224                                              sizeof(rc));
 225         }
 226
 227         return rc;
 228 }
 229
 230 int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
 231 {
 232         int r = -ENOENT;
 233
 234         mutex_lock(&kvm->lock);
 235
 236         switch (data->type) {
 237         case KVM_XEN_ATTR_TYPE_LONG_MODE:
 238                 if (!IS_ENABLED(CONFIG_64BIT) && data->u.long_mode) {
 239                         r = -EINVAL;
 240                 } else {
 241                         kvm->arch.xen.long_mode = !!data->u.long_mode;
 242                         r = 0;
 243                 }
 244                 break;
 245
 246         case KVM_XEN_ATTR_TYPE_SHARED_INFO:
 247                 if (data->u.shared_info.gfn == GPA_INVALID) {
 248                         kvm->arch.xen.shinfo_set = false;
 249                         r = 0;
 250                         break;
 251                 }
 252                 r = kvm_xen_shared_info_init(kvm, data->u.shared_info.gfn);
 253                 break;
 254
 255
 256         case KVM_XEN_ATTR_TYPE_UPCALL_VECTOR:
 257                 if (data->u.vector && data->u.vector < 0x10)
 258                         r = -EINVAL;
 259                 else {
 260                         kvm->arch.xen.upcall_vector = data->u.vector;
 261                         r = 0;
 262                 }
 263                 break;
 264
 265         default:
 266                 break;
 267         }
 268
 269         mutex_unlock(&kvm->lock);
 270         return r;
 271 }
 272
 273 int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
 274 {
 275         int r = -ENOENT;
 276
 277         mutex_lock(&kvm->lock);
 278
 279         switch (data->type) {
 280         case KVM_XEN_ATTR_TYPE_LONG_MODE:
 281                 data->u.long_mode = kvm->arch.xen.long_mode;
 282                 r = 0;
 283                 break;
 284
 285         case KVM_XEN_ATTR_TYPE_SHARED_INFO:
 286                 if (kvm->arch.xen.shinfo_set)
 287                         data->u.shared_info.gfn = gpa_to_gfn(kvm->arch.xen.shinfo_cache.gpa);
 288                 else
 289                         data->u.shared_info.gfn = GPA_INVALID;
 290                 r = 0;
 291                 break;
 292
 293         case KVM_XEN_ATTR_TYPE_UPCALL_VECTOR:
 294                 data->u.vector = kvm->arch.xen.upcall_vector;
 295                 r = 0;
 296                 break;
 297
 298         default:
 299                 break;
 300         }
 301
 302         mutex_unlock(&kvm->lock);
 303         return r;
 304 }
 305
 306 int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 307 {
 308         int idx, r = -ENOENT;
 309
 310         mutex_lock(&vcpu->kvm->lock);
 311         idx = srcu_read_lock(&vcpu->kvm->srcu);
 312
 313         switch (data->type) {
 314         case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO:
 315                 /* No compat necessary here. */
 316                 BUILD_BUG_ON(sizeof(struct vcpu_info) !=
 317                              sizeof(struct compat_vcpu_info));
 318                 BUILD_BUG_ON(offsetof(struct vcpu_info, time) !=
 319                              offsetof(struct compat_vcpu_info, time));
 320
 321                 if (data->u.gpa == GPA_INVALID) {
 322                         vcpu->arch.xen.vcpu_info_set = false;
 323                         r = 0;
 324                         break;
 325                 }
 326
 327                 r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
 328                                               &vcpu->arch.xen.vcpu_info_cache,
 329                                               data->u.gpa,
 330                                               sizeof(struct vcpu_info));
 331                 if (!r) {
 332                         vcpu->arch.xen.vcpu_info_set = true;
 333                         kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
 334                 }
 335                 break;
 336
 337         case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO:
 338                 if (data->u.gpa == GPA_INVALID) {
 339                         vcpu->arch.xen.vcpu_time_info_set = false;
 340                         r = 0;
 341                         break;
 342                 }
 343
 344                 r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
 345                                               &vcpu->arch.xen.vcpu_time_info_cache,
 346                                               data->u.gpa,
 347                                               sizeof(struct pvclock_vcpu_time_info));
 348                 if (!r) {
 349                         vcpu->arch.xen.vcpu_time_info_set = true;
 350                         kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
 351                 }
 352                 break;
 353
 354         case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR:
 355                 if (!sched_info_on()) {
 356                         r = -EOPNOTSUPP;
 357                         break;
 358                 }
 359                 if (data->u.gpa == GPA_INVALID) {
 360                         vcpu->arch.xen.runstate_set = false;
 361                         r = 0;
 362                         break;
 363                 }
 364
 365                 r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
 366                                               &vcpu->arch.xen.runstate_cache,
 367                                               data->u.gpa,
 368                                               sizeof(struct vcpu_runstate_info));
 369                 if (!r) {
 370                         vcpu->arch.xen.runstate_set = true;
 371                 }
 372                 break;
 373
 374         case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
 375                 if (!sched_info_on()) {
 376                         r = -EOPNOTSUPP;
 377                         break;
 378                 }
 379                 if (data->u.runstate.state > RUNSTATE_offline) {
 380                         r = -EINVAL;
 381                         break;
 382                 }
 383
 384                 kvm_xen_update_runstate(vcpu, data->u.runstate.state);
 385                 r = 0;
 386                 break;
 387
 388         case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA:
 389                 if (!sched_info_on()) {
 390                         r = -EOPNOTSUPP;
 391                         break;
 392                 }
 393                 if (data->u.runstate.state > RUNSTATE_offline) {
 394                         r = -EINVAL;
 395                         break;
 396                 }
 397                 if (data->u.runstate.state_entry_time !=
 398                     (data->u.runstate.time_running +
 399                      data->u.runstate.time_runnable +
 400                      data->u.runstate.time_blocked +
 401                      data->u.runstate.time_offline)) {
 402                         r = -EINVAL;
 403                         break;
 404                 }
 405                 if (get_kvmclock_ns(vcpu->kvm) <
 406                     data->u.runstate.state_entry_time) {
 407                         r = -EINVAL;
 408                         break;
 409                 }
 410
 411                 vcpu->arch.xen.current_runstate = data->u.runstate.state;
 412                 vcpu->arch.xen.runstate_entry_time =
 413                         data->u.runstate.state_entry_time;
 414                 vcpu->arch.xen.runstate_times[RUNSTATE_running] =
 415                         data->u.runstate.time_running;
 416                 vcpu->arch.xen.runstate_times[RUNSTATE_runnable] =
 417                         data->u.runstate.time_runnable;
 418                 vcpu->arch.xen.runstate_times[RUNSTATE_blocked] =
 419                         data->u.runstate.time_blocked;
 420                 vcpu->arch.xen.runstate_times[RUNSTATE_offline] =
 421                         data->u.runstate.time_offline;
 422                 vcpu->arch.xen.last_steal = current->sched_info.run_delay;
 423                 r = 0;
 424                 break;
 425
 426         case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST:
 427                 if (!sched_info_on()) {
 428                         r = -EOPNOTSUPP;
 429                         break;
 430                 }
 431                 if (data->u.runstate.state > RUNSTATE_offline &&
 432                     data->u.runstate.state != (u64)-1) {
 433                         r = -EINVAL;
 434                         break;
 435                 }
 436                 /* The adjustment must add up */
 437                 if (data->u.runstate.state_entry_time !=
 438                     (data->u.runstate.time_running +
 439                      data->u.runstate.time_runnable +
 440                      data->u.runstate.time_blocked +
 441                      data->u.runstate.time_offline)) {
 442                         r = -EINVAL;
 443                         break;
 444                 }
 445
 446                 if (get_kvmclock_ns(vcpu->kvm) <
 447                     (vcpu->arch.xen.runstate_entry_time +
 448                      data->u.runstate.state_entry_time)) {
 449                         r = -EINVAL;
 450                         break;
 451                 }
 452
 453                 vcpu->arch.xen.runstate_entry_time +=
 454                         data->u.runstate.state_entry_time;
 455                 vcpu->arch.xen.runstate_times[RUNSTATE_running] +=
 456                         data->u.runstate.time_running;
 457                 vcpu->arch.xen.runstate_times[RUNSTATE_runnable] +=
 458                         data->u.runstate.time_runnable;
 459                 vcpu->arch.xen.runstate_times[RUNSTATE_blocked] +=
 460                         data->u.runstate.time_blocked;
 461                 vcpu->arch.xen.runstate_times[RUNSTATE_offline] +=
 462                         data->u.runstate.time_offline;
 463
 464                 if (data->u.runstate.state <= RUNSTATE_offline)
 465                         kvm_xen_update_runstate(vcpu, data->u.runstate.state);
 466                 r = 0;
 467                 break;
 468
 469         default:
 470                 break;
 471         }
 472
 473         srcu_read_unlock(&vcpu->kvm->srcu, idx);
 474         mutex_unlock(&vcpu->kvm->lock);
 475         return r;
 476 }
 477
 478 int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 479 {
 480         int r = -ENOENT;
 481
 482         mutex_lock(&vcpu->kvm->lock);
 483
 484         switch (data->type) {
 485         case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO:
 486                 if (vcpu->arch.xen.vcpu_info_set)
 487                         data->u.gpa = vcpu->arch.xen.vcpu_info_cache.gpa;
 488                 else
 489                         data->u.gpa = GPA_INVALID;
 490                 r = 0;
 491                 break;
 492
 493         case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO:
 494                 if (vcpu->arch.xen.vcpu_time_info_set)
 495                         data->u.gpa = vcpu->arch.xen.vcpu_time_info_cache.gpa;
 496                 else
 497                         data->u.gpa = GPA_INVALID;
 498                 r = 0;
 499                 break;
 500
 501         case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR:
 502                 if (!sched_info_on()) {
 503                         r = -EOPNOTSUPP;
 504                         break;
 505                 }
 506                 if (vcpu->arch.xen.runstate_set) {
 507                         data->u.gpa = vcpu->arch.xen.runstate_cache.gpa;
 508                         r = 0;
 509                 }
 510                 break;
 511
 512         case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
 513                 if (!sched_info_on()) {
 514                         r = -EOPNOTSUPP;
 515                         break;
 516                 }
 517                 data->u.runstate.state = vcpu->arch.xen.current_runstate;
 518                 r = 0;
 519                 break;
 520
 521         case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA:
 522                 if (!sched_info_on()) {
 523                         r = -EOPNOTSUPP;
 524                         break;
 525                 }
 526                 data->u.runstate.state = vcpu->arch.xen.current_runstate;
 527                 data->u.runstate.state_entry_time =
 528                         vcpu->arch.xen.runstate_entry_time;
 529                 data->u.runstate.time_running =
 530                         vcpu->arch.xen.runstate_times[RUNSTATE_running];
 531                 data->u.runstate.time_runnable =
 532                         vcpu->arch.xen.runstate_times[RUNSTATE_runnable];
 533                 data->u.runstate.time_blocked =
 534                         vcpu->arch.xen.runstate_times[RUNSTATE_blocked];
 535                 data->u.runstate.time_offline =
 536                         vcpu->arch.xen.runstate_times[RUNSTATE_offline];
 537                 r = 0;
 538                 break;
 539
 540         case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST:
 541                 r = -EINVAL;
 542                 break;
 543
 544         default:
 545                 break;
 546         }
 547
 548         mutex_unlock(&vcpu->kvm->lock);
 549         return r;
 550 }
 551
 552 int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data)
 553 {
 554         struct kvm *kvm = vcpu->kvm;
 555         u32 page_num = data & ~PAGE_MASK;
 556         u64 page_addr = data & PAGE_MASK;
 557         bool lm = is_long_mode(vcpu);
 558
 559         /* Latch long_mode for shared_info pages etc. */
 560         vcpu->kvm->arch.xen.long_mode = lm;
 561
 562         /*
 563          * If Xen hypercall intercept is enabled, fill the hypercall
 564          * page with VMCALL/VMMCALL instructions since that's what
 565          * we catch. Else the VMM has provided the hypercall pages
 566          * with instructions of its own choosing, so use those.
 567          */
 568         if (kvm_xen_hypercall_enabled(kvm)) {
 569                 u8 instructions[32];
 570                 int i;
 571
 572                 if (page_num)
 573                         return 1;
 574
 575                 /* mov imm32, %eax */
 576                 instructions[0] = 0xb8;
 577
 578                 /* vmcall / vmmcall */
 579                 kvm_x86_ops.patch_hypercall(vcpu, instructions + 5);
 580
 581                 /* ret */
 582                 instructions[8] = 0xc3;
 583
 584                 /* int3 to pad */
 585                 memset(instructions + 9, 0xcc, sizeof(instructions) - 9);
 586
 587                 for (i = 0; i < PAGE_SIZE / sizeof(instructions); i++) {
 588                         *(u32 *)&instructions[1] = i;
 589                         if (kvm_vcpu_write_guest(vcpu,
 590                                                  page_addr + (i * sizeof(instructions)),
 591                                                  instructions, sizeof(instructions)))
 592                                 return 1;
 593                 }
 594         } else {
 595                 /*
 596                  * Note, truncation is a non-issue as 'lm' is guaranteed to be
 597                  * false for a 32-bit kernel, i.e. when hva_t is only 4 bytes.
 598                  */
 599                 hva_t blob_addr = lm ? kvm->arch.xen_hvm_config.blob_addr_64
 600                                      : kvm->arch.xen_hvm_config.blob_addr_32;
 601                 u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
 602                                   : kvm->arch.xen_hvm_config.blob_size_32;
 603                 u8 *page;
 604
 605                 if (page_num >= blob_size)
 606                         return 1;
 607
 608                 blob_addr += page_num * PAGE_SIZE;
 609
 610                 page = memdup_user((u8 __user *)blob_addr, PAGE_SIZE);
 611                 if (IS_ERR(page))
 612                         return PTR_ERR(page);
 613
 614                 if (kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE)) {
 615                         kfree(page);
 616                         return 1;
 617                 }
 618         }
 619         return 0;
 620 }
 621
 622 int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc)
 623 {
 624         if (xhc->flags & ~KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL)
 625                 return -EINVAL;
 626
 627         /*
 628          * With hypercall interception the kernel generates its own
 629          * hypercall page so it must not be provided.
 630          */
 631         if ((xhc->flags & KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL) &&
 632             (xhc->blob_addr_32 || xhc->blob_addr_64 ||
 633              xhc->blob_size_32 || xhc->blob_size_64))
 634                 return -EINVAL;
 635
 636         mutex_lock(&kvm->lock);
 637
 638         if (xhc->msr && !kvm->arch.xen_hvm_config.msr)
 639                 static_branch_inc(&kvm_xen_enabled.key);
 640         else if (!xhc->msr && kvm->arch.xen_hvm_config.msr)
 641                 static_branch_slow_dec_deferred(&kvm_xen_enabled);
 642
 643         memcpy(&kvm->arch.xen_hvm_config, xhc, sizeof(*xhc));
 644
 645         mutex_unlock(&kvm->lock);
 646         return 0;
 647 }
 648
 649 void kvm_xen_destroy_vm(struct kvm *kvm)
 650 {
 651         if (kvm->arch.xen_hvm_config.msr)
 652                 static_branch_slow_dec_deferred(&kvm_xen_enabled);
 653 }
 654
 655 static int kvm_xen_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
 656 {
 657         kvm_rax_write(vcpu, result);
 658         return kvm_skip_emulated_instruction(vcpu);
 659 }
 660
 661 static int kvm_xen_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
 662 {
 663         struct kvm_run *run = vcpu->run;
 664
 665         if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.xen.hypercall_rip)))
 666                 return 1;
 667
 668         return kvm_xen_hypercall_set_result(vcpu, run->xen.u.hcall.result);
 669 }
 670
 671 int kvm_xen_hypercall(struct kvm_vcpu *vcpu)
 672 {
 673         bool longmode;
 674         u64 input, params[6];
 675
 676         input = (u64)kvm_register_read(vcpu, VCPU_REGS_RAX);
 677
 678         /* Hyper-V hypercalls get bit 31 set in EAX */
 679         if ((input & 0x80000000) &&
 680             kvm_hv_hypercall_enabled(vcpu))
 681                 return kvm_hv_hypercall(vcpu);
 682
 683         longmode = is_64_bit_mode(vcpu);
 684         if (!longmode) {
 685                 params[0] = (u32)kvm_rbx_read(vcpu);
 686                 params[1] = (u32)kvm_rcx_read(vcpu);
 687                 params[2] = (u32)kvm_rdx_read(vcpu);
 688                 params[3] = (u32)kvm_rsi_read(vcpu);
 689                 params[4] = (u32)kvm_rdi_read(vcpu);
 690                 params[5] = (u32)kvm_rbp_read(vcpu);
 691         }
 692 #ifdef CONFIG_X86_64
 693         else {
 694                 params[0] = (u64)kvm_rdi_read(vcpu);
 695                 params[1] = (u64)kvm_rsi_read(vcpu);
 696                 params[2] = (u64)kvm_rdx_read(vcpu);
 697                 params[3] = (u64)kvm_r10_read(vcpu);
 698                 params[4] = (u64)kvm_r8_read(vcpu);
 699                 params[5] = (u64)kvm_r9_read(vcpu);
 700         }
 701 #endif
 702         trace_kvm_xen_hypercall(input, params[0], params[1], params[2],
 703                                 params[3], params[4], params[5]);
 704
 705         vcpu->run->exit_reason = KVM_EXIT_XEN;
 706         vcpu->run->xen.type = KVM_EXIT_XEN_HCALL;
 707         vcpu->run->xen.u.hcall.longmode = longmode;
 708         vcpu->run->xen.u.hcall.cpl = kvm_x86_ops.get_cpl(vcpu);
 709         vcpu->run->xen.u.hcall.input = input;
 710         vcpu->run->xen.u.hcall.params[0] = params[0];
 711         vcpu->run->xen.u.hcall.params[1] = params[1];
 712         vcpu->run->xen.u.hcall.params[2] = params[2];
 713         vcpu->run->xen.u.hcall.params[3] = params[3];
 714         vcpu->run->xen.u.hcall.params[4] = params[4];
 715         vcpu->run->xen.u.hcall.params[5] = params[5];
 716         vcpu->arch.xen.hypercall_rip = kvm_get_linear_rip(vcpu);
 717         vcpu->arch.complete_userspace_io =
 718                 kvm_xen_hypercall_complete_userspace;
 719
 720         return 0;
 721 }