/* Initial target buffer size */
#define VIRTIOVF_TARGET_INITIAL_BUF_SIZE SZ_1M
+static int
+virtiovf_read_device_context_chunk(struct virtiovf_migration_file *migf,
+ u32 ctx_size);
+
static struct page *
virtiovf_get_migration_page(struct virtiovf_data_buffer *buf,
unsigned long offset)
VIRTIO_RESOURCE_OBJ_DEV_PARTS, obj_id);
}
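+/*
+ * Get a buffer whose allocated length is at least 'length': reuse one from
+ * the available list when possible, otherwise allocate a new one. Smaller
+ * buffers encountered during the scan are freed rather than kept around.
+ */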
+static struct virtiovf_data_buffer *
+virtiovf_get_data_buffer(struct virtiovf_migration_file *migf, size_t length)
+{
+ struct virtiovf_data_buffer *buf, *temp_buf;
+ struct list_head free_list;
+
+ INIT_LIST_HEAD(&free_list);
+
+ spin_lock_irq(&migf->list_lock);
+ list_for_each_entry_safe(buf, temp_buf, &migf->avail_list, buf_elm) {
+ list_del_init(&buf->buf_elm);
+ if (buf->allocated_length >= length) {
+ spin_unlock_irq(&migf->list_lock);
+ goto found;
+ }
+ /*
+ * Avoid holding redundant buffers: move them to a local free
+ * list and release them at the end, outside the spin lock
+ * (&migf->list_lock), to minimize its hold time.
+ */
+ list_add(&buf->buf_elm, &free_list);
+ }
+ spin_unlock_irq(&migf->list_lock);
+ buf = virtiovf_alloc_data_buffer(migf, length);
+
+found:
+ while ((temp_buf = list_first_entry_or_null(&free_list,
+ struct virtiovf_data_buffer, buf_elm))) {
+ list_del(&temp_buf->buf_elm);
+ virtiovf_free_data_buffer(temp_buf);
+ }
+
+ return buf;
+}
+
static void virtiovf_clean_migf_resources(struct virtiovf_migration_file *migf)
{
struct virtiovf_data_buffer *entry;
{
struct virtiovf_migration_file *migf = filp->private_data;
struct virtiovf_data_buffer *vhca_buf;
+ bool first_loop_call = true;
bool end_of_data;
ssize_t done = 0;
ssize_t count;
vhca_buf = virtiovf_get_data_buff_from_pos(migf, *pos, &end_of_data);
+ if (first_loop_call) {
+ first_loop_call = false;
+ /* Temporary end of file as part of PRE_COPY */
+ if (end_of_data && migf->state == VIRTIOVF_MIGF_STATE_PRECOPY) {
+ done = -ENOMSG;
+ goto out_unlock;
+ }
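+ /* End of data was reached before the device state was fully saved */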
+ if (end_of_data && migf->state != VIRTIOVF_MIGF_STATE_COMPLETE) {
+ done = -EINVAL;
+ goto out_unlock;
+ }
+ }
+
if (end_of_data)
goto out_unlock;
return done;
}
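+/*
+ * VFIO_MIG_GET_PRECOPY_INFO: report the initial and dirty bytes still
+ * pending so that user space can decide when to move to STOP_COPY.
+ */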
+static long virtiovf_precopy_ioctl(struct file *filp, unsigned int cmd,
+ unsigned long arg)
+{
+ struct virtiovf_migration_file *migf = filp->private_data;
+ struct virtiovf_pci_core_device *virtvdev = migf->virtvdev;
+ struct vfio_precopy_info info = {};
+ loff_t *pos = &filp->f_pos;
+ bool end_of_data = false;
+ unsigned long minsz;
+ u32 ctx_size = 0;
+ int ret;
+
+ if (cmd != VFIO_MIG_GET_PRECOPY_INFO)
+ return -ENOTTY;
+
+ minsz = offsetofend(struct vfio_precopy_info, dirty_bytes);
+ if (copy_from_user(&info, (void __user *)arg, minsz))
+ return -EFAULT;
+
+ if (info.argsz < minsz)
+ return -EINVAL;
+
+ mutex_lock(&virtvdev->state_mutex);
+ if (virtvdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY &&
+ virtvdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY_P2P) {
+ ret = -EINVAL;
+ goto err_state_unlock;
+ }
+
+ /*
+ * The virtio specification does not include a PRE_COPY concept.
+ * Since the device data can be expected to stay the same for some
+ * period, use a rate limiter before querying the device again.
+ */
+ if (__ratelimit(&migf->pre_copy_rl_state)) {
+ ret = virtio_pci_admin_dev_parts_metadata_get(virtvdev->core_device.pdev,
+ VIRTIO_RESOURCE_OBJ_DEV_PARTS, migf->obj_id,
+ VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE,
+ &ctx_size);
+ if (ret)
+ goto err_state_unlock;
+ }
+
+ mutex_lock(&migf->lock);
+ if (migf->state == VIRTIOVF_MIGF_STATE_ERROR) {
+ ret = -ENODEV;
+ goto err_migf_unlock;
+ }
+
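+ /*
+ * initial_bytes covers what is left of the first captured device
+ * context; afterwards, dirty_bytes reports the data still pending,
+ * including any newly reported device context size.
+ */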
+ if (migf->pre_copy_initial_bytes > *pos) {
+ info.initial_bytes = migf->pre_copy_initial_bytes - *pos;
+ } else {
+ info.dirty_bytes = migf->max_pos - *pos;
+ if (!info.dirty_bytes)
+ end_of_data = true;
+ info.dirty_bytes += ctx_size;
+ }
+
+ if (!end_of_data || !ctx_size) {
+ mutex_unlock(&migf->lock);
+ goto done;
+ }
+
+ mutex_unlock(&migf->lock);
+ /*
+ * We finished transferring the current state and the device has a
+ * dirty state, read a new state.
+ */
+ ret = virtiovf_read_device_context_chunk(migf, ctx_size);
+ if (ret)
+ /*
+ * The machine is running, and the context size may still grow,
+ * so there is no reason to mark the device state as
+ * VIRTIOVF_MIGF_STATE_ERROR.
+ */
+ goto err_state_unlock;
+
+done:
+ virtiovf_state_mutex_unlock(virtvdev);
+ if (copy_to_user((void __user *)arg, &info, minsz))
+ return -EFAULT;
+ return 0;
+
+err_migf_unlock:
+ mutex_unlock(&migf->lock);
+err_state_unlock:
+ virtiovf_state_mutex_unlock(virtvdev);
+ return ret;
+}
+
static const struct file_operations virtiovf_save_fops = {
.owner = THIS_MODULE,
.read = virtiovf_save_read,
+ .unlocked_ioctl = virtiovf_precopy_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
.release = virtiovf_release_file,
};
int nent;
int ret;
- buf = virtiovf_alloc_data_buffer(migf, ctx_size);
+ buf = virtiovf_get_data_buffer(migf, ctx_size);
if (IS_ERR(buf))
return PTR_ERR(buf);
goto out;
buf->length = res_size;
- header_buf = virtiovf_alloc_data_buffer(migf,
+ header_buf = virtiovf_get_data_buffer(migf,
sizeof(struct virtiovf_migration_header));
if (IS_ERR(header_buf)) {
ret = PTR_ERR(header_buf);
return ret;
}
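+/*
+ * Once the device has been stopped, read the final device context and mark
+ * the migration file as complete.
+ */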
+static int
+virtiovf_pci_save_device_final_data(struct virtiovf_pci_core_device *virtvdev)
+{
+ struct virtiovf_migration_file *migf = virtvdev->saving_migf;
+ u32 ctx_size;
+ int ret;
+
+ if (migf->state == VIRTIOVF_MIGF_STATE_ERROR)
+ return -ENODEV;
+
+ ret = virtio_pci_admin_dev_parts_metadata_get(virtvdev->core_device.pdev,
+ VIRTIO_RESOURCE_OBJ_DEV_PARTS, migf->obj_id,
+ VIRTIO_ADMIN_CMD_DEV_PARTS_METADATA_TYPE_SIZE,
+ &ctx_size);
+ if (ret)
+ goto err;
+
+ if (!ctx_size) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ ret = virtiovf_read_device_context_chunk(migf, ctx_size);
+ if (ret)
+ goto err;
+
+ migf->state = VIRTIOVF_MIGF_STATE_COMPLETE;
+ return 0;
+
+err:
+ migf->state = VIRTIOVF_MIGF_STATE_ERROR;
+ return ret;
+}
+
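+/*
+ * When pre_copy is set, the captured data is only an initial snapshot and
+ * the file stays in PRECOPY state; otherwise the data is final and the
+ * file is marked COMPLETE.
+ */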
static struct virtiovf_migration_file *
-virtiovf_pci_save_device_data(struct virtiovf_pci_core_device *virtvdev)
+virtiovf_pci_save_device_data(struct virtiovf_pci_core_device *virtvdev,
+ bool pre_copy)
{
struct virtiovf_migration_file *migf;
u32 ctx_size;
if (ret)
goto out_clean;
+ if (pre_copy) {
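+ /* All data captured so far counts as the mandatory initial_bytes */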
+ migf->pre_copy_initial_bytes = migf->max_pos;
+ /* Arbitrarily set the pre-copy rate limit to 1-second intervals */
+ ratelimit_state_init(&migf->pre_copy_rl_state, 1 * HZ, 1);
+ /* Prevent rate-limit messages while the limiter is in use */
+ ratelimit_set_flags(&migf->pre_copy_rl_state,
+ RATELIMIT_MSG_ON_RELEASE);
+ migf->state = VIRTIOVF_MIGF_STATE_PRECOPY;
+ } else {
+ migf->state = VIRTIOVF_MIGF_STATE_COMPLETE;
+ }
+
return migf;
out_clean:
return NULL;
}
- if (cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_RUNNING_P2P) {
+ if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_RUNNING_P2P) ||
+ (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_PRE_COPY_P2P)) {
ret = virtio_pci_admin_mode_set(virtvdev->core_device.pdev,
BIT(VIRTIO_ADMIN_CMD_DEV_MODE_F_STOPPED));
if (ret)
return NULL;
}
- if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_RUNNING) {
+ if ((cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_RUNNING) ||
+ (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_PRE_COPY)) {
ret = virtio_pci_admin_mode_set(virtvdev->core_device.pdev, 0);
if (ret)
return ERR_PTR(ret);
if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_STOP_COPY) {
struct virtiovf_migration_file *migf;
- migf = virtiovf_pci_save_device_data(virtvdev);
+ migf = virtiovf_pci_save_device_data(virtvdev, false);
if (IS_ERR(migf))
return ERR_CAST(migf);
get_file(migf->filp);
return migf->filp;
}
- if (cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP) {
+ if ((cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP) ||
+ (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_RUNNING) ||
+ (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_RUNNING_P2P)) {
virtiovf_disable_fds(virtvdev);
return NULL;
}
return NULL;
}
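+ /* Entering pre-copy: open a saving migration file while still running */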
+ if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_PRE_COPY) ||
+ (cur == VFIO_DEVICE_STATE_RUNNING_P2P &&
+ new == VFIO_DEVICE_STATE_PRE_COPY_P2P)) {
+ struct virtiovf_migration_file *migf;
+
+ migf = virtiovf_pci_save_device_data(virtvdev, true);
+ if (IS_ERR(migf))
+ return ERR_CAST(migf);
+ get_file(migf->filp);
+ virtvdev->saving_migf = migf;
+ return migf->filp;
+ }
+
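+ /* The device was already stopped in P2P; just read the final context */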
+ if (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_STOP_COPY) {
+ ret = virtiovf_pci_save_device_final_data(virtvdev);
+ return ret ? ERR_PTR(ret) : NULL;
+ }
+
/*
* vfio_mig_get_next_state() does not use arcs other than the above
*/
spin_lock_init(&virtvdev->reset_lock);
virtvdev->core_device.vdev.migration_flags =
VFIO_MIGRATION_STOP_COPY |
- VFIO_MIGRATION_P2P;
+ VFIO_MIGRATION_P2P |
+ VFIO_MIGRATION_PRE_COPY;
virtvdev->core_device.vdev.mig_ops = &virtvdev_pci_mig_ops;
}