From scsi_init_command(), a function called by scsi_mq_prep_fn():
	/* zero out the cmd, except for the embedded scsi_request */
	memset((char *)cmd + sizeof(cmd->req), 0,
		sizeof(*cmd) - sizeof(cmd->req) + dev->host->hostt->cmd_size);
In other words, scsi_mq_prep_fn() clears scsi_cmnd.state. Hence move the
clear_bit() call into the else branch, the only branch in which this code
is necessary.
See also commit f1342709d18a ("scsi: Do not rely on blk-mq for double
completions").
Cc: Keith Busch <keith.busch@intel.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
        if (!scsi_host_queue_ready(q, shost, sdev))
                goto out_dec_target_busy;
 
-       clear_bit(SCMD_STATE_COMPLETE, &cmd->state);
        if (!(req->rq_flags & RQF_DONTPREP)) {
                ret = scsi_mq_prep_fn(req);
                if (ret != BLK_STS_OK)
                        goto out_dec_host_busy;
                req->rq_flags |= RQF_DONTPREP;
        } else {
+               clear_bit(SCMD_STATE_COMPLETE, &cmd->state);
                blk_mq_start_request(req);
        }