s390/cio: recover from bad paths
author Sebastian Ott <sebott@linux.vnet.ibm.com>
Thu, 14 Sep 2017 11:55:22 +0000 (13:55 +0200)
committer Martin Schwidefsky <schwidefsky@de.ibm.com>
Tue, 19 Sep 2017 06:36:19 +0000 (08:36 +0200)
In some situations we don't receive notification from firmware that
a previously unusable channelpath is usable again.

Schedule recovery for devices that return from path verification
without using all potentially usable paths. The recovery thread will
periodically trigger a path verification on the affected devices.

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Suggested-by: Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
Reviewed-by: Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
drivers/s390/cio/device.c
drivers/s390/cio/device.h
drivers/s390/cio/device_fsm.c
drivers/s390/cio/io_sch.h

index 489b583..e5c32f4 100644 (file)
@@ -1225,10 +1225,16 @@ static int device_is_disconnected(struct ccw_device *cdev)
 static int recovery_check(struct device *dev, void *data)
 {
        struct ccw_device *cdev = to_ccwdev(dev);
+       struct subchannel *sch;
        int *redo = data;
 
        spin_lock_irq(cdev->ccwlock);
        switch (cdev->private->state) {
+       case DEV_STATE_ONLINE:
+               sch = to_subchannel(cdev->dev.parent);
+               if ((sch->schib.pmcw.pam & sch->opm) == sch->vpm)
+                       break;
+               /* fall through */
        case DEV_STATE_DISCONNECTED:
                CIO_MSG_EVENT(3, "recovery: trigger 0.%x.%04x\n",
                              cdev->private->dev_id.ssid,
@@ -1260,7 +1266,7 @@ static void recovery_work_func(struct work_struct *unused)
                }
                spin_unlock_irq(&recovery_lock);
        } else
-               CIO_MSG_EVENT(4, "recovery: end\n");
+               CIO_MSG_EVENT(3, "recovery: end\n");
 }
 
 static DECLARE_WORK(recovery_work, recovery_work_func);
@@ -1274,11 +1280,11 @@ static void recovery_func(unsigned long data)
        schedule_work(&recovery_work);
 }
 
-static void ccw_device_schedule_recovery(void)
+void ccw_device_schedule_recovery(void)
 {
        unsigned long flags;
 
-       CIO_MSG_EVENT(4, "recovery: schedule\n");
+       CIO_MSG_EVENT(3, "recovery: schedule\n");
        spin_lock_irqsave(&recovery_lock, flags);
        if (!timer_pending(&recovery_timer) || (recovery_phase != 0)) {
                recovery_phase = 0;
index ec497af..69cb70f 100644 (file)
@@ -134,6 +134,7 @@ void ccw_device_set_disconnected(struct ccw_device *cdev);
 void ccw_device_set_notoper(struct ccw_device *cdev);
 
 void ccw_device_set_timeout(struct ccw_device *, int);
+void ccw_device_schedule_recovery(void);
 
 /* Channel measurement facility related */
 void retry_set_schib(struct ccw_device *cdev);
index 12016e3..f98ea67 100644 (file)
@@ -476,6 +476,17 @@ static void create_fake_irb(struct irb *irb, int type)
        }
 }
 
+static void ccw_device_handle_broken_paths(struct ccw_device *cdev)
+{
+       struct subchannel *sch = to_subchannel(cdev->dev.parent);
+       u8 broken_paths = (sch->schib.pmcw.pam & sch->opm) ^ sch->vpm;
+
+       if (broken_paths && (cdev->private->path_broken_mask != broken_paths))
+               ccw_device_schedule_recovery();
+
+       cdev->private->path_broken_mask = broken_paths;
+}
+
 void ccw_device_verify_done(struct ccw_device *cdev, int err)
 {
        struct subchannel *sch;
@@ -508,6 +519,7 @@ callback:
                        memset(&cdev->private->irb, 0, sizeof(struct irb));
                }
                ccw_device_report_path_events(cdev);
+               ccw_device_handle_broken_paths(cdev);
                break;
        case -ETIME:
        case -EUSERS:
index 220f491..9a1b56b 100644 (file)
@@ -131,6 +131,8 @@ struct ccw_device_private {
                                   not operable */
        u8 path_gone_mask;      /* mask of paths, that became unavailable */
        u8 path_new_mask;       /* mask of paths, that became available */
+       u8 path_broken_mask;    /* mask of paths, which were found to be
+                                  unusable */
        struct {
                unsigned int fast:1;    /* post with "channel end" */
                unsigned int repall:1;  /* report every interrupt status */