ath10k: add atomic protection for device recovery
authorWen Gong <wgong@codeaurora.org>
Tue, 8 Sep 2020 04:13:06 +0000 (04:13 +0000)
committerKalle Valo <kvalo@codeaurora.org>
Sat, 12 Dec 2020 04:38:12 +0000 (06:38 +0200)
When it has more than one restart_work queued meanwhile, the 2nd
restart_work is very easy to break the 1st restart work and lead
recovery fail.

Add a flag to allow only one restart work running untill
device successfully recovered.

It already has flag ATH10K_FLAG_CRASH_FLUSH, but it can not use this
flag again, because it is clear in ath10k_core_start. The function
ieee80211_reconfig(called by ieee80211_restart_work) of mac80211 do
many things and drv_start(call to ath10k_core_start) is 1st thing,
when drv_start complete, it does not mean restart complete. So it
add new flag and clear it in ath10k_reconfig_complete, because it
is the last thing called from drv_reconfig_complete of function
ieee80211_reconfig, after it, the restart process finished.

Tested-on: QCA6174 hw3.2 SDIO WLAN.RMH.4.4.1-00049

Signed-off-by: Wen Gong <wgong@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/010101746bead6a0-d5e97c66-dedd-4b92-810e-c2e4840fafc9-000000@us-west-2.amazonses.com
drivers/net/wireless/ath/ath10k/core.c
drivers/net/wireless/ath/ath10k/core.h
drivers/net/wireless/ath/ath10k/debug.c
drivers/net/wireless/ath/ath10k/mac.c
drivers/net/wireless/ath/ath10k/pci.c
drivers/net/wireless/ath/ath10k/sdio.c
drivers/net/wireless/ath/ath10k/snoc.c
drivers/net/wireless/ath/ath10k/wmi.c

index 93bbc01..eeb6ff6 100644 (file)
@@ -2294,6 +2294,17 @@ static int ath10k_init_hw_params(struct ath10k *ar)
        return 0;
 }
 
+void ath10k_core_start_recovery(struct ath10k *ar)
+{
+       if (test_and_set_bit(ATH10K_FLAG_RESTARTING, &ar->dev_flags)) {
+               ath10k_warn(ar, "already restarting\n");
+               return;
+       }
+
+       queue_work(ar->workqueue, &ar->restart_work);
+}
+EXPORT_SYMBOL(ath10k_core_start_recovery);
+
 static void ath10k_core_restart(struct work_struct *work)
 {
        struct ath10k *ar = container_of(work, struct ath10k, restart_work);
index f3853bd..51f7e96 100644 (file)
@@ -865,6 +865,9 @@ enum ath10k_dev_flags {
 
        /* Per Station statistics service */
        ATH10K_FLAG_PEER_STATS,
+
+       /* Indicates that ath10k device is during recovery process and not complete */
+       ATH10K_FLAG_RESTARTING,
 };
 
 enum ath10k_cal_mode {
@@ -1320,6 +1323,7 @@ int ath10k_core_start(struct ath10k *ar, enum ath10k_firmware_mode mode,
                      const struct ath10k_fw_components *fw_components);
 int ath10k_wait_for_suspend(struct ath10k *ar, u32 suspend_opt);
 void ath10k_core_stop(struct ath10k *ar);
+void ath10k_core_start_recovery(struct ath10k *ar);
 int ath10k_core_register(struct ath10k *ar,
                         const struct ath10k_bus_params *bus_params);
 void ath10k_core_unregister(struct ath10k *ar);
index b72cd81..fd052f6 100644 (file)
@@ -583,7 +583,7 @@ static ssize_t ath10k_write_simulate_fw_crash(struct file *file,
                ret = ath10k_debug_fw_assert(ar);
        } else if (!strcmp(buf, "hw-restart")) {
                ath10k_info(ar, "user requested hw restart\n");
-               queue_work(ar->workqueue, &ar->restart_work);
+               ath10k_core_start_recovery(ar);
                ret = 0;
        } else {
                ret = -EINVAL;
@@ -2005,7 +2005,7 @@ static ssize_t ath10k_write_btcoex(struct file *file,
                }
        } else {
                ath10k_info(ar, "restarting firmware due to btcoex change");
-               queue_work(ar->workqueue, &ar->restart_work);
+               ath10k_core_start_recovery(ar);
        }
 
        if (val)
@@ -2136,7 +2136,7 @@ static ssize_t ath10k_write_peer_stats(struct file *file,
 
        ath10k_info(ar, "restarting firmware due to Peer stats change");
 
-       queue_work(ar->workqueue, &ar->restart_work);
+       ath10k_core_start_recovery(ar);
        ret = count;
 
 exit:
index dc32c78..7d98250 100644 (file)
@@ -7932,6 +7932,7 @@ static void ath10k_reconfig_complete(struct ieee80211_hw *hw,
                ath10k_info(ar, "device successfully recovered\n");
                ar->state = ATH10K_STATE_ON;
                ieee80211_wake_queues(ar->hw);
+               clear_bit(ATH10K_FLAG_RESTARTING, &ar->dev_flags);
        }
 
        mutex_unlock(&ar->conf_mutex);
index 8ab2629..2328df0 100644 (file)
@@ -1774,7 +1774,7 @@ static void ath10k_pci_fw_dump_work(struct work_struct *work)
 
        mutex_unlock(&ar->dump_mutex);
 
-       queue_work(ar->workqueue, &ar->restart_work);
+       ath10k_core_start_recovery(ar);
 }
 
 static void ath10k_pci_fw_crashed_dump(struct ath10k *ar)
index 15474c1..c415090 100644 (file)
@@ -561,7 +561,7 @@ static int ath10k_sdio_mbox_rx_alloc(struct ath10k *ar,
                                    ATH10K_HTC_MBOX_MAX_PAYLOAD_LENGTH);
                        ret = -ENOMEM;
 
-                       queue_work(ar->workqueue, &ar->restart_work);
+                       ath10k_core_start_recovery(ar);
                        ath10k_warn(ar, "exceeds length, start recovery\n");
 
                        goto err;
@@ -960,7 +960,7 @@ static int ath10k_sdio_mbox_read_int_status(struct ath10k *ar,
        ret = ath10k_sdio_read(ar, MBOX_HOST_INT_STATUS_ADDRESS,
                               irq_proc_reg, sizeof(*irq_proc_reg));
        if (ret) {
-               queue_work(ar->workqueue, &ar->restart_work);
+               ath10k_core_start_recovery(ar);
                ath10k_warn(ar, "read int status fail, start recovery\n");
                goto out;
        }
@@ -2501,7 +2501,7 @@ void ath10k_sdio_fw_crashed_dump(struct ath10k *ar)
 
        ath10k_sdio_enable_intrs(ar);
 
-       queue_work(ar->workqueue, &ar->restart_work);
+       ath10k_core_start_recovery(ar);
 }
 
 static int ath10k_sdio_probe(struct sdio_func *func,
index fd41f25..bf9a8cb 100644 (file)
@@ -1305,7 +1305,7 @@ int ath10k_snoc_fw_indication(struct ath10k *ar, u64 type)
        switch (type) {
        case ATH10K_QMI_EVENT_FW_READY_IND:
                if (test_bit(ATH10K_SNOC_FLAG_REGISTERED, &ar_snoc->flags)) {
-                       queue_work(ar->workqueue, &ar->restart_work);
+                       ath10k_core_start_recovery(ar);
                        break;
                }
 
index c491ace..1f33947 100644 (file)
@@ -1937,7 +1937,7 @@ int ath10k_wmi_cmd_send(struct ath10k *ar, struct sk_buff *skb, u32 cmd_id)
        if (ret == -EAGAIN) {
                ath10k_warn(ar, "wmi command %d timeout, restarting hardware\n",
                            cmd_id);
-               queue_work(ar->workqueue, &ar->restart_work);
+               ath10k_core_start_recovery(ar);
        }
 
        return ret;