{ /* sentinel */ }
};
+static const struct hclge_hw_module_id hclge_hw_module_id_st[] = {
+ {
+ .module_id = MODULE_NONE,
+ .msg = "MODULE_NONE"
+ }, {
+ .module_id = MODULE_BIOS_COMMON,
+ .msg = "MODULE_BIOS_COMMON"
+ }, {
+ .module_id = MODULE_GE,
+ .msg = "MODULE_GE"
+ }, {
+ .module_id = MODULE_IGU_EGU,
+ .msg = "MODULE_IGU_EGU"
+ }, {
+ .module_id = MODULE_LGE,
+ .msg = "MODULE_LGE"
+ }, {
+ .module_id = MODULE_NCSI,
+ .msg = "MODULE_NCSI"
+ }, {
+ .module_id = MODULE_PPP,
+ .msg = "MODULE_PPP"
+ }, {
+ .module_id = MODULE_QCN,
+ .msg = "MODULE_QCN"
+ }, {
+ .module_id = MODULE_RCB_RX,
+ .msg = "MODULE_RCB_RX"
+ }, {
+ .module_id = MODULE_RTC,
+ .msg = "MODULE_RTC"
+ }, {
+ .module_id = MODULE_SSU,
+ .msg = "MODULE_SSU"
+ }, {
+ .module_id = MODULE_TM,
+ .msg = "MODULE_TM"
+ }, {
+ .module_id = MODULE_RCB_TX,
+ .msg = "MODULE_RCB_TX"
+ }, {
+ .module_id = MODULE_TXDMA,
+ .msg = "MODULE_TXDMA"
+ }, {
+ .module_id = MODULE_MASTER,
+ .msg = "MODULE_MASTER"
+ }, {
+ .module_id = MODULE_ROCEE_TOP,
+ .msg = "MODULE_ROCEE_TOP"
+ }, {
+ .module_id = MODULE_ROCEE_TIMER,
+ .msg = "MODULE_ROCEE_TIMER"
+ }, {
+ .module_id = MODULE_ROCEE_MDB,
+ .msg = "MODULE_ROCEE_MDB"
+ }, {
+ .module_id = MODULE_ROCEE_TSP,
+ .msg = "MODULE_ROCEE_TSP"
+ }, {
+ .module_id = MODULE_ROCEE_TRP,
+ .msg = "MODULE_ROCEE_TRP"
+ }, {
+ .module_id = MODULE_ROCEE_SCC,
+ .msg = "MODULE_ROCEE_SCC"
+ }, {
+ .module_id = MODULE_ROCEE_CAEP,
+ .msg = "MODULE_ROCEE_CAEP"
+ }, {
+ .module_id = MODULE_ROCEE_GEN_AC,
+ .msg = "MODULE_ROCEE_GEN_AC"
+ }, {
+ .module_id = MODULE_ROCEE_QMM,
+ .msg = "MODULE_ROCEE_QMM"
+ }, {
+ .module_id = MODULE_ROCEE_LSAN,
+ .msg = "MODULE_ROCEE_LSAN"
+ }
+};
+
+static const struct hclge_hw_type_id hclge_hw_type_id_st[] = {
+ {
+ .type_id = NONE_ERROR,
+ .msg = "none_error"
+ }, {
+ .type_id = FIFO_ERROR,
+ .msg = "fifo_error"
+ }, {
+ .type_id = MEMORY_ERROR,
+ .msg = "memory_error"
+ }, {
+ .type_id = POISON_ERROR,
+ .msg = "poison_error"
+ }, {
+ .type_id = MSIX_ECC_ERROR,
+ .msg = "msix_ecc_error"
+ }, {
+ .type_id = TQP_INT_ECC_ERROR,
+ .msg = "tqp_int_ecc_error"
+ }, {
+ .type_id = PF_ABNORMAL_INT_ERROR,
+ .msg = "pf_abnormal_int_error"
+ }, {
+ .type_id = MPF_ABNORMAL_INT_ERROR,
+ .msg = "mpf_abnormal_int_error"
+ }, {
+ .type_id = COMMON_ERROR,
+ .msg = "common_error"
+ }, {
+ .type_id = PORT_ERROR,
+ .msg = "port_error"
+ }, {
+ .type_id = ETS_ERROR,
+ .msg = "ets_error"
+ }, {
+ .type_id = NCSI_ERROR,
+ .msg = "ncsi_error"
+ }, {
+ .type_id = GLB_ERROR,
+ .msg = "glb_error"
+ }, {
+ .type_id = ROCEE_NORMAL_ERR,
+ .msg = "rocee_normal_error"
+ }, {
+ .type_id = ROCEE_OVF_ERR,
+ .msg = "rocee_ovf_error"
+ }
+};
+
static void hclge_log_error(struct device *dev, char *reg,
const struct hclge_hw_error *err,
u32 err_sts, unsigned long *reset_requests)
{ /* sentinel */ }
};
+static void hclge_config_all_msix_error(struct hclge_dev *hdev, bool enable)
+{
+ u32 reg_val;
+
+ reg_val = hclge_read_dev(&hdev->hw, HCLGE_PF_OTHER_INT_REG);
+
+ if (enable)
+ reg_val |= BIT(HCLGE_VECTOR0_ALL_MSIX_ERR_B);
+ else
+ reg_val &= ~BIT(HCLGE_VECTOR0_ALL_MSIX_ERR_B);
+
+ hclge_write_dev(&hdev->hw, HCLGE_PF_OTHER_INT_REG, reg_val);
+}
+
int hclge_config_nic_hw_error(struct hclge_dev *hdev, bool state)
{
const struct hclge_hw_blk *module = hw_blk;
int ret = 0;
+ hclge_config_all_msix_error(hdev, state);
+
while (module->name) {
if (module->config_err_int) {
ret = module->config_err_int(hdev, state);
static int hclge_handle_all_hw_msix_error(struct hclge_dev *hdev,
unsigned long *reset_requests)
{
- struct hclge_mac_tnl_stats mac_tnl_stats;
- struct device *dev = &hdev->pdev->dev;
u32 mpf_bd_num, pf_bd_num, bd_num;
struct hclge_desc *desc;
- u32 status;
int ret;
/* query the number of bds for the MSIx int status */
if (ret)
goto msi_error;
- /* query and clear mac tnl interruptions */
- hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_QUERY_MAC_TNL_INT,
- true);
- ret = hclge_cmd_send(&hdev->hw, &desc[0], 1);
- if (ret) {
- dev_err(dev, "query mac tnl int cmd failed (%d)\n", ret);
- goto msi_error;
- }
-
- status = le32_to_cpu(desc->data[0]);
- if (status) {
- /* When mac tnl interrupt occurs, we record current time and
- * register status here in a fifo, then clear the status. So
- * that if link status changes suddenly at some time, we can
- * query them by debugfs.
- */
- mac_tnl_stats.time = local_clock();
- mac_tnl_stats.status = status;
- kfifo_put(&hdev->mac_tnl_log, mac_tnl_stats);
- ret = hclge_clear_mac_tnl_int(hdev);
- if (ret)
- dev_err(dev, "clear mac tnl int failed (%d)\n", ret);
- }
+ ret = hclge_handle_mac_tnl(hdev);
msi_error:
kfree(desc);
return hclge_handle_all_hw_msix_error(hdev, reset_requests);
}
-void hclge_handle_all_hns_hw_errors(struct hnae3_ae_dev *ae_dev)
+int hclge_handle_mac_tnl(struct hclge_dev *hdev)
{
-#define HCLGE_DESC_NO_DATA_LEN 8
+ struct hclge_mac_tnl_stats mac_tnl_stats;
+ struct device *dev = &hdev->pdev->dev;
+ struct hclge_desc desc;
+ u32 status;
+ int ret;
+
+ /* query and clear mac tnl interruptions */
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_MAC_TNL_INT, true);
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(dev, "failed to query mac tnl int, ret = %d.\n", ret);
+ return ret;
+ }
+ status = le32_to_cpu(desc.data[0]);
+ if (status) {
+ /* When mac tnl interrupt occurs, we record current time and
+ * register status here in a fifo, then clear the status. So
+ * that if link status changes suddenly at some time, we can
+ * query them by debugfs.
+ */
+ mac_tnl_stats.time = local_clock();
+ mac_tnl_stats.status = status;
+ kfifo_put(&hdev->mac_tnl_log, mac_tnl_stats);
+ ret = hclge_clear_mac_tnl_int(hdev);
+ if (ret)
+ dev_err(dev, "failed to clear mac tnl int, ret = %d.\n",
+ ret);
+ }
+
+ return ret;
+}
+
+void hclge_handle_all_hns_hw_errors(struct hnae3_ae_dev *ae_dev)
+{
struct hclge_dev *hdev = ae_dev->priv;
struct device *dev = &hdev->pdev->dev;
u32 mpf_bd_num, pf_bd_num, bd_num;
msi_error:
kfree(desc);
}
+
+bool hclge_find_error_source(struct hclge_dev *hdev)
+{
+ u32 msix_src_flag, hw_err_src_flag;
+
+ msix_src_flag = hclge_read_dev(&hdev->hw, HCLGE_MISC_VECTOR_INT_STS) &
+ HCLGE_VECTOR0_REG_MSIX_MASK;
+
+ hw_err_src_flag = hclge_read_dev(&hdev->hw,
+ HCLGE_RAS_PF_OTHER_INT_STS_REG) &
+ HCLGE_RAS_REG_ERR_MASK;
+
+ return msix_src_flag || hw_err_src_flag;
+}
+
+void hclge_handle_occurred_error(struct hclge_dev *hdev)
+{
+ struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
+
+ if (hclge_find_error_source(hdev))
+ hclge_handle_error_info_log(ae_dev);
+}
+
+static void
+hclge_handle_error_type_reg_log(struct device *dev,
+ struct hclge_mod_err_info *mod_info,
+ struct hclge_type_reg_err_info *type_reg_info)
+{
+#define HCLGE_ERR_TYPE_MASK 0x7F
+#define HCLGE_ERR_TYPE_IS_RAS_OFFSET 7
+
+ u8 mod_id, total_module, type_id, total_type, i, is_ras;
+ u8 index_module = MODULE_NONE;
+ u8 index_type = NONE_ERROR;
+
+ mod_id = mod_info->mod_id;
+ type_id = type_reg_info->type_id & HCLGE_ERR_TYPE_MASK;
+ is_ras = type_reg_info->type_id >> HCLGE_ERR_TYPE_IS_RAS_OFFSET;
+
+ total_module = ARRAY_SIZE(hclge_hw_module_id_st);
+ total_type = ARRAY_SIZE(hclge_hw_type_id_st);
+
+ for (i = 0; i < total_module; i++) {
+ if (mod_id == hclge_hw_module_id_st[i].module_id) {
+ index_module = i;
+ break;
+ }
+ }
+
+ for (i = 0; i < total_type; i++) {
+ if (type_id == hclge_hw_type_id_st[i].type_id) {
+ index_type = i;
+ break;
+ }
+ }
+
+ if (index_module != MODULE_NONE && index_type != NONE_ERROR)
+ dev_err(dev,
+ "found %s %s, is %s error.\n",
+ hclge_hw_module_id_st[index_module].msg,
+ hclge_hw_type_id_st[index_type].msg,
+ is_ras ? "ras" : "msix");
+ else
+ dev_err(dev,
+ "unknown module[%u] or type[%u].\n", mod_id, type_id);
+
+ dev_err(dev, "reg_value:\n");
+ for (i = 0; i < type_reg_info->reg_num; i++)
+ dev_err(dev, "0x%08x\n", type_reg_info->hclge_reg[i]);
+}
+
+static void hclge_handle_error_module_log(struct hnae3_ae_dev *ae_dev,
+ const u32 *buf, u32 buf_size)
+{
+ struct hclge_type_reg_err_info *type_reg_info;
+ struct hclge_dev *hdev = ae_dev->priv;
+ struct device *dev = &hdev->pdev->dev;
+ struct hclge_mod_err_info *mod_info;
+ struct hclge_sum_err_info *sum_info;
+ u8 mod_num, err_num, i;
+ u32 offset = 0;
+
+ sum_info = (struct hclge_sum_err_info *)&buf[offset++];
+ if (sum_info->reset_type &&
+ sum_info->reset_type != HNAE3_NONE_RESET)
+ set_bit(sum_info->reset_type, &ae_dev->hw_err_reset_req);
+ mod_num = sum_info->mod_num;
+
+ while (mod_num--) {
+ if (offset >= buf_size) {
+ dev_err(dev, "The offset(%u) exceeds buf's size(%u).\n",
+ offset, buf_size);
+ return;
+ }
+ mod_info = (struct hclge_mod_err_info *)&buf[offset++];
+ err_num = mod_info->err_num;
+
+ for (i = 0; i < err_num; i++) {
+ if (offset >= buf_size) {
+ dev_err(dev,
+ "The offset(%u) exceeds buf size(%u).\n",
+ offset, buf_size);
+ return;
+ }
+
+ type_reg_info = (struct hclge_type_reg_err_info *)
+ &buf[offset++];
+ hclge_handle_error_type_reg_log(dev, mod_info,
+ type_reg_info);
+
+ offset += type_reg_info->reg_num;
+ }
+ }
+}
+
+static int hclge_query_all_err_bd_num(struct hclge_dev *hdev, u32 *bd_num)
+{
+ struct device *dev = &hdev->pdev->dev;
+ struct hclge_desc desc_bd;
+ int ret;
+
+ hclge_cmd_setup_basic_desc(&desc_bd, HCLGE_QUERY_ALL_ERR_BD_NUM, true);
+ ret = hclge_cmd_send(&hdev->hw, &desc_bd, 1);
+ if (ret) {
+ dev_err(dev, "failed to query error bd_num, ret = %d.\n", ret);
+ return ret;
+ }
+
+ *bd_num = le32_to_cpu(desc_bd.data[0]);
+ if (!(*bd_num)) {
+ dev_err(dev, "The value of bd_num is 0!\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int hclge_query_all_err_info(struct hclge_dev *hdev,
+ struct hclge_desc *desc, u32 bd_num)
+{
+ struct device *dev = &hdev->pdev->dev;
+ int ret;
+
+ hclge_cmd_setup_basic_desc(desc, HCLGE_QUERY_ALL_ERR_INFO, true);
+ ret = hclge_cmd_send(&hdev->hw, desc, bd_num);
+ if (ret)
+ dev_err(dev, "failed to query error info, ret = %d.\n", ret);
+
+ return ret;
+}
+
+int hclge_handle_error_info_log(struct hnae3_ae_dev *ae_dev)
+{
+ u32 bd_num, desc_len, buf_len, buf_size, i;
+ struct hclge_dev *hdev = ae_dev->priv;
+ struct hclge_desc *desc;
+ __le32 *desc_data;
+ u32 *buf;
+ int ret;
+
+ ret = hclge_query_all_err_bd_num(hdev, &bd_num);
+ if (ret)
+ goto out;
+
+ desc_len = bd_num * sizeof(struct hclge_desc);
+ desc = kzalloc(desc_len, GFP_KERNEL);
+ if (!desc) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = hclge_query_all_err_info(hdev, desc, bd_num);
+ if (ret)
+ goto err_desc;
+
+ buf_len = bd_num * sizeof(struct hclge_desc) - HCLGE_DESC_NO_DATA_LEN;
+ buf_size = buf_len / sizeof(u32);
+
+ desc_data = kzalloc(buf_len, GFP_KERNEL);
+ if (!desc_data)
+ return -ENOMEM;
+
+ buf = kzalloc(buf_len, GFP_KERNEL);
+ if (!buf) {
+ ret = -ENOMEM;
+ goto err_buf_alloc;
+ }
+
+ memcpy(desc_data, &desc[0].data[0], buf_len);
+ for (i = 0; i < buf_size; i++)
+ buf[i] = le32_to_cpu(desc_data[i]);
+
+ hclge_handle_error_module_log(ae_dev, buf, buf_size);
+ kfree(buf);
+
+err_buf_alloc:
+ kfree(desc_data);
+err_desc:
+ kfree(desc);
+out:
+ return ret;
+}
#define HCLGE_RAS_PF_OTHER_INT_STS_REG 0x20B00
#define HCLGE_RAS_REG_NFE_MASK 0xFF00
#define HCLGE_RAS_REG_ROCEE_ERR_MASK 0x3000000
+#define HCLGE_RAS_REG_ERR_MASK \
+ (HCLGE_RAS_REG_NFE_MASK | HCLGE_RAS_REG_ROCEE_ERR_MASK)
#define HCLGE_VECTOR0_REG_MSIX_MASK 0x1FF00
#define HCLGE_ROCEE_OVF_ERR_INT_MASK 0x10000
#define HCLGE_ROCEE_OVF_ERR_TYPE_MASK 0x3F
+#define HCLGE_DESC_DATA_MAX 8
+#define HCLGE_REG_NUM_MAX 256
+#define HCLGE_DESC_NO_DATA_LEN 8
+
enum hclge_err_int_type {
HCLGE_ERR_INT_MSIX = 0,
HCLGE_ERR_INT_RAS_CE = 1,
HCLGE_ERR_INT_RAS_FE = 3,
};
+enum hclge_mod_name_list {
+ MODULE_NONE = 0,
+ MODULE_BIOS_COMMON = 1,
+ MODULE_GE = 2,
+ MODULE_IGU_EGU = 3,
+ MODULE_LGE = 4,
+ MODULE_NCSI = 5,
+ MODULE_PPP = 6,
+ MODULE_QCN = 7,
+ MODULE_RCB_RX = 8,
+ MODULE_RTC = 9,
+ MODULE_SSU = 10,
+ MODULE_TM = 11,
+ MODULE_RCB_TX = 12,
+ MODULE_TXDMA = 13,
+ MODULE_MASTER = 14,
+ /* add new MODULE NAME for NIC here in order */
+ MODULE_ROCEE_TOP = 40,
+ MODULE_ROCEE_TIMER = 41,
+ MODULE_ROCEE_MDB = 42,
+ MODULE_ROCEE_TSP = 43,
+ MODULE_ROCEE_TRP = 44,
+ MODULE_ROCEE_SCC = 45,
+ MODULE_ROCEE_CAEP = 46,
+ MODULE_ROCEE_GEN_AC = 47,
+ MODULE_ROCEE_QMM = 48,
+ MODULE_ROCEE_LSAN = 49,
+ /* add new MODULE NAME for RoCEE here in order */
+};
+
+enum hclge_err_type_list {
+ NONE_ERROR = 0,
+ FIFO_ERROR = 1,
+ MEMORY_ERROR = 2,
+ POISON_ERROR = 3,
+ MSIX_ECC_ERROR = 4,
+ TQP_INT_ECC_ERROR = 5,
+ PF_ABNORMAL_INT_ERROR = 6,
+ MPF_ABNORMAL_INT_ERROR = 7,
+ COMMON_ERROR = 8,
+ PORT_ERROR = 9,
+ ETS_ERROR = 10,
+ NCSI_ERROR = 11,
+ GLB_ERROR = 12,
+ /* add new ERROR TYPE for NIC here in order */
+ ROCEE_NORMAL_ERR = 40,
+ ROCEE_OVF_ERR = 41,
+ /* add new ERROR TYPE for ROCEE here in order */
+};
+
struct hclge_hw_blk {
u32 msk;
const char *name;
enum hnae3_reset_type reset_level;
};
+struct hclge_hw_module_id {
+ enum hclge_mod_name_list module_id;
+ const char *msg;
+};
+
+struct hclge_hw_type_id {
+ enum hclge_err_type_list type_id;
+ const char *msg;
+};
+
+struct hclge_sum_err_info {
+ u8 reset_type;
+ u8 mod_num;
+ u8 rsv[2];
+};
+
+struct hclge_mod_err_info {
+ u8 mod_id;
+ u8 err_num;
+ u8 rsv[2];
+};
+
+struct hclge_type_reg_err_info {
+ u8 type_id;
+ u8 reg_num;
+ u8 rsv[2];
+ u32 hclge_reg[HCLGE_REG_NUM_MAX];
+};
+
int hclge_config_mac_tnl_int(struct hclge_dev *hdev, bool en);
int hclge_config_nic_hw_error(struct hclge_dev *hdev, bool state);
int hclge_config_rocee_ras_interrupt(struct hclge_dev *hdev, bool en);
void hclge_handle_all_hns_hw_errors(struct hnae3_ae_dev *ae_dev);
+bool hclge_find_error_source(struct hclge_dev *hdev);
+void hclge_handle_occurred_error(struct hclge_dev *hdev);
pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev);
int hclge_handle_hw_msix_error(struct hclge_dev *hdev,
unsigned long *reset_requests);
+int hclge_handle_error_info_log(struct hnae3_ae_dev *ae_dev);
+int hclge_handle_mac_tnl(struct hclge_dev *hdev);
#endif
static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval)
{
- u32 cmdq_src_reg, msix_src_reg;
+ u32 cmdq_src_reg, msix_src_reg, hw_err_src_reg;
/* fetch the events from their corresponding regs */
cmdq_src_reg = hclge_read_dev(&hdev->hw, HCLGE_VECTOR0_CMDQ_SRC_REG);
msix_src_reg = hclge_read_dev(&hdev->hw, HCLGE_MISC_VECTOR_INT_STS);
+ hw_err_src_reg = hclge_read_dev(&hdev->hw,
+ HCLGE_RAS_PF_OTHER_INT_STS_REG);
/* Assumption: If by any chance reset and mailbox events are reported
* together then we will only process reset event in this go and will
return HCLGE_VECTOR0_EVENT_RST;
}
- /* check for vector0 msix event source */
- if (msix_src_reg & HCLGE_VECTOR0_REG_MSIX_MASK) {
- *clearval = msix_src_reg;
+ /* check for vector0 msix event and hardware error event source */
+ if (msix_src_reg & HCLGE_VECTOR0_REG_MSIX_MASK ||
+ hw_err_src_reg & HCLGE_RAS_REG_ERR_MASK)
return HCLGE_VECTOR0_EVENT_ERR;
- }
/* check for vector0 mailbox(=CMDQ RX) event source */
if (BIT(HCLGE_VECTOR0_RX_CMDQ_INT_B) & cmdq_src_reg) {
/* print other vector0 event source */
dev_info(&hdev->pdev->dev,
- "CMDQ INT status:0x%x, other INT status:0x%x\n",
- cmdq_src_reg, msix_src_reg);
- *clearval = msix_src_reg;
+ "INT status: CMDQ(%#x) HW errors(%#x) other(%#x)\n",
+ cmdq_src_reg, hw_err_src_reg, msix_src_reg);
return HCLGE_VECTOR0_EVENT_OTHER;
}
hclge_clear_event_cause(hdev, event_cause, clearval);
- /* Enable interrupt if it is not cause by reset. And when
- * clearval equal to 0, it means interrupt status may be
- * cleared by hardware before driver reads status register.
- * For this case, vector0 interrupt also should be enabled.
- */
- if (!clearval ||
- event_cause == HCLGE_VECTOR0_EVENT_MBX) {
+ /* Enable interrupt if it is not caused by reset event or error event */
+ if (event_cause == HCLGE_VECTOR0_EVENT_MBX ||
+ event_cause == HCLGE_VECTOR0_EVENT_OTHER)
hclge_enable_vector(&hdev->misc_vector, true);
- }
return IRQ_HANDLED;
}
hdev->reset_type = HNAE3_NONE_RESET;
}
+static void hclge_handle_err_reset_request(struct hclge_dev *hdev)
+{
+ struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
+ enum hnae3_reset_type reset_type;
+
+ if (ae_dev->hw_err_reset_req) {
+ reset_type = hclge_get_reset_level(ae_dev,
+ &ae_dev->hw_err_reset_req);
+ hclge_set_def_reset_request(ae_dev, reset_type);
+ }
+
+ if (hdev->default_reset_request && ae_dev->ops->reset_event)
+ ae_dev->ops->reset_event(hdev->pdev, NULL);
+
+ /* enable interrupt after error handling complete */
+ hclge_enable_vector(&hdev->misc_vector, true);
+}
+
+static void hclge_handle_err_recovery(struct hclge_dev *hdev)
+{
+ struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
+
+ ae_dev->hw_err_reset_req = 0;
+
+ if (hclge_find_error_source(hdev)) {
+ hclge_handle_error_info_log(ae_dev);
+ hclge_handle_mac_tnl(hdev);
+ }
+
+ hclge_handle_err_reset_request(hdev);
+}
+
static void hclge_misc_err_recovery(struct hclge_dev *hdev)
{
struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
u32 msix_sts_reg;
msix_sts_reg = hclge_read_dev(&hdev->hw, HCLGE_MISC_VECTOR_INT_STS);
-
if (msix_sts_reg & HCLGE_VECTOR0_REG_MSIX_MASK) {
- if (hclge_handle_hw_msix_error(hdev,
- &hdev->default_reset_request))
+ if (hclge_handle_hw_msix_error
+ (hdev, &hdev->default_reset_request))
dev_info(dev, "received msix interrupt 0x%x\n",
msix_sts_reg);
-
- if (hdev->default_reset_request)
- if (ae_dev->ops->reset_event)
- ae_dev->ops->reset_event(hdev->pdev, NULL);
}
- hclge_enable_vector(&hdev->misc_vector, true);
+ hclge_handle_hw_ras_error(ae_dev);
+
+ hclge_handle_err_reset_request(hdev);
}
static void hclge_errhand_service_task(struct hclge_dev *hdev)
if (!test_and_clear_bit(HCLGE_STATE_ERR_SERVICE_SCHED, &hdev->state))
return;
- hclge_misc_err_recovery(hdev);
+ if (hnae3_dev_ras_imp_supported(hdev))
+ hclge_handle_err_recovery(hdev);
+ else
+ hclge_misc_err_recovery(hdev);
}
static void hclge_reset_service_task(struct hclge_dev *hdev)
hclge_clear_resetting_state(hdev);
/* Log and clear the hw errors those already occurred */
- hclge_handle_all_hns_hw_errors(ae_dev);
+ if (hnae3_dev_ras_imp_supported(hdev))
+ hclge_handle_occurred_error(hdev);
+ else
+ hclge_handle_all_hns_hw_errors(ae_dev);
/* request delayed reset for the error recovery because an immediate
* global reset on a PF affecting pending initialization of other PFs
}
/* Log and clear the hw errors those already occurred */
- hclge_handle_all_hns_hw_errors(ae_dev);
+ if (hnae3_dev_ras_imp_supported(hdev))
+ hclge_handle_occurred_error(hdev);
+ else
+ hclge_handle_all_hns_hw_errors(ae_dev);
/* Re-enable the hw error interrupts because
* the interrupts get disabled on global reset.