2 * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 #include <linux/module.h>
34 #include <linux/etherdevice.h>
35 #include <linux/mlx5/driver.h>
37 #include "mlx5_core.h"
39 #include "fpga/core.h"
40 #include "fpga/conn.h"
/* Text descriptions for FPGA error-event syndromes. The table is indexed
 * by syndrome value in mlx5_fpga_syndrome_to_string().
 * NOTE(review): only some entries are visible in this view, so the index
 * that each string corresponds to cannot be confirmed from here.
 */
42 static const char *const mlx5_fpga_error_strings[] = {
46 "Internal Link Error",
47 "Watchdog HW Failure",
50 "Temperature Critical",
/* Text descriptions for FPGA QP error-event syndromes. The table is indexed
 * by syndrome value in mlx5_fpga_qp_syndrome_to_string().
 * NOTE(review): only one entry is visible in this view; its index cannot
 * be confirmed from here.
 */
53 static const char * const mlx5_fpga_qp_error_strings[] = {
55 "Retry Counter Expired",
/* Allocate a zero-filled FPGA device context (kzalloc, GFP_KERNEL),
 * initialise its state lock and set its state to MLX5_FPGA_STATUS_NONE.
 * NOTE(review): the allocation-failure check and the return statement are
 * not visible in this view; presumably NULL is returned on failure.
 */
58 static struct mlx5_fpga_device *mlx5_fpga_device_alloc(void)
60 struct mlx5_fpga_device *fdev = NULL;
62 fdev = kzalloc(sizeof(*fdev), GFP_KERNEL);
66 spin_lock_init(&fdev->state_lock);
67 fdev->state = MLX5_FPGA_STATUS_NONE;
/* Map an FPGA image identifier to a printable name for log messages.
 * The user and factory images are handled explicitly.
 * NOTE(review): the returned strings and any default case are not visible
 * in this view.
 */
71 static const char *mlx5_fpga_image_name(enum mlx5_fpga_image image)
74 case MLX5_FPGA_IMAGE_USER:
76 case MLX5_FPGA_IMAGE_FACTORY:
/* Map an FPGA device identifier to a printable name for log messages.
 * KU040, KU060, KU060_2 and the explicit "unknown" identifier each have
 * their own case.
 * NOTE(review): the returned strings and the handling of unlisted values
 * are not visible in this view.
 */
83 static const char *mlx5_fpga_device_name(u32 device)
86 case MLX5_FPGA_DEVICE_KU040:
88 case MLX5_FPGA_DEVICE_KU060:
90 case MLX5_FPGA_DEVICE_KU060_2:
92 case MLX5_FPGA_DEVICE_UNKNOWN:
/* Query the FPGA and check that an image was loaded successfully.
 * The admin and operational image identifiers reported by the query are
 * cached in fdev. A query status other than MLX5_FPGA_STATUS_SUCCESS is
 * logged as a load failure of the operational image.
 * NOTE(review): the error checks and return values are not visible in
 * this view; presumably a non-zero value is returned on either failure.
 */
98 static int mlx5_fpga_device_load_check(struct mlx5_fpga_device *fdev)
100 struct mlx5_fpga_query query;
103 err = mlx5_fpga_query(fdev->mdev, &query);
105 mlx5_fpga_err(fdev, "Failed to query status: %d\n", err);
/* Cache the reported images; last_oper_image is consulted again by the
 * start and stop paths.
 */
109 fdev->last_admin_image = query.admin_image;
110 fdev->last_oper_image = query.oper_image;
112 mlx5_fpga_dbg(fdev, "Status %u; Admin image %u; Oper image %u\n",
113 query.status, query.admin_image, query.oper_image);
115 if (query.status != MLX5_FPGA_STATUS_SUCCESS) {
116 mlx5_fpga_err(fdev, "%s image failed to load; status %u\n",
117 mlx5_fpga_image_name(fdev->last_oper_image),
/* Issue three FPGA control operations in order: sandbox bypass on,
 * sandbox reset, sandbox bypass off. Each step has its own error message.
 * NOTE(review): the name presumably stands for "bypass-reset-bypass".
 * The per-step error checks and returns are not visible in this view;
 * presumably the sequence stops at the first failing step.
 */
125 static int mlx5_fpga_device_brb(struct mlx5_fpga_device *fdev)
128 struct mlx5_core_dev *mdev = fdev->mdev;
130 err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON);
132 mlx5_fpga_err(fdev, "Failed to set bypass on: %d\n", err);
135 err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_RESET_SANDBOX);
137 mlx5_fpga_err(fdev, "Failed to reset SBU: %d\n", err);
140 err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_OFF);
142 mlx5_fpga_err(fdev, "Failed to set bypass off: %d\n", err);
/* Bring up the FPGA attached to @mdev.
 *
 * Visible steps, in order: check that an image loaded, read the FPGA
 * capabilities, log the device and image details, read the QP count from
 * the shell capabilities, reserve that many GIDs, initialise the FPGA
 * connection layer and, when the user image is running, run the sandbox
 * bypass/reset sequence. Finally the device state is recorded under the
 * state lock.
 *
 * NOTE(review): the error checks, goto labels and return statement are
 * not visible in this view. The cleanup calls near the end presumably
 * belong to the error-unwinding path only.
 */
148 int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
150 struct mlx5_fpga_device *fdev = mdev->fpga;
151 unsigned int max_num_qps;
159 err = mlx5_fpga_device_load_check(fdev);
163 err = mlx5_fpga_caps(fdev->mdev);
167 fpga_device_id = MLX5_CAP_FPGA(fdev->mdev, fpga_device);
168 mlx5_fpga_info(fdev, "%s:%u; %s image, version %u; SBU %06x:%04x version %d\n",
169 mlx5_fpga_device_name(fpga_device_id),
171 mlx5_fpga_image_name(fdev->last_oper_image),
172 MLX5_CAP_FPGA(fdev->mdev, image_version),
173 MLX5_CAP_FPGA(fdev->mdev, ieee_vendor_id),
174 MLX5_CAP_FPGA(fdev->mdev, sandbox_product_id),
175 MLX5_CAP_FPGA(fdev->mdev, sandbox_product_version));
/* The QP count sizes the GID reservation made below. */
177 max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
179 mlx5_fpga_err(fdev, "FPGA reports 0 QPs in SHELL_CAPS\n");
184 err = mlx5_core_reserve_gids(mdev, max_num_qps);
188 err = mlx5_fpga_conn_device_init(fdev);
/* The sandbox bypass/reset sequence is run only for the user image. */
192 if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) {
193 err = mlx5_fpga_device_brb(fdev);
/* NOTE(review): presumably error unwinding, in reverse order of setup. */
201 mlx5_fpga_conn_device_cleanup(fdev);
204 mlx5_core_unreserve_gids(mdev, max_num_qps);
/* Publish the outcome: FAILURE if err is non-zero, SUCCESS otherwise. */
206 spin_lock_irqsave(&fdev->state_lock, flags);
207 fdev->state = err ? MLX5_FPGA_STATUS_FAILURE : MLX5_FPGA_STATUS_SUCCESS;
208 spin_unlock_irqrestore(&fdev->state_lock, flags);
/* Set up FPGA support for @mdev.
 * When the general "fpga" capability is absent, only a debug message is
 * visible. Otherwise an FPGA device context is allocated.
 * NOTE(review): the return values, the allocation-failure handling and
 * the linkage of the new context to @mdev are not visible in this view.
 */
212 int mlx5_fpga_init(struct mlx5_core_dev *mdev)
214 struct mlx5_fpga_device *fdev = NULL;
216 if (!MLX5_CAP_GEN(mdev, fpga)) {
217 mlx5_core_dbg(mdev, "FPGA capability not present\n");
221 mlx5_core_dbg(mdev, "Initializing FPGA\n");
223 fdev = mlx5_fpga_device_alloc();
/* Shut down the FPGA attached to @mdev.
 *
 * The state is checked and reset to MLX5_FPGA_STATUS_NONE under the state
 * lock. If the user image is running, sandbox bypass is switched back on.
 * The connection layer is then cleaned up and the reserved GIDs released.
 *
 * NOTE(review): any NULL check on fdev and the statement following the
 * unlock in the "state != SUCCESS" branch are not visible in this view;
 * presumably that branch returns without tearing anything down.
 */
233 void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
235 struct mlx5_fpga_device *fdev = mdev->fpga;
236 unsigned int max_num_qps;
243 spin_lock_irqsave(&fdev->state_lock, flags);
244 if (fdev->state != MLX5_FPGA_STATUS_SUCCESS) {
245 spin_unlock_irqrestore(&fdev->state_lock, flags);
248 fdev->state = MLX5_FPGA_STATUS_NONE;
249 spin_unlock_irqrestore(&fdev->state_lock, flags);
/* A failure to re-enable bypass has its own error message. */
251 if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) {
252 err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON);
254 mlx5_fpga_err(fdev, "Failed to re-set SBU bypass on: %d\n",
258 mlx5_fpga_conn_device_cleanup(fdev);
/* Release the same number of GIDs that the start path reserved. */
259 max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
260 mlx5_core_unreserve_gids(mdev, max_num_qps);
/* Tear down FPGA support for @mdev; the visible step stops the device.
 * NOTE(review): what is done with fdev after the stop call is not visible
 * in this view; presumably the context is freed.
 */
263 void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev)
265 struct mlx5_fpga_device *fdev = mdev->fpga;
267 mlx5_fpga_device_stop(mdev);
/* Return the description of an FPGA error syndrome.
 * The table is indexed only when @syndrome is within its bounds.
 * NOTE(review): the value returned for an out-of-range syndrome is not
 * visible in this view.
 */
272 static const char *mlx5_fpga_syndrome_to_string(u8 syndrome)
274 if (syndrome < ARRAY_SIZE(mlx5_fpga_error_strings))
275 return mlx5_fpga_error_strings[syndrome];
/* Return the description of an FPGA QP error syndrome.
 * The table is indexed only when @syndrome is within its bounds.
 * NOTE(review): the value returned for an out-of-range syndrome is not
 * visible in this view.
 */
279 static const char *mlx5_fpga_qp_syndrome_to_string(u8 syndrome)
281 if (syndrome < ARRAY_SIZE(mlx5_fpga_qp_error_strings))
282 return mlx5_fpga_qp_error_strings[syndrome];
/* Handle an FPGA-related event reported for @mdev.
 *
 * @event selects how @data is decoded. For an FPGA error event the
 * syndrome is read. For an FPGA QP error event the syndrome and the FPGA
 * QP number are read. Other event types are reported by a rate-limited
 * "Unexpected event" warning. The error is then logged according to the
 * current device state, which is read under the state lock.
 *
 * NOTE(review): the break/return statements, the place where "teardown"
 * is set and the condition guarding the health-work trigger are not
 * visible in this view.
 */
286 void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data)
288 struct mlx5_fpga_device *fdev = mdev->fpga;
289 const char *event_name;
290 bool teardown = false;
296 case MLX5_EVENT_TYPE_FPGA_ERROR:
297 syndrome = MLX5_GET(fpga_error_event, data, syndrome);
298 event_name = mlx5_fpga_syndrome_to_string(syndrome);
300 case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
301 syndrome = MLX5_GET(fpga_qp_error_event, data, syndrome);
302 event_name = mlx5_fpga_qp_syndrome_to_string(syndrome);
303 fpga_qpn = MLX5_GET(fpga_qp_error_event, data, fpga_qpn);
306 mlx5_fpga_warn_ratelimited(fdev, "Unexpected event %u\n",
/* In the SUCCESS state the error is logged with a plain warning; the
 * rate-limited "Unexpected error event" warning presumably covers every
 * other state.
 */
311 spin_lock_irqsave(&fdev->state_lock, flags);
312 switch (fdev->state) {
313 case MLX5_FPGA_STATUS_SUCCESS:
314 mlx5_fpga_warn(fdev, "Error %u: %s\n", syndrome, event_name);
318 mlx5_fpga_warn_ratelimited(fdev, "Unexpected error event %u: %s\n",
319 syndrome, event_name);
321 spin_unlock_irqrestore(&fdev->state_lock, flags);
322 /* We tear-down the card's interfaces and functionality because
323 * the FPGA bump-on-the-wire is misbehaving and we lose ability
324 * to communicate with the network. User may still be able to
325 * recover by re-programming or debugging the FPGA
328 mlx5_trigger_health_work(fdev->mdev);