// SPDX-License-Identifier: GPL-2.0-only
/*
 * Intel Keem Bay OCS HCU Crypto Driver.
 *
 * Copyright (C) 2018-2020 Intel Corporation
 */
#include <linux/completion.h>
#include <linux/delay.h>
#include <linux/device.h>
#include <linux/iopoll.h>
#include <linux/irq.h>
#include <linux/module.h>

#include <crypto/sha2.h>

#include "ocs-hcu.h"
/* Registers. */
#define OCS_HCU_MODE			0x00
#define OCS_HCU_CHAIN			0x04
#define OCS_HCU_OPERATION		0x08
#define OCS_HCU_KEY_0			0x0C
#define OCS_HCU_ISR			0x50
#define OCS_HCU_IER			0x54
#define OCS_HCU_STATUS			0x58
#define OCS_HCU_MSG_LEN_LO		0x60
#define OCS_HCU_MSG_LEN_HI		0x64
#define OCS_HCU_KEY_BYTE_ORDER_CFG	0x80
#define OCS_HCU_DMA_SRC_ADDR		0x400
#define OCS_HCU_DMA_SRC_SIZE		0x408
#define OCS_HCU_DMA_DST_SIZE		0x40C
#define OCS_HCU_DMA_DMA_MODE		0x410
#define OCS_HCU_DMA_NEXT_SRC_DESCR	0x418
#define OCS_HCU_DMA_MSI_ISR		0x480
#define OCS_HCU_DMA_MSI_IER		0x484
#define OCS_HCU_DMA_MSI_MASK		0x488

/* Register bit definitions. */
#define HCU_MODE_ALGO_SHIFT		16
#define HCU_MODE_HMAC_SHIFT		22

#define HCU_STATUS_BUSY			BIT(0)

#define HCU_BYTE_ORDER_SWAP		BIT(0)

#define HCU_IRQ_HASH_DONE		BIT(2)
#define HCU_IRQ_HASH_ERR_MASK		(BIT(3) | BIT(1) | BIT(0))

#define HCU_DMA_IRQ_SRC_DONE		BIT(0)
#define HCU_DMA_IRQ_SAI_ERR		BIT(2)
#define HCU_DMA_IRQ_BAD_COMP_ERR	BIT(3)
#define HCU_DMA_IRQ_INBUF_RD_ERR	BIT(4)
#define HCU_DMA_IRQ_INBUF_WD_ERR	BIT(5)
#define HCU_DMA_IRQ_OUTBUF_WR_ERR	BIT(6)
#define HCU_DMA_IRQ_OUTBUF_RD_ERR	BIT(7)
#define HCU_DMA_IRQ_CRD_ERR		BIT(8)
/* All DMA error conditions OR-ed together, for IRQ (un)masking and checks. */
#define HCU_DMA_IRQ_ERR_MASK		(HCU_DMA_IRQ_SAI_ERR | \
					 HCU_DMA_IRQ_BAD_COMP_ERR | \
					 HCU_DMA_IRQ_INBUF_RD_ERR | \
					 HCU_DMA_IRQ_INBUF_WD_ERR | \
					 HCU_DMA_IRQ_OUTBUF_WR_ERR | \
					 HCU_DMA_IRQ_OUTBUF_RD_ERR | \
					 HCU_DMA_IRQ_CRD_ERR)

#define HCU_DMA_SNOOP_MASK		(0x7 << 28)
#define HCU_DMA_SRC_LL_EN		BIT(25)
#define HCU_DMA_EN			BIT(31)

#define OCS_HCU_ENDIANNESS_VALUE	0x2A

#define HCU_DMA_MSI_UNMASK		BIT(0)
#define HCU_DMA_MSI_DISABLE		0
#define HCU_IRQ_DISABLE			0

#define OCS_HCU_START			BIT(0)
#define OCS_HCU_TERMINATE		BIT(1)

#define OCS_LL_DMA_FLAG_TERMINATE	BIT(31)

#define OCS_HCU_HW_KEY_LEN_U32		(OCS_HCU_HW_KEY_LEN / sizeof(u32))

#define HCU_DATA_WRITE_ENDIANNESS_OFFSET	26

#define OCS_HCU_NUM_CHAINS_SHA256_224_SM3	(SHA256_DIGEST_SIZE / sizeof(u32))
#define OCS_HCU_NUM_CHAINS_SHA384_512		(SHA512_DIGEST_SIZE / sizeof(u32))

/*
 * While polling on a busy HCU, wait maximum 200us between one check and the
 * other.
 */
#define OCS_HCU_WAIT_BUSY_RETRY_DELAY_US	200
/* Wait on a busy HCU for maximum 1 second. */
#define OCS_HCU_WAIT_BUSY_TIMEOUT_US		1000000
96 * struct ocs_hcu_dma_list - An entry in an OCS DMA linked list.
97 * @src_addr: Source address of the data.
98 * @src_len: Length of data to be fetched.
99 * @nxt_desc: Next descriptor to fetch.
100 * @ll_flags: Flags (Freeze @ terminate) for the DMA engine.
102 struct ocs_hcu_dma_entry {
110 * struct ocs_dma_list - OCS-specific DMA linked list.
111 * @head: The head of the list (points to the array backing the list).
112 * @tail: The current tail of the list; NULL if the list is empty.
113 * @dma_addr: The DMA address of @head (i.e., the DMA address of the backing
115 * @max_nents: Maximum number of entries in the list (i.e., number of elements
116 * in the backing array).
118 * The OCS DMA list is an array-backed list of OCS DMA descriptors. The array
119 * backing the list is allocated with dma_alloc_coherent() and pointed by
122 struct ocs_hcu_dma_list {
123 struct ocs_hcu_dma_entry *head;
124 struct ocs_hcu_dma_entry *tail;
129 static inline u32 ocs_hcu_num_chains(enum ocs_hcu_algo algo)
132 case OCS_HCU_ALGO_SHA224:
133 case OCS_HCU_ALGO_SHA256:
134 case OCS_HCU_ALGO_SM3:
135 return OCS_HCU_NUM_CHAINS_SHA256_224_SM3;
136 case OCS_HCU_ALGO_SHA384:
137 case OCS_HCU_ALGO_SHA512:
138 return OCS_HCU_NUM_CHAINS_SHA384_512;
144 static inline u32 ocs_hcu_digest_size(enum ocs_hcu_algo algo)
147 case OCS_HCU_ALGO_SHA224:
148 return SHA224_DIGEST_SIZE;
149 case OCS_HCU_ALGO_SHA256:
150 case OCS_HCU_ALGO_SM3:
151 /* SM3 shares the same block size. */
152 return SHA256_DIGEST_SIZE;
153 case OCS_HCU_ALGO_SHA384:
154 return SHA384_DIGEST_SIZE;
155 case OCS_HCU_ALGO_SHA512:
156 return SHA512_DIGEST_SIZE;
163 * ocs_hcu_wait_busy() - Wait for HCU OCS hardware to became usable.
164 * @hcu_dev: OCS HCU device to wait for.
166 * Return: 0 if device free, -ETIMEOUT if device busy and internal timeout has
169 static int ocs_hcu_wait_busy(struct ocs_hcu_dev *hcu_dev)
173 return readl_poll_timeout(hcu_dev->io_base + OCS_HCU_STATUS, val,
174 !(val & HCU_STATUS_BUSY),
175 OCS_HCU_WAIT_BUSY_RETRY_DELAY_US,
176 OCS_HCU_WAIT_BUSY_TIMEOUT_US);
179 static void ocs_hcu_done_irq_en(struct ocs_hcu_dev *hcu_dev)
181 /* Clear any pending interrupts. */
182 writel(0xFFFFFFFF, hcu_dev->io_base + OCS_HCU_ISR);
183 hcu_dev->irq_err = false;
184 /* Enable error and HCU done interrupts. */
185 writel(HCU_IRQ_HASH_DONE | HCU_IRQ_HASH_ERR_MASK,
186 hcu_dev->io_base + OCS_HCU_IER);
189 static void ocs_hcu_dma_irq_en(struct ocs_hcu_dev *hcu_dev)
191 /* Clear any pending interrupts. */
192 writel(0xFFFFFFFF, hcu_dev->io_base + OCS_HCU_DMA_MSI_ISR);
193 hcu_dev->irq_err = false;
194 /* Only operating on DMA source completion and error interrupts. */
195 writel(HCU_DMA_IRQ_ERR_MASK | HCU_DMA_IRQ_SRC_DONE,
196 hcu_dev->io_base + OCS_HCU_DMA_MSI_IER);
198 writel(HCU_DMA_MSI_UNMASK, hcu_dev->io_base + OCS_HCU_DMA_MSI_MASK);
201 static void ocs_hcu_irq_dis(struct ocs_hcu_dev *hcu_dev)
203 writel(HCU_IRQ_DISABLE, hcu_dev->io_base + OCS_HCU_IER);
204 writel(HCU_DMA_MSI_DISABLE, hcu_dev->io_base + OCS_HCU_DMA_MSI_IER);
207 static int ocs_hcu_wait_and_disable_irq(struct ocs_hcu_dev *hcu_dev)
211 rc = wait_for_completion_interruptible(&hcu_dev->irq_done);
215 if (hcu_dev->irq_err) {
216 /* Unset flag and return error. */
217 hcu_dev->irq_err = false;
223 ocs_hcu_irq_dis(hcu_dev);
229 * ocs_hcu_get_intermediate_data() - Get intermediate data.
230 * @hcu_dev: The target HCU device.
231 * @data: Where to store the intermediate.
232 * @algo: The algorithm being used.
234 * This function is used to save the current hashing process state in order to
235 * continue it in the future.
237 * Note: once all data has been processed, the intermediate data actually
238 * contains the hashing result. So this function is also used to retrieve the
239 * final result of a hashing process.
241 * Return: 0 on success, negative error code otherwise.
243 static int ocs_hcu_get_intermediate_data(struct ocs_hcu_dev *hcu_dev,
244 struct ocs_hcu_idata *data,
245 enum ocs_hcu_algo algo)
247 const int n = ocs_hcu_num_chains(algo);
252 /* Data not requested. */
256 chain = (u32 *)data->digest;
258 /* Ensure that the OCS is no longer busy before reading the chains. */
259 rc = ocs_hcu_wait_busy(hcu_dev);
264 * This loops is safe because data->digest is an array of
265 * SHA512_DIGEST_SIZE bytes and the maximum value returned by
266 * ocs_hcu_num_chains() is OCS_HCU_NUM_CHAINS_SHA384_512 which is equal
267 * to SHA512_DIGEST_SIZE / sizeof(u32).
269 for (i = 0; i < n; i++)
270 chain[i] = readl(hcu_dev->io_base + OCS_HCU_CHAIN);
272 data->msg_len_lo = readl(hcu_dev->io_base + OCS_HCU_MSG_LEN_LO);
273 data->msg_len_hi = readl(hcu_dev->io_base + OCS_HCU_MSG_LEN_HI);
279 * ocs_hcu_set_intermediate_data() - Set intermediate data.
280 * @hcu_dev: The target HCU device.
281 * @data: The intermediate data to be set.
282 * @algo: The algorithm being used.
284 * This function is used to continue a previous hashing process.
286 static void ocs_hcu_set_intermediate_data(struct ocs_hcu_dev *hcu_dev,
287 const struct ocs_hcu_idata *data,
288 enum ocs_hcu_algo algo)
290 const int n = ocs_hcu_num_chains(algo);
291 u32 *chain = (u32 *)data->digest;
295 * This loops is safe because data->digest is an array of
296 * SHA512_DIGEST_SIZE bytes and the maximum value returned by
297 * ocs_hcu_num_chains() is OCS_HCU_NUM_CHAINS_SHA384_512 which is equal
298 * to SHA512_DIGEST_SIZE / sizeof(u32).
300 for (i = 0; i < n; i++)
301 writel(chain[i], hcu_dev->io_base + OCS_HCU_CHAIN);
303 writel(data->msg_len_lo, hcu_dev->io_base + OCS_HCU_MSG_LEN_LO);
304 writel(data->msg_len_hi, hcu_dev->io_base + OCS_HCU_MSG_LEN_HI);
307 static int ocs_hcu_get_digest(struct ocs_hcu_dev *hcu_dev,
308 enum ocs_hcu_algo algo, u8 *dgst, size_t dgst_len)
317 /* Length of the output buffer must match the algo digest size. */
318 if (dgst_len != ocs_hcu_digest_size(algo))
321 /* Ensure that the OCS is no longer busy before reading the chains. */
322 rc = ocs_hcu_wait_busy(hcu_dev);
327 for (i = 0; i < dgst_len / sizeof(u32); i++)
328 chain[i] = readl(hcu_dev->io_base + OCS_HCU_CHAIN);
334 * ocs_hcu_hw_cfg() - Configure the HCU hardware.
335 * @hcu_dev: The HCU device to configure.
336 * @algo: The algorithm to be used by the HCU device.
337 * @use_hmac: Whether or not HW HMAC should be used.
339 * Return: 0 on success, negative error code otherwise.
341 static int ocs_hcu_hw_cfg(struct ocs_hcu_dev *hcu_dev, enum ocs_hcu_algo algo,
347 if (algo != OCS_HCU_ALGO_SHA256 && algo != OCS_HCU_ALGO_SHA224 &&
348 algo != OCS_HCU_ALGO_SHA384 && algo != OCS_HCU_ALGO_SHA512 &&
349 algo != OCS_HCU_ALGO_SM3)
352 rc = ocs_hcu_wait_busy(hcu_dev);
356 /* Ensure interrupts are disabled. */
357 ocs_hcu_irq_dis(hcu_dev);
359 /* Configure endianness, hashing algorithm and HW HMAC (if needed) */
360 cfg = OCS_HCU_ENDIANNESS_VALUE << HCU_DATA_WRITE_ENDIANNESS_OFFSET;
361 cfg |= algo << HCU_MODE_ALGO_SHIFT;
363 cfg |= BIT(HCU_MODE_HMAC_SHIFT);
365 writel(cfg, hcu_dev->io_base + OCS_HCU_MODE);
371 * ocs_hcu_ll_dma_start() - Start OCS HCU hashing via DMA
372 * @hcu_dev: The OCS HCU device to use.
373 * @dma_list: The OCS DMA list mapping the data to hash.
374 * @finalize: Whether or not this is the last hashing operation and therefore
375 * the final hash should be compute even if data is not
378 * Return: 0 on success, negative error code otherwise.
380 static int ocs_hcu_ll_dma_start(struct ocs_hcu_dev *hcu_dev,
381 const struct ocs_hcu_dma_list *dma_list,
384 u32 cfg = HCU_DMA_SNOOP_MASK | HCU_DMA_SRC_LL_EN | HCU_DMA_EN;
391 * For final requests we use HCU_DONE IRQ to be notified when all input
392 * data has been processed by the HCU; however, we cannot do so for
393 * non-final requests, because we don't get a HCU_DONE IRQ when we
394 * don't terminate the operation.
396 * Therefore, for non-final requests, we use the DMA IRQ, which
397 * triggers when DMA has finishing feeding all the input data to the
398 * HCU, but the HCU may still be processing it. This is fine, since we
399 * will wait for the HCU processing to be completed when we try to read
400 * intermediate results, in ocs_hcu_get_intermediate_data().
403 ocs_hcu_done_irq_en(hcu_dev);
405 ocs_hcu_dma_irq_en(hcu_dev);
407 reinit_completion(&hcu_dev->irq_done);
408 writel(dma_list->dma_addr, hcu_dev->io_base + OCS_HCU_DMA_NEXT_SRC_DESCR);
409 writel(0, hcu_dev->io_base + OCS_HCU_DMA_SRC_SIZE);
410 writel(0, hcu_dev->io_base + OCS_HCU_DMA_DST_SIZE);
412 writel(OCS_HCU_START, hcu_dev->io_base + OCS_HCU_OPERATION);
414 writel(cfg, hcu_dev->io_base + OCS_HCU_DMA_DMA_MODE);
417 writel(OCS_HCU_TERMINATE, hcu_dev->io_base + OCS_HCU_OPERATION);
419 rc = ocs_hcu_wait_and_disable_irq(hcu_dev);
426 struct ocs_hcu_dma_list *ocs_hcu_dma_list_alloc(struct ocs_hcu_dev *hcu_dev,
429 struct ocs_hcu_dma_list *dma_list;
431 dma_list = kmalloc(sizeof(*dma_list), GFP_KERNEL);
435 /* Total size of the DMA list to allocate. */
436 dma_list->head = dma_alloc_coherent(hcu_dev->dev,
437 sizeof(*dma_list->head) * max_nents,
438 &dma_list->dma_addr, GFP_KERNEL);
439 if (!dma_list->head) {
443 dma_list->max_nents = max_nents;
444 dma_list->tail = NULL;
449 void ocs_hcu_dma_list_free(struct ocs_hcu_dev *hcu_dev,
450 struct ocs_hcu_dma_list *dma_list)
455 dma_free_coherent(hcu_dev->dev,
456 sizeof(*dma_list->head) * dma_list->max_nents,
457 dma_list->head, dma_list->dma_addr);
462 /* Add a new DMA entry at the end of the OCS DMA list. */
463 int ocs_hcu_dma_list_add_tail(struct ocs_hcu_dev *hcu_dev,
464 struct ocs_hcu_dma_list *dma_list,
465 dma_addr_t addr, u32 len)
467 struct device *dev = hcu_dev->dev;
468 struct ocs_hcu_dma_entry *old_tail;
469 struct ocs_hcu_dma_entry *new_tail;
477 if (addr & ~OCS_HCU_DMA_BIT_MASK) {
479 "Unexpected error: Invalid DMA address for OCS HCU\n");
483 old_tail = dma_list->tail;
484 new_tail = old_tail ? old_tail + 1 : dma_list->head;
486 /* Check if list is full. */
487 if (new_tail - dma_list->head >= dma_list->max_nents)
491 * If there was an old tail (i.e., this is not the first element we are
492 * adding), un-terminate the old tail and make it point to the new one.
495 old_tail->ll_flags &= ~OCS_LL_DMA_FLAG_TERMINATE;
497 * The old tail 'nxt_desc' must point to the DMA address of the
500 old_tail->nxt_desc = dma_list->dma_addr +
501 sizeof(*dma_list->tail) * (new_tail -
505 new_tail->src_addr = (u32)addr;
506 new_tail->src_len = (u32)len;
507 new_tail->ll_flags = OCS_LL_DMA_FLAG_TERMINATE;
508 new_tail->nxt_desc = 0;
510 /* Update list tail with new tail. */
511 dma_list->tail = new_tail;
517 * ocs_hcu_hash_init() - Initialize hash operation context.
518 * @ctx: The context to initialize.
519 * @algo: The hashing algorithm to use.
521 * Return: 0 on success, negative error code otherwise.
523 int ocs_hcu_hash_init(struct ocs_hcu_hash_ctx *ctx, enum ocs_hcu_algo algo)
529 ctx->idata.msg_len_lo = 0;
530 ctx->idata.msg_len_hi = 0;
531 /* No need to set idata.digest to 0. */
537 * ocs_hcu_digest() - Perform a hashing iteration.
538 * @hcu_dev: The OCS HCU device to use.
539 * @ctx: The OCS HCU hashing context.
540 * @dma_list: The OCS DMA list mapping the input data to process.
542 * Return: 0 on success; negative error code otherwise.
544 int ocs_hcu_hash_update(struct ocs_hcu_dev *hcu_dev,
545 struct ocs_hcu_hash_ctx *ctx,
546 const struct ocs_hcu_dma_list *dma_list)
550 if (!hcu_dev || !ctx)
553 /* Configure the hardware for the current request. */
554 rc = ocs_hcu_hw_cfg(hcu_dev, ctx->algo, false);
558 /* If we already processed some data, idata needs to be set. */
559 if (ctx->idata.msg_len_lo || ctx->idata.msg_len_hi)
560 ocs_hcu_set_intermediate_data(hcu_dev, &ctx->idata, ctx->algo);
562 /* Start linked-list DMA hashing. */
563 rc = ocs_hcu_ll_dma_start(hcu_dev, dma_list, false);
567 /* Update idata and return. */
568 return ocs_hcu_get_intermediate_data(hcu_dev, &ctx->idata, ctx->algo);
572 * ocs_hcu_hash_final() - Update and finalize hash computation.
573 * @hcu_dev: The OCS HCU device to use.
574 * @ctx: The OCS HCU hashing context.
575 * @dma_list: The OCS DMA list mapping the input data to process.
576 * @dgst: The buffer where to save the computed digest.
577 * @dgst_len: The length of @dgst.
579 * Return: 0 on success; negative error code otherwise.
581 int ocs_hcu_hash_finup(struct ocs_hcu_dev *hcu_dev,
582 const struct ocs_hcu_hash_ctx *ctx,
583 const struct ocs_hcu_dma_list *dma_list,
584 u8 *dgst, size_t dgst_len)
588 if (!hcu_dev || !ctx)
591 /* Configure the hardware for the current request. */
592 rc = ocs_hcu_hw_cfg(hcu_dev, ctx->algo, false);
596 /* If we already processed some data, idata needs to be set. */
597 if (ctx->idata.msg_len_lo || ctx->idata.msg_len_hi)
598 ocs_hcu_set_intermediate_data(hcu_dev, &ctx->idata, ctx->algo);
600 /* Start linked-list DMA hashing. */
601 rc = ocs_hcu_ll_dma_start(hcu_dev, dma_list, true);
605 /* Get digest and return. */
606 return ocs_hcu_get_digest(hcu_dev, ctx->algo, dgst, dgst_len);
610 * ocs_hcu_hash_final() - Finalize hash computation.
611 * @hcu_dev: The OCS HCU device to use.
612 * @ctx: The OCS HCU hashing context.
613 * @dgst: The buffer where to save the computed digest.
614 * @dgst_len: The length of @dgst.
616 * Return: 0 on success; negative error code otherwise.
618 int ocs_hcu_hash_final(struct ocs_hcu_dev *hcu_dev,
619 const struct ocs_hcu_hash_ctx *ctx, u8 *dgst,
624 if (!hcu_dev || !ctx)
627 /* Configure the hardware for the current request. */
628 rc = ocs_hcu_hw_cfg(hcu_dev, ctx->algo, false);
632 /* If we already processed some data, idata needs to be set. */
633 if (ctx->idata.msg_len_lo || ctx->idata.msg_len_hi)
634 ocs_hcu_set_intermediate_data(hcu_dev, &ctx->idata, ctx->algo);
637 * Enable HCU interrupts, so that HCU_DONE will be triggered once the
638 * final hash is computed.
640 ocs_hcu_done_irq_en(hcu_dev);
641 reinit_completion(&hcu_dev->irq_done);
642 writel(OCS_HCU_TERMINATE, hcu_dev->io_base + OCS_HCU_OPERATION);
644 rc = ocs_hcu_wait_and_disable_irq(hcu_dev);
648 /* Get digest and return. */
649 return ocs_hcu_get_digest(hcu_dev, ctx->algo, dgst, dgst_len);
652 irqreturn_t ocs_hcu_irq_handler(int irq, void *dev_id)
654 struct ocs_hcu_dev *hcu_dev = dev_id;
658 /* Read and clear the HCU interrupt. */
659 hcu_irq = readl(hcu_dev->io_base + OCS_HCU_ISR);
660 writel(hcu_irq, hcu_dev->io_base + OCS_HCU_ISR);
662 /* Read and clear the HCU DMA interrupt. */
663 dma_irq = readl(hcu_dev->io_base + OCS_HCU_DMA_MSI_ISR);
664 writel(dma_irq, hcu_dev->io_base + OCS_HCU_DMA_MSI_ISR);
666 /* Check for errors. */
667 if (hcu_irq & HCU_IRQ_HASH_ERR_MASK || dma_irq & HCU_DMA_IRQ_ERR_MASK) {
668 hcu_dev->irq_err = true;
672 /* Check for DONE IRQs. */
673 if (hcu_irq & HCU_IRQ_HASH_DONE || dma_irq & HCU_DMA_IRQ_SRC_DONE)
679 complete(&hcu_dev->irq_done);
684 MODULE_LICENSE("GPL");