/* * AMD Cryptographic Coprocessor (CCP) driver * * Copyright (C) 2013,2016 Advanced Micro Devices, Inc. * * Author: Tom Lendacky * Author: Gary R Hook * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ #include #include #include #include #include #include #include "ccp-dev.h" static int ccp_do_cmd(struct ccp_op *op, u32 *cr, unsigned int cr_count) { struct ccp_cmd_queue *cmd_q = op->cmd_q; struct ccp_device *ccp = cmd_q->ccp; void __iomem *cr_addr; u32 cr0, cmd; unsigned int i; int ret = 0; /* We could read a status register to see how many free slots * are actually available, but reading that register resets it * and you could lose some error information. */ cmd_q->free_slots--; cr0 = (cmd_q->id << REQ0_CMD_Q_SHIFT) | (op->jobid << REQ0_JOBID_SHIFT) | REQ0_WAIT_FOR_WRITE; if (op->soc) cr0 |= REQ0_STOP_ON_COMPLETE | REQ0_INT_ON_COMPLETE; if (op->ioc || !cmd_q->free_slots) cr0 |= REQ0_INT_ON_COMPLETE; /* Start at CMD_REQ1 */ cr_addr = ccp->io_regs + CMD_REQ0 + CMD_REQ_INCR; mutex_lock(&ccp->req_mutex); /* Write CMD_REQ1 through CMD_REQx first */ for (i = 0; i < cr_count; i++, cr_addr += CMD_REQ_INCR) iowrite32(*(cr + i), cr_addr); /* Tell the CCP to start */ wmb(); iowrite32(cr0, ccp->io_regs + CMD_REQ0); mutex_unlock(&ccp->req_mutex); if (cr0 & REQ0_INT_ON_COMPLETE) { /* Wait for the job to complete */ ret = wait_event_interruptible(cmd_q->int_queue, cmd_q->int_rcvd); if (ret || cmd_q->cmd_error) { /* On error delete all related jobs from the queue */ cmd = (cmd_q->id << DEL_Q_ID_SHIFT) | op->jobid; iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB); if (!ret) ret = -EIO; } else if (op->soc) { /* Delete just head job from the queue on SoC */ cmd = DEL_Q_ACTIVE | (cmd_q->id << DEL_Q_ID_SHIFT) | op->jobid; iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB); } cmd_q->free_slots = CMD_Q_DEPTH(cmd_q->q_status); cmd_q->int_rcvd = 0; } return ret; } static int ccp_perform_aes(struct ccp_op *op) { u32 cr[6]; /* Fill out the register contents for REQ1 through REQ6 */ cr[0] = (CCP_ENGINE_AES << REQ1_ENGINE_SHIFT) | (op->u.aes.type << REQ1_AES_TYPE_SHIFT) | (op->u.aes.mode << REQ1_AES_MODE_SHIFT) | (op->u.aes.action << REQ1_AES_ACTION_SHIFT) | (op->ksb_key << REQ1_KEY_KSB_SHIFT); cr[1] = op->src.u.dma.length - 1; cr[2] = ccp_addr_lo(&op->src.u.dma); cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT) | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) | ccp_addr_hi(&op->src.u.dma); cr[4] = ccp_addr_lo(&op->dst.u.dma); cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT) | ccp_addr_hi(&op->dst.u.dma); if (op->u.aes.mode == CCP_AES_MODE_CFB) cr[0] |= ((0x7f) << REQ1_AES_CFB_SIZE_SHIFT); if (op->eom) cr[0] |= REQ1_EOM; if (op->init) cr[0] |= REQ1_INIT; return ccp_do_cmd(op, cr, ARRAY_SIZE(cr)); } static int ccp_perform_xts_aes(struct ccp_op *op) { u32 cr[6]; /* Fill out the register contents for REQ1 through REQ6 */ cr[0] = (CCP_ENGINE_XTS_AES_128 << REQ1_ENGINE_SHIFT) | (op->u.xts.action << REQ1_AES_ACTION_SHIFT) | (op->u.xts.unit_size << REQ1_XTS_AES_SIZE_SHIFT) | (op->ksb_key << REQ1_KEY_KSB_SHIFT); cr[1] = op->src.u.dma.length - 1; cr[2] = ccp_addr_lo(&op->src.u.dma); cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT) | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) | ccp_addr_hi(&op->src.u.dma); cr[4] = ccp_addr_lo(&op->dst.u.dma); cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT) | ccp_addr_hi(&op->dst.u.dma); if (op->eom) cr[0] |= REQ1_EOM; if (op->init) cr[0] |= REQ1_INIT; return ccp_do_cmd(op, cr, ARRAY_SIZE(cr)); } static int ccp_perform_sha(struct ccp_op *op) { u32 cr[6]; /* Fill out the register contents for REQ1 through REQ6 */ cr[0] = (CCP_ENGINE_SHA << REQ1_ENGINE_SHIFT) | (op->u.sha.type << REQ1_SHA_TYPE_SHIFT) | REQ1_INIT; cr[1] = op->src.u.dma.length - 1; cr[2] = ccp_addr_lo(&op->src.u.dma); cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT) | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) | ccp_addr_hi(&op->src.u.dma); if (op->eom) { cr[0] |= REQ1_EOM; cr[4] = lower_32_bits(op->u.sha.msg_bits); cr[5] = upper_32_bits(op->u.sha.msg_bits); } else { cr[4] = 0; cr[5] = 0; } return ccp_do_cmd(op, cr, ARRAY_SIZE(cr)); } static int ccp_perform_rsa(struct ccp_op *op) { u32 cr[6]; /* Fill out the register contents for REQ1 through REQ6 */ cr[0] = (CCP_ENGINE_RSA << REQ1_ENGINE_SHIFT) | (op->u.rsa.mod_size << REQ1_RSA_MOD_SIZE_SHIFT) | (op->ksb_key << REQ1_KEY_KSB_SHIFT) | REQ1_EOM; cr[1] = op->u.rsa.input_len - 1; cr[2] = ccp_addr_lo(&op->src.u.dma); cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT) | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) | ccp_addr_hi(&op->src.u.dma); cr[4] = ccp_addr_lo(&op->dst.u.dma); cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT) | ccp_addr_hi(&op->dst.u.dma); return ccp_do_cmd(op, cr, ARRAY_SIZE(cr)); } static int ccp_perform_passthru(struct ccp_op *op) { u32 cr[6]; /* Fill out the register contents for REQ1 through REQ6 */ cr[0] = (CCP_ENGINE_PASSTHRU << REQ1_ENGINE_SHIFT) | (op->u.passthru.bit_mod << REQ1_PT_BW_SHIFT) | (op->u.passthru.byte_swap << REQ1_PT_BS_SHIFT); if (op->src.type == CCP_MEMTYPE_SYSTEM) cr[1] = op->src.u.dma.length - 1; else cr[1] = op->dst.u.dma.length - 1; if (op->src.type == CCP_MEMTYPE_SYSTEM) { cr[2] = ccp_addr_lo(&op->src.u.dma); cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) | ccp_addr_hi(&op->src.u.dma); if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP) cr[3] |= (op->ksb_key << REQ4_KSB_SHIFT); } else { cr[2] = op->src.u.ksb * CCP_KSB_BYTES; cr[3] = (CCP_MEMTYPE_KSB << REQ4_MEMTYPE_SHIFT); } if (op->dst.type == CCP_MEMTYPE_SYSTEM) { cr[4] = ccp_addr_lo(&op->dst.u.dma); cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT) | ccp_addr_hi(&op->dst.u.dma); } else { cr[4] = op->dst.u.ksb * CCP_KSB_BYTES; cr[5] = (CCP_MEMTYPE_KSB << REQ6_MEMTYPE_SHIFT); } if (op->eom) cr[0] |= REQ1_EOM; return ccp_do_cmd(op, cr, ARRAY_SIZE(cr)); } static int ccp_perform_ecc(struct ccp_op *op) { u32 cr[6]; /* Fill out the register contents for REQ1 through REQ6 */ cr[0] = REQ1_ECC_AFFINE_CONVERT | (CCP_ENGINE_ECC << REQ1_ENGINE_SHIFT) | (op->u.ecc.function << REQ1_ECC_FUNCTION_SHIFT) | REQ1_EOM; cr[1] = op->src.u.dma.length - 1; cr[2] = ccp_addr_lo(&op->src.u.dma); cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) | ccp_addr_hi(&op->src.u.dma); cr[4] = ccp_addr_lo(&op->dst.u.dma); cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT) | ccp_addr_hi(&op->dst.u.dma); return ccp_do_cmd(op, cr, ARRAY_SIZE(cr)); } static int ccp_trng_read(struct hwrng *rng, void *data, size_t max, bool wait) { struct ccp_device *ccp = container_of(rng, struct ccp_device, hwrng); u32 trng_value; int len = min_t(int, sizeof(trng_value), max); /* * Locking is provided by the caller so we can update device * hwrng-related fields safely */ trng_value = ioread32(ccp->io_regs + TRNG_OUT_REG); if (!trng_value) { /* Zero is returned if not data is available or if a * bad-entropy error is present. Assume an error if * we exceed TRNG_RETRIES reads of zero. */ if (ccp->hwrng_retries++ > TRNG_RETRIES) return -EIO; return 0; } /* Reset the counter and save the rng value */ ccp->hwrng_retries = 0; memcpy(data, &trng_value, len); return len; } static int ccp_init(struct ccp_device *ccp) { struct device *dev = ccp->dev; struct ccp_cmd_queue *cmd_q; struct dma_pool *dma_pool; char dma_pool_name[MAX_DMAPOOL_NAME_LEN]; unsigned int qmr, qim, i; int ret; /* Find available queues */ qim = 0; qmr = ioread32(ccp->io_regs + Q_MASK_REG); for (i = 0; i < MAX_HW_QUEUES; i++) { if (!(qmr & (1 << i))) continue; /* Allocate a dma pool for this queue */ snprintf(dma_pool_name, sizeof(dma_pool_name), "%s_q%d", ccp->name, i); dma_pool = dma_pool_create(dma_pool_name, dev, CCP_DMAPOOL_MAX_SIZE, CCP_DMAPOOL_ALIGN, 0); if (!dma_pool) { dev_err(dev, "unable to allocate dma pool\n"); ret = -ENOMEM; goto e_pool; } cmd_q = &ccp->cmd_q[ccp->cmd_q_count]; ccp->cmd_q_count++; cmd_q->ccp = ccp; cmd_q->id = i; cmd_q->dma_pool = dma_pool; /* Reserve 2 KSB regions for the queue */ cmd_q->ksb_key = KSB_START + ccp->ksb_start++; cmd_q->ksb_ctx = KSB_START + ccp->ksb_start++; ccp->ksb_count -= 2; /* Preset some register values and masks that are queue * number dependent */ cmd_q->reg_status = ccp->io_regs + CMD_Q_STATUS_BASE + (CMD_Q_STATUS_INCR * i); cmd_q->reg_int_status = ccp->io_regs + CMD_Q_INT_STATUS_BASE + (CMD_Q_STATUS_INCR * i); cmd_q->int_ok = 1 << (i * 2); cmd_q->int_err = 1 << ((i * 2) + 1); cmd_q->free_slots = CMD_Q_DEPTH(ioread32(cmd_q->reg_status)); init_waitqueue_head(&cmd_q->int_queue); /* Build queue interrupt mask (two interrupts per queue) */ qim |= cmd_q->int_ok | cmd_q->int_err; #ifdef CONFIG_ARM64 /* For arm64 set the recommended queue cache settings */ iowrite32(ccp->axcache, ccp->io_regs + CMD_Q_CACHE_BASE + (CMD_Q_CACHE_INC * i)); #endif dev_dbg(dev, "queue #%u available\n", i); } if (ccp->cmd_q_count == 0) { dev_notice(dev, "no command queues available\n"); ret = -EIO; goto e_pool; } dev_notice(dev, "%u command queues available\n", ccp->cmd_q_count); /* Disable and clear interrupts until ready */ iowrite32(0x00, ccp->io_regs + IRQ_MASK_REG); for (i = 0; i < ccp->cmd_q_count; i++) { cmd_q = &ccp->cmd_q[i]; ioread32(cmd_q->reg_int_status); ioread32(cmd_q->reg_status); } iowrite32(qim, ccp->io_regs + IRQ_STATUS_REG); /* Request an irq */ ret = ccp->get_irq(ccp); if (ret) { dev_err(dev, "unable to allocate an IRQ\n"); goto e_pool; } /* Initialize the queues used to wait for KSB space and suspend */ init_waitqueue_head(&ccp->ksb_queue); init_waitqueue_head(&ccp->suspend_queue); /* Create a kthread for each queue */ for (i = 0; i < ccp->cmd_q_count; i++) { struct task_struct *kthread; cmd_q = &ccp->cmd_q[i]; kthread = kthread_create(ccp_cmd_queue_thread, cmd_q, "%s-q%u", ccp->name, cmd_q->id); if (IS_ERR(kthread)) { dev_err(dev, "error creating queue thread (%ld)\n", PTR_ERR(kthread)); ret = PTR_ERR(kthread); goto e_kthread; } cmd_q->kthread = kthread; wake_up_process(kthread); } /* Register the RNG */ ccp->hwrng.name = ccp->rngname; ccp->hwrng.read = ccp_trng_read; ret = hwrng_register(&ccp->hwrng); if (ret) { dev_err(dev, "error registering hwrng (%d)\n", ret); goto e_kthread; } /* Register the DMA engine support */ ret = ccp_dmaengine_register(ccp); if (ret) goto e_hwrng; ccp_add_device(ccp); /* Enable interrupts */ iowrite32(qim, ccp->io_regs + IRQ_MASK_REG); return 0; e_hwrng: hwrng_unregister(&ccp->hwrng); e_kthread: for (i = 0; i < ccp->cmd_q_count; i++) if (ccp->cmd_q[i].kthread) kthread_stop(ccp->cmd_q[i].kthread); ccp->free_irq(ccp); e_pool: for (i = 0; i < ccp->cmd_q_count; i++) dma_pool_destroy(ccp->cmd_q[i].dma_pool); return ret; } static void ccp_destroy(struct ccp_device *ccp) { struct ccp_cmd_queue *cmd_q; struct ccp_cmd *cmd; unsigned int qim, i; /* Remove this device from the list of available units first */ ccp_del_device(ccp); /* Unregister the DMA engine */ ccp_dmaengine_unregister(ccp); /* Unregister the RNG */ hwrng_unregister(&ccp->hwrng); /* Stop the queue kthreads */ for (i = 0; i < ccp->cmd_q_count; i++) if (ccp->cmd_q[i].kthread) kthread_stop(ccp->cmd_q[i].kthread); /* Build queue interrupt mask (two interrupt masks per queue) */ qim = 0; for (i = 0; i < ccp->cmd_q_count; i++) { cmd_q = &ccp->cmd_q[i]; qim |= cmd_q->int_ok | cmd_q->int_err; } /* Disable and clear interrupts */ iowrite32(0x00, ccp->io_regs + IRQ_MASK_REG); for (i = 0; i < ccp->cmd_q_count; i++) { cmd_q = &ccp->cmd_q[i]; ioread32(cmd_q->reg_int_status); ioread32(cmd_q->reg_status); } iowrite32(qim, ccp->io_regs + IRQ_STATUS_REG); ccp->free_irq(ccp); for (i = 0; i < ccp->cmd_q_count; i++) dma_pool_destroy(ccp->cmd_q[i].dma_pool); /* Flush the cmd and backlog queue */ while (!list_empty(&ccp->cmd)) { /* Invoke the callback directly with an error code */ cmd = list_first_entry(&ccp->cmd, struct ccp_cmd, entry); list_del(&cmd->entry); cmd->callback(cmd->data, -ENODEV); } while (!list_empty(&ccp->backlog)) { /* Invoke the callback directly with an error code */ cmd = list_first_entry(&ccp->backlog, struct ccp_cmd, entry); list_del(&cmd->entry); cmd->callback(cmd->data, -ENODEV); } } static irqreturn_t ccp_irq_handler(int irq, void *data) { struct device *dev = data; struct ccp_device *ccp = dev_get_drvdata(dev); struct ccp_cmd_queue *cmd_q; u32 q_int, status; unsigned int i; status = ioread32(ccp->io_regs + IRQ_STATUS_REG); for (i = 0; i < ccp->cmd_q_count; i++) { cmd_q = &ccp->cmd_q[i]; q_int = status & (cmd_q->int_ok | cmd_q->int_err); if (q_int) { cmd_q->int_status = status; cmd_q->q_status = ioread32(cmd_q->reg_status); cmd_q->q_int_status = ioread32(cmd_q->reg_int_status); /* On error, only save the first error value */ if ((q_int & cmd_q->int_err) && !cmd_q->cmd_error) cmd_q->cmd_error = CMD_Q_ERROR(cmd_q->q_status); cmd_q->int_rcvd = 1; /* Acknowledge the interrupt and wake the kthread */ iowrite32(q_int, ccp->io_regs + IRQ_STATUS_REG); wake_up_interruptible(&cmd_q->int_queue); } } return IRQ_HANDLED; } static const struct ccp_actions ccp3_actions = { .perform_aes = ccp_perform_aes, .perform_xts_aes = ccp_perform_xts_aes, .perform_sha = ccp_perform_sha, .perform_rsa = ccp_perform_rsa, .perform_passthru = ccp_perform_passthru, .perform_ecc = ccp_perform_ecc, .init = ccp_init, .destroy = ccp_destroy, .irqhandler = ccp_irq_handler, }; struct ccp_vdata ccpv3 = { .version = CCP_VERSION(3, 0), .perform = &ccp3_actions, .bar = 2, .offset = 0x20000, };