Commit 5d4ad757 authored by Yu'an Wang, committed by Yang Yingliang

UACCE backport from mainline

hulk inclusion
category: Feature
bugzilla: NA
CVE: NA

Backport uacce from mainline: uacce.c is moved to drivers/misc/uacce and the
Kconfig and Makefile entries are updated accordingly. At the same time, uacce.h
is moved from include/uapi/linux to include/uapi/misc/uacce.
Signed-off-by: Yu'an Wang <wangyuan46@huawei.com>
Signed-off-by: Kai Ye <yekai13@huawei.com>
Reviewed-by: Zhou Wang <wangzhou1@hisilicon.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Parent 12cd4ab4
@@ -217,8 +217,6 @@ source "drivers/visorbus/Kconfig"
source "drivers/siox/Kconfig"
source "drivers/uacce/Kconfig"
source "drivers/slimbus/Kconfig"
endmenu
@@ -186,4 +186,3 @@ obj-$(CONFIG_MULTIPLEXER) += mux/
obj-$(CONFIG_UNISYS_VISORBUS) += visorbus/
obj-$(CONFIG_SIOX) += siox/
obj-$(CONFIG_GNSS) += gnss/
obj-y += uacce/
@@ -527,4 +527,5 @@ source "drivers/misc/echo/Kconfig"
source "drivers/misc/cxl/Kconfig"
source "drivers/misc/ocxl/Kconfig"
source "drivers/misc/cardreader/Kconfig"
source "drivers/misc/uacce/Kconfig"
endmenu
@@ -58,3 +58,4 @@ obj-$(CONFIG_ASPEED_LPC_SNOOP) += aspeed-lpc-snoop.o
obj-$(CONFIG_PCI_ENDPOINT_TEST) += pci_endpoint_test.o
obj-$(CONFIG_OCXL) += ocxl/
obj-$(CONFIG_MISC_RTSX) += cardreader/
obj-$(CONFIG_UACCE) += uacce/
menuconfig UACCE
tristate "Accelerator Framework for User Land"
depends on IOMMU_API
select ANON_INODES
help
UACCE provides interface for the user process to access the hardware
without interaction with the kernel space in data path.
The user-space interface is described in
include/uapi/misc/uacce/uacce.h
See Documentation/misc-devices/uacce.rst for more details.
If you don't know what to do here, say N.
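A minimal user-space sketch of the interface this help text points at (the device node path and the region page offset below are assumptions; the authoritative layout comes from include/uapi/misc/uacce/uacce.h and each driver's qf_pg_start[]):
/* sketch only: open a uacce char device and map its MMIO queue-file region;
 * "/dev/uacce/dummy_wd-0" is a hypothetical node name */
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
int uacce_open_and_map(void **mmio)
{
	int fd = open("/dev/uacce/dummy_wd-0", O_RDWR);
	if (fd < 0)
		return -1;
	/* the dummy driver below places the MMIO region at page offset 0 */
	*mmio = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (*mmio == MAP_FAILED) {
		close(fd);
		return -1;
	}
	return fd;	/* caller later munmap()s the region and close()s fd */
}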
# SPDX-License-Identifier: GPL-2.0-or-later
obj-$(CONFIG_UACCE) += uacce.o
obj-$(CONFIG_WD_DUMMY_DEV) += dummy_drv/
@@ -699,10 +699,7 @@ static void uacce_queue_drain(struct uacce_queue *q)
if (is_to_free_region)
uacce_destroy_region(q, qfr);
}
#ifdef CONFIG_IOMMU_SVA2
if (uacce->flags & UACCE_DEV_SVA)
iommu_sva_unbind_device(uacce->pdev, q->pasid);
#endif
if (state && uacce->ops->put_queue)
uacce->ops->put_queue(q);
@@ -749,16 +746,6 @@ static int uacce_get_queue(struct uacce *uacce, struct file *filep)
int ret;
int pasid = 0;
#ifdef CONFIG_IOMMU_SVA2
if (uacce->flags & UACCE_DEV_PASID) {
ret = iommu_sva_bind_device(uacce->pdev, current->mm, &pasid,
IOMMU_SVA_FEAT_IOPF, NULL);
if (ret) {
dev_err(uacce->pdev, "iommu SVA binds fail!\n");
return ret;
}
}
#endif
uacce_qs_wlock();
ret = uacce->ops->get_queue(uacce, pasid, &q);
@@ -782,10 +769,7 @@ static int uacce_get_queue(struct uacce *uacce, struct file *filep)
return 0;
err_unbind:
#ifdef CONFIG_IOMMU_SVA2
if (uacce->flags & UACCE_DEV_PASID)
iommu_sva_unbind_device(uacce->pdev, pasid);
#endif
return ret;
}
@@ -1297,19 +1281,8 @@ int uacce_register(struct uacce *uacce)
return ret;
}
-if (uacce->flags & UACCE_DEV_PASID) {
+if (uacce->flags & UACCE_DEV_PASID)
#ifdef CONFIG_IOMMU_SVA2
ret = iommu_sva_init_device(uacce->pdev, IOMMU_SVA_FEAT_IOPF,
0, 0, NULL);
if (ret) {
dev_err(dev, "uacce sva init fail!\n");
uacce_destroy_chrdev(uacce);
return ret;
}
#else
uacce->flags &= ~(UACCE_DEV_FAULT_FROM_DEV | UACCE_DEV_PASID);
#endif
}
dev_dbg(&uacce->dev, "register to uacce!\n");
atomic_set(&uacce->ref, 0);
@@ -1333,9 +1306,6 @@ int uacce_unregister(struct uacce *uacce)
return -EAGAIN;
}
#ifdef CONFIG_IOMMU_SVA2
iommu_sva_shutdown_device(uacce->pdev);
#endif
uacce_hw_err_destroy(uacce);
uacce_destroy_chrdev(uacce);
......
menuconfig UACCE
tristate "Accelerator Framework for User Land"
depends on IOMMU_API
select ANON_INODES
help
UACCE provides interface for the user process to access the hardware
without interaction with the kernel space in data path.
See Documentation/warpdrive/warpdrive.rst for more details.
If you don't know what to do here, say N.
config WD_DUMMY_DEV
tristate "Support for WrapDrive Dummy Device"
depends on UACCE
help
Support for WarpDrive test driver with devices (NOT for upstream).
If you don't know what to do here, say N.
config WD_DUMMY_V1
tristate "Support for WrapDrive Dummy Device V1"
depends on WD_DUMMY_DEV
help
Support for WarpDrive test driver (NOT for upstream).
The device is simulated with os
If you don't know what to do here, say N.
config WD_DUMMY_V2
tristate "Support for WrapDrive Dummy Device V2"
depends on WD_DUMMY_DEV
help
Support for WarpDrive test driver (NOT for upstream).
The device is simulated with qemu
If you don't know what to do here, say N.
obj-$(CONFIG_WD_DUMMY_V1) += wd_dummy.o
wd_dummy-objs := dummy_wd_dev.o
obj-$(CONFIG_WD_DUMMY_V2) += wd_dummy2.o
wd_dummy2-objs := dummy_wd_v2.o
/* SPDX-License-Identifier: GPL-2.0-or-later */
/* Copyright (c) 2018-2019 HiSilicon Limited. */
/*
* This file defines the dummy hardware/driver interface between the user and
* kernel space
*/
#ifndef __DUMMY_HW_USR_IF_H
#define __DUMMY_HW_USR_IF_H
#include <linux/types.h>
#define DUMMY_WD "dummy_wd"
#define Q_BDS 16
#define DUMMY_HW_TAG_SZ 8
#define DUMMY_HW_TAG "WDDUMMY"
/* the format of the device ring space, which is of drv */
#define ring_bd wd_dummy_cpy_msg
/* the format of the device io space, which is of drv */
struct dummy_hw_queue_reg {
char hw_tag[DUMMY_HW_TAG_SZ]; /* should be "WDDUMMY\0" */
struct ring_bd ring[Q_BDS]; /*
* in real hardware, this is good to be
* in memory space, and will be fast
* for communication. here we keep it
* in io space just to make it simple
*/
__u32 ring_bd_num; /*
* ring_bd_num, now it is Q_BDS until
* we use a memory ring
*/
__u32 head; /*
* assume int access is atomic. it should be
* fine for a dummy and test function.
* head is for the writer(user) while
* tail is for the reader(kernel).
* head==tail means the queue is empty
*/
__u32 tail;
};
#define DUMMY_CMD_FLUSH _IO('d', 0)
#endif
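A sketch of how user space would submit one copy through this ring, assuming the dummy_hw_queue_reg page has been mmap()ed as in the earlier sketch and that tgt/src already point into the driver's SS region (that requirement comes from the dummy driver below, not from this header):
/* sketch only: fill one BD, advance head, then kick the driver */
#include <stddef.h>
#include <sys/ioctl.h>
#include <linux/types.h>
#include "wd_dummy_usr_if.h"
#include "dummy_hw_usr_if.h"
static int enqueue_copy(int fd, struct dummy_hw_queue_reg *reg,
			void *tgt, void *src, size_t len)
{
	__u32 head = reg->head;
	__u32 next = (head + 1) % reg->ring_bd_num;
	if (next == reg->tail)	/* ring full: head would catch up with tail */
		return -1;
	reg->ring[head].tgt_addr = tgt;
	reg->ring[head].src_addr = src;
	reg->ring[head].size = len;
	reg->head = next;	/* publish the BD to the kernel side */
	/* ask the driver to process now instead of waiting for its poll thread */
	return ioctl(fd, DUMMY_CMD_FLUSH);
}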
// SPDX-License-Identifier: GPL-2.0+
/* Copyright (c) 2018-2019 HiSilicon Limited. */
/**
* This module is used to test the framework of WarpDrive.
*
* It creates MAX_DEV platform devices with MAX_QUEUE queues each. When a
* queue is taken, a kernel thread is created to handle requests put into the
* queue by the user application.
*/
#include <asm/page.h>
#include <linux/dma-mapping.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/platform_device.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/uacce.h>
#include <linux/uaccess.h>
#include "wd_dummy_usr_if.h"
#include "dummy_hw_usr_if.h"
#define MAX_DEV 3
#define MAX_QUEUE 4
#define QUEUE_YEILD_MS 50
#define VERBOSE_LOG
#define MODE_MMIO 0 /* use mmio region for bd */
#define MODE_DSU 1 /* use dsu region for bd */
static int mode = MODE_DSU;
module_param(mode, int, 0);
static DEFINE_MUTEX(qsmutex);
struct dummy_hw;
struct dummy_hw_queue {
bool used;
struct task_struct *tsk;
__u32 tail;
struct uacce_qfile_region *ss_qfr;
struct uacce_queue wdq;
struct dummy_hw_queue_reg *reg;
struct dummy_hw *hw;
struct task_struct *work_thread;
struct mutex mutex;
int is_updated;
int devid, qid;
};
static struct dummy_hw {
int max_copy_size;
int aflags;
struct dummy_hw_queue qs[MAX_QUEUE];
struct platform_device *pdev;
} hws[MAX_DEV];
static int _do_copy(struct uacce_queue *q, void *tgt, void *src, size_t len)
{
struct uacce_qfile_region *ss_qfr = q->qfrs[UACCE_QFRT_SS];
int ret = 0;
size_t iova_base = q->qfrs[UACCE_QFRT_SS]->iova;
size_t ktgt = (unsigned long)tgt - iova_base;
size_t ksrc = (unsigned long)src - iova_base;
size_t range = ss_qfr->nr_pages << PAGE_SHIFT;
if (ktgt + len > range) {
dev_dbg(&q->uacce->dev, "ktgt(%lx, %lx) not in range(%lx)\n",
ktgt, len, range);
ret = -EINVAL;
goto out;
}
if (ksrc + len > range) {
dev_dbg(&q->uacce->dev, "ksrc(%lx, %lx) not in range(%lx)\n",
ksrc, len, range);
ret = -EINVAL;
goto out;
}
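/*
 * ktgt/ksrc are now plain offsets into the SS region; rebase them onto the
 * region's kernel mapping (kaddr) before doing the actual copy.
 */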
ktgt += (unsigned long)ss_qfr->kaddr;
ksrc += (unsigned long)ss_qfr->kaddr;
memcpy((void *)ktgt, (void *)ksrc, len);
out:
return ret;
}
static void _queue_work(struct dummy_hw_queue *hwq)
{
int bd_num;
__u32 head;
__u32 tail;
struct device *dev = &hwq->wdq.uacce->dev;
mutex_lock(&hwq->mutex);
bd_num = hwq->reg->ring_bd_num;
head = readl(&hwq->reg->head);
if (head >= bd_num) {
dev_err(dev, "dummy_wd io error, head=%d\n", head);
mutex_unlock(&hwq->mutex);
return;
}
tail = hwq->tail;
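/* consume BDs from our tail up to the head that user space has published */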
while (hwq->tail != head) {
if (hwq->reg->ring[hwq->tail].size > hwq->hw->max_copy_size)
hwq->reg->ring[hwq->tail].ret = -EINVAL;
else
hwq->reg->ring[hwq->tail].ret = _do_copy(&hwq->wdq,
hwq->reg->ring[hwq->tail].tgt_addr,
hwq->reg->ring[hwq->tail].src_addr,
hwq->reg->ring[hwq->tail].size);
dev_dbg(dev, "memcpy(%pK, %pK, %ld) = %d",
hwq->reg->ring[hwq->tail].tgt_addr,
hwq->reg->ring[hwq->tail].src_addr,
hwq->reg->ring[hwq->tail].size,
hwq->reg->ring[hwq->tail].ret);
hwq->tail = (hwq->tail+1)%bd_num;
}
if (tail != hwq->tail) {
dev_dbg(dev, "write back tail %d\n", hwq->tail);
writel(hwq->tail, &hwq->reg->tail);
hwq->is_updated = 1;
uacce_wake_up(&hwq->wdq);
}
mutex_unlock(&hwq->mutex);
}
static int dummy_is_q_updated(struct uacce_queue *q)
{
struct dummy_hw_queue *hwq = q->priv;
int updated;
mutex_lock(&hwq->mutex);
updated = hwq->is_updated;
hwq->is_updated = 0;
mutex_unlock(&hwq->mutex);
dev_dbg(&q->uacce->dev, "check q updated: %d\n", updated);
return updated;
}
static int dummy_get_queue(struct uacce *uacce, unsigned long arg,
struct uacce_queue **q)
{
int i;
struct dummy_hw *hw = (struct dummy_hw *)uacce->priv;
struct dummy_hw_queue *devqs = hw->qs;
WARN_ON(!devqs);
mutex_lock(&qsmutex);
for (i = 0; i < MAX_QUEUE; i++) {
if (!devqs[i].used) {
devqs[i].used = 1;
devqs[i].reg->head = 0;
devqs[i].reg->tail = 0;
devqs[i].tail = 0;
devqs[i].is_updated = 0;
*q = &devqs[i].wdq;
devqs[i].wdq.priv = &devqs[i];
dev_dbg(uacce->pdev, "allocate hw q %d\n", i);
break;
}
}
mutex_unlock(&qsmutex);
if (i < MAX_QUEUE)
return 0;
return -ENODEV;
}
static void dummy_put_queue(struct uacce_queue *q)
{
struct dummy_hw_queue *hwq = (struct dummy_hw_queue *)q->priv;
mutex_lock(&qsmutex);
hwq->used = 0;
mutex_unlock(&qsmutex);
}
static int dummy_mmap(struct uacce_queue *q, struct vm_area_struct *vma,
struct uacce_qfile_region *qfr)
{
struct dummy_hw_queue *hwq = (struct dummy_hw_queue *)q->priv;
struct page *page = virt_to_page(hwq->reg);
dev_dbg(&q->uacce->dev, "mmap mmio space (ref=%d)\n",
page_ref_count(page));
if (vma->vm_pgoff != 0 || qfr->nr_pages > 1 ||
!(vma->vm_flags & VM_SHARED))
return -EINVAL;
return remap_pfn_range(vma, vma->vm_start, __pa(hwq->reg)>>PAGE_SHIFT,
PAGE_SIZE, vma->vm_page_prot);
}
static long dummy_ioctl(struct uacce_queue *q, unsigned int cmd,
unsigned long arg)
{
struct dummy_hw_queue *hwq = q->priv;
switch (cmd) {
case DUMMY_CMD_FLUSH:
_queue_work(hwq);
return 0;
default:
return -EINVAL;
}
}
static void dummy_mask_notify(struct uacce_queue *q, int event_mask)
{
dev_dbg(&q->uacce->dev, "mask notify: %x\n", event_mask);
}
int queue_worker(void *data)
{
struct dummy_hw_queue *hwq = data;
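/* poll the ring every QUEUE_YEILD_MS milliseconds until the queue is stopped */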
do {
_queue_work(hwq);
schedule_timeout_interruptible(
msecs_to_jiffies(QUEUE_YEILD_MS));
} while (!kthread_should_stop());
hwq->work_thread = NULL;
return 0;
}
static int dummy_start_queue(struct uacce_queue *q)
{
struct dummy_hw_queue *hwq = q->priv;
hwq->work_thread = kthread_run(queue_worker, hwq,
"dummy_queue_worker %d-%d",
hwq->devid, hwq->qid);
if (PTR_ERR_OR_ZERO(hwq->work_thread))
return PTR_ERR(hwq->work_thread);
dev_dbg(&q->uacce->dev, "queue start\n");
return 0;
}
void dummy_stop_queue(struct uacce_queue *q)
{
struct dummy_hw_queue *hwq = q->priv;
if (hwq->work_thread)
kthread_stop(hwq->work_thread);
}
static int dummy_get_available_instances(struct uacce *uacce)
{
int i, ret;
struct dummy_hw *hw = (struct dummy_hw *)uacce->priv;
struct dummy_hw_queue *devqs = hw->qs;
mutex_lock(&qsmutex);
for (i = 0, ret = 0; i < MAX_QUEUE; i++) {
if (!devqs[i].used)
ret++;
}
mutex_unlock(&qsmutex);
return ret;
}
static struct uacce_ops dummy_ops = {
.get_queue = dummy_get_queue,
.put_queue = dummy_put_queue,
.start_queue = dummy_start_queue,
.stop_queue = dummy_stop_queue,
.is_q_updated = dummy_is_q_updated,
.mmap = dummy_mmap,
.ioctl = dummy_ioctl,
.mask_notify = dummy_mask_notify,
.get_available_instances = dummy_get_available_instances,
};
static int dummy_wd_probe(struct platform_device *pdev)
{
struct uacce *uacce;
struct dummy_hw *hw;
int i, ret;
if (pdev->id >= MAX_DEV) {
dev_err(&pdev->dev, "invalid id (%d) for dummy_wd\n", pdev->id);
return -EINVAL;
}
ret = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
if (ret < 0)
return -EINVAL;
hw = &hws[pdev->id];
hw->aflags = 0;
hw->max_copy_size = 4096;
uacce = devm_kzalloc(&pdev->dev, sizeof(struct uacce), GFP_KERNEL);
if (!uacce)
return -ENOMEM;
platform_set_drvdata(pdev, uacce);
uacce->name = DUMMY_WD;
uacce->pdev = &pdev->dev;
uacce->priv = hw;
uacce->ops = &dummy_ops;
uacce->drv_name = DUMMY_WD;
uacce->algs = "memcpy\n";
uacce->api_ver = "dummy_v1";
uacce->flags = UACCE_DEV_NOIOMMU;
uacce->qf_pg_start[UACCE_QFRT_MMIO] = 0;
uacce->qf_pg_start[UACCE_QFRT_DUS] = UACCE_QFR_NA;
uacce->qf_pg_start[UACCE_QFRT_SS] = 1;
#ifdef CONFIG_NUMA
/*
* Emulate numa id if there's no platform dummy device.
* Try to bind each dummy device to each numa node. If there're more
* dummy devices than numa nodes, the numa_node should be bound to
* the last numa node.
*/
if (uacce->pdev->numa_node < 0) {
if (cpu_to_node(nr_cpu_ids - 1) > pdev->id)
uacce->pdev->numa_node = pdev->id;
else
uacce->pdev->numa_node = cpu_to_node(nr_cpu_ids - 1);
}
#endif
for (i = 0; i < MAX_QUEUE; i++) {
hw->qs[i].wdq.uacce = uacce;
hw->qs[i].hw = hw;
hw->qs[i].reg = (struct dummy_hw_queue_reg *)
__get_free_page(GFP_KERNEL);
memcpy(hw->qs[i].reg->hw_tag, DUMMY_HW_TAG, DUMMY_HW_TAG_SZ);
hw->qs[i].reg->ring_bd_num = Q_BDS;
hw->qs[i].reg->head = 0;
hw->qs[i].reg->tail = 0;
hw->qs[i].tail = 0;
hw->qs[i].is_updated = 0;
hw->qs[i].devid = pdev->id;
hw->qs[i].qid = i;
mutex_init(&hw->qs[i].mutex);
}
return uacce_register(uacce);
}
static int dummy_wd_remove(struct platform_device *pdev)
{
struct uacce *uacce = (struct uacce *)pdev->dev.driver_data;
struct dummy_hw *hw = &hws[pdev->id];
int i;
uacce_unregister(uacce);
for (i = 0; i < MAX_QUEUE; i++)
free_page((unsigned long)hw->qs[i].reg);
return 0;
}
static struct platform_driver dummy_pdrv = {
.probe = dummy_wd_probe,
.remove = dummy_wd_remove,
.driver = {
.name = DUMMY_WD,
},
};
static int __init dummy_uacce_init(void)
{
int i, j;
int ret = platform_driver_register(&dummy_pdrv);
if (ret)
return ret;
for (i = 0; i < MAX_DEV; i++) {
hws[i].pdev = platform_device_alloc(DUMMY_WD, i);
WARN_ON(!hws[i].pdev);
ret = platform_device_add(hws[i].pdev);
if (ret)
goto dev_reg_fail;
}
return 0;
dev_reg_fail:
for (j = i - 1; j >= 0; j--) {
if (hws[j].pdev)
platform_device_put(hws[j].pdev);
}
platform_driver_unregister(&dummy_pdrv);
return ret;
}
static void __exit dummy_uacce_exit(void)
{
int i;
for (i = MAX_DEV - 1; i >= 0; i--)
platform_device_unregister(hws[i].pdev);
platform_driver_unregister(&dummy_pdrv);
}
module_init(dummy_uacce_init);
module_exit(dummy_uacce_exit);
MODULE_AUTHOR("Kenneth Lee<liguozhu@hisilicon.com>");
MODULE_LICENSE("GPL v2");
// SPDX-License-Identifier: GPL-2.0+
/* Copyright (c) 2018-2019 HiSilicon Limited. */
/**
* This module is used to test the framework of WarpDrive.
*
* It supports a device similar to dummy_wd_dev in qemu and does almost the same.
* But it is "real" hardware to the OS, so we can test the iommu feature.
*/
#include <asm/page.h>
#include <linux/dma-mapping.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/platform_device.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/uacce.h>
#include <linux/uaccess.h>
#include "wd_dummy_usr_if.h"
#include "dummy_hw_usr_if.h"
#define DUMMY2_WD "dummy_wd2"
#define MAX_PT_ENTRIES 64
#define RING_NUM 3
#define HEADER_WORDS 12 /* reserved words in page 0 */
#define DUMMY2_DMA_PAGE_SHIFT 12
#define DUMMY2_DMA_PAGE_SIZE (1<<DUMMY2_DMA_PAGE_SHIFT)
#define DUMMY2_IO_TAG 0
#define DUMMY2_IO_PTPA (sizeof(uint64_t))
#define DUMMY2_IO_PTSZ (sizeof(uint64_t) * 2)
#define DUMMY2_IO_MAX_COPY_SIZE (sizeof(uint64_t) * 4)
#define DUMMY2_IO_RING_BEGIN (sizeof(uint64_t) * HEADER_WORDS)
struct pt_entry {
uint64_t asid; /*
*-1 means entry invalid, 0 means kernel,
*others are valid pasid.
*/
uint64_t iova;
/*-1 means entry invalid */
uint64_t pa;
};
/* ring io struct in hardware mmio space */
struct ring_io {
uint64_t rbpa;
uint64_t rbsz;
uint64_t asid;
};
struct dummy_wd2_hw;
struct dummy_wd2_iommu_domain {
struct iommu_domain domain;
struct dummy_wd2_hw *hw;
uint64_t asid;
};
#define to_dummy_wd2_iommu_domain(d) \
container_of(d, struct dummy_wd2_iommu_domain, domain)
struct dummy_wd2_hw_queue {
bool used;
struct dummy_wd2_hw *hw;
struct uacce_queue q;
void __iomem *db_pa;
void *ring_io_base;
dma_addr_t bd_dma;
};
struct dummy_wd2_hw {
int max_copy_size;
int ver;
struct dummy_wd2_hw_queue qs[RING_NUM];
struct platform_device *pdev;
struct mutex mutex;
struct device dummy_wd2_dev;
struct iommu_group *iommu_group;
void *io_base;
struct pt_entry *pt;
dma_addr_t pt_dma;
};
static int dummy_wd2_bus_probe(struct device *dev)
{
dev_info(dev, "bus probe dev\n");
return 0;
}
static int dummy_wd2_bus_remove(struct device *dev)
{
dev_info(dev, "bus remove dev");
return 0;
}
static bool dummy_wd2_iommu_capable(enum iommu_cap cap)
{
switch (cap) {
case IOMMU_CAP_CACHE_COHERENCY:
return true;
default:
return false;
}
}
static struct iommu_domain *dummy_wd2_iommu_domain_alloc(
unsigned int iommu_domain_type);
static void dummy_wd2_iommu_domain_free(struct iommu_domain *domain);
static int dummy_wd2_iommu_attach_dev(struct iommu_domain *domain,
struct device *dev)
{
struct dummy_wd2_iommu_domain *d = to_dummy_wd2_iommu_domain(domain);
pr_info("%s\n", __func__);
d->hw = dev_get_drvdata(dev);
d->asid = 0;
if (d->hw)
return 0;
else
return -ENODEV;
}
static void dummy_wd2_iommu_detach_dev(struct iommu_domain *domain,
struct device *dev)
{
struct dummy_wd2_iommu_domain *d = to_dummy_wd2_iommu_domain(domain);
d->hw = NULL;
d->asid = (u64)-1;
pr_info("%s\n", __func__);
}
static int dummy_wd2_iommu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t size, int prot)
{
struct dummy_wd2_iommu_domain *d = to_dummy_wd2_iommu_domain(domain);
int i;
if (size != 4096)
return -EIO;
for (i = 0; i < MAX_PT_ENTRIES; i++) {
if (d->hw->pt[i].asid == (uint64_t)-1) {
d->hw->pt[i].asid = d->asid;
d->hw->pt[i].iova = iova;
d->hw->pt[i].pa = paddr;
dev_dbg(&d->hw->dummy_wd2_dev,
"iommu_map %d asid=%lld, %llx=>%llx\n", i,
d->hw->pt[i].asid,
d->hw->pt[i].iova,
d->hw->pt[i].pa);
/* flush to hardware */
writeq(MAX_PT_ENTRIES,
d->hw->io_base + DUMMY2_IO_PTSZ);
return 0;
}
}
return -EBUSY;
}
static size_t dummy_wd2_iommu_unmap(struct iommu_domain *domain,
unsigned long iova, size_t size)
{
struct dummy_wd2_iommu_domain *d = to_dummy_wd2_iommu_domain(domain);
int i;
if (size != DUMMY2_DMA_PAGE_SHIFT)
return 0;
for (i = 0; i < MAX_PT_ENTRIES; i++) {
if (d->hw->pt[i].asid == d->asid && d->hw->pt[i].iova == iova) {
dev_dbg(&d->hw->dummy_wd2_dev,
"iommu_unmap %d asid=%lld, %llx=>%llx\n", i,
d->hw->pt[i].asid,
d->hw->pt[i].iova,
d->hw->pt[i].pa);
d->hw->pt[i].asid = (uint64_t)-1;
/* flush to hardware */
writeq(MAX_PT_ENTRIES,
d->hw->io_base + DUMMY2_IO_PTSZ);
return DUMMY2_DMA_PAGE_SIZE;
}
}
return 0;
}
static struct iommu_ops dummy_wd2_iommu_ops = {
.capable = dummy_wd2_iommu_capable,
.domain_alloc = dummy_wd2_iommu_domain_alloc,
.domain_free = dummy_wd2_iommu_domain_free,
.attach_dev = dummy_wd2_iommu_attach_dev,
.detach_dev = dummy_wd2_iommu_detach_dev,
.map = dummy_wd2_iommu_map,
.unmap = dummy_wd2_iommu_unmap,
.pgsize_bitmap = SZ_4K,
};
static struct iommu_domain *dummy_wd2_iommu_domain_alloc(
unsigned int iommu_domain_type)
{
struct dummy_wd2_iommu_domain *domain = kzalloc(
sizeof(struct iommu_domain), GFP_KERNEL);
if (!domain)
return NULL;
domain->domain.ops = &dummy_wd2_iommu_ops;
return &domain->domain;
}
static void dummy_wd2_iommu_domain_free(struct iommu_domain *domain)
{
struct dummy_wd2_iommu_domain *d = to_dummy_wd2_iommu_domain(domain);
kfree(d);
}
static struct bus_type dummy_wd2_bus_type = {
.name = "dummy_wd2_bus",
.probe = dummy_wd2_bus_probe,
.remove = dummy_wd2_bus_remove,
.iommu_ops = &dummy_wd2_iommu_ops,
};
static int dummy_wd2_is_q_updated(struct uacce_queue *q)
{
return 0;
}
static int dummy_wd2_get_queue(struct uacce *uacce, unsigned long arg,
struct uacce_queue **q)
{
int i;
struct dummy_wd2_hw *hw = (struct dummy_wd2_hw *)uacce->priv;
mutex_lock(&hw->mutex);
for (i = 0; i < RING_NUM; i++) {
if (!hw->qs[i].used) {
hw->qs[i].used = true;
*q = &hw->qs[i].q;
dev_dbg(uacce->pdev, "allocate hw q %d\n", i);
break;
}
}
mutex_unlock(&hw->mutex);
if (i < RING_NUM)
return 0;
return -ENODEV;
}
static void dummy_wd2_put_queue(struct uacce_queue *q)
{
struct dummy_wd2_hw_queue *hwq = (struct dummy_wd2_hw_queue *)q->priv;
struct dummy_wd2_hw *hw = hwq->hw;
mutex_lock(&hw->mutex);
hwq->used = false;
mutex_unlock(&hw->mutex);
}
static int dummy_wd2_mmap(struct uacce_queue *q, struct vm_area_struct *vma,
struct uacce_qfile_region *qfr)
{
struct dummy_wd2_hw_queue *hwq = (struct dummy_wd2_hw_queue *)q->priv;
if (vma->vm_pgoff != 0 || qfr->nr_pages > 1 ||
!(vma->vm_flags & VM_SHARED))
return -EINVAL;
return remap_pfn_range(vma, vma->vm_start,
(u64)hwq->db_pa >> PAGE_SHIFT,
DUMMY2_DMA_PAGE_SIZE,
pgprot_noncached(vma->vm_page_prot));
}
static void dummy_wd2_mask_notify(struct uacce_queue *q, int event_mask)
{
dev_dbg(&q->uacce->dev, "mask notify: %x\n", event_mask);
}
static inline void dummy_wd2_hw_set_rb(struct dummy_wd2_hw_queue *hwq,
uint64_t rbpa, uint64_t rbsz,
uint64_t asid)
{
#define ring_io_off(member) offsetof(struct ring_io, member)
writeq(rbpa, hwq->ring_io_base + ring_io_off(rbpa));
writeq(rbsz, hwq->ring_io_base + ring_io_off(rbsz));
writeq(asid, hwq->ring_io_base + ring_io_off(asid));
}
static int dummy_wd2_start_queue(struct uacce_queue *q)
{
struct dummy_wd2_hw_queue *hwq = (struct dummy_wd2_hw_queue *)q->priv;
dev_dbg(&q->uacce->dev, "queue start\n");
hwq->bd_dma = q->qfrs[UACCE_QFRT_DUS]->iova;
if (!hwq->bd_dma) {
dev_err(&q->uacce->dev, "DUS is not created\n");
return -EINVAL;
}
if (!hwq->ring_io_base) {
dev_err(&q->uacce->dev, "ring_iobase is not set\n");
return -EINVAL;
}
dummy_wd2_hw_set_rb(hwq, hwq->bd_dma, Q_BDS, 0);
return 0;
}
void dummy_wd2_stop_queue(struct uacce_queue *q)
{
struct dummy_wd2_hw_queue *hwq = (struct dummy_wd2_hw_queue *)q->priv;
dev_dbg(&q->uacce->dev, "stop queue\n");
dummy_wd2_hw_set_rb(hwq, 0, 0, (uint64_t)-1);
}
static int dummy_wd2_get_available_instances(struct uacce *uacce)
{
int i, ret;
struct dummy_wd2_hw *hw = (struct dummy_wd2_hw *)uacce->priv;
mutex_lock(&hw->mutex);
for (i = 0, ret = 0; i < RING_NUM; i++) {
if (!hw->qs[i].used)
ret++;
}
mutex_unlock(&hw->mutex);
return ret;
}
static struct uacce_ops dummy_wd2_ops = {
.get_queue = dummy_wd2_get_queue,
.put_queue = dummy_wd2_put_queue,
.start_queue = dummy_wd2_start_queue,
.stop_queue = dummy_wd2_stop_queue,
.is_q_updated = dummy_wd2_is_q_updated,
.mmap = dummy_wd2_mmap,
.mask_notify = dummy_wd2_mask_notify,
.get_available_instances = dummy_wd2_get_available_instances,
};
static void dummy_wd2_dev_release(struct device *dev)
{
dev_info(dev, "dummy_wd2 dev release\n");
}
static int dummy_wd2_probe(struct platform_device *pdev)
{
struct uacce *uacce;
struct device *dev = &pdev->dev;
struct dummy_wd2_hw *hw;
struct resource *res;
int i, ret;
if (!of_device_is_compatible(dev->of_node, "warpdrive,wd_dummy_v2"))
return -EINVAL;
hw = devm_kzalloc(dev, sizeof(*hw), GFP_KERNEL);
if (!hw)
return -ENOMEM;
hw->ver = 2;
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!res) {
dev_err(dev, "cannot find io space!\n");
return -ENODEV;
}
hw->io_base = devm_ioremap_resource(dev, res);
if (IS_ERR(hw->io_base))
return PTR_ERR(hw->io_base);
hw->pt = dmam_alloc_coherent(dev,
sizeof(struct pt_entry) * MAX_PT_ENTRIES,
&hw->pt_dma, GFP_KERNEL);
if (!hw->pt)
return -ENOMEM;
hw->max_copy_size = (int)readq(hw->io_base + DUMMY2_IO_MAX_COPY_SIZE);
mutex_init(&hw->mutex);
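/* mark every page-table entry invalid (asid == -1) before handing the table to the device */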
for (i = 0; i < MAX_PT_ENTRIES; i++) {
hw->pt[i].asid = (uint64_t)-1;
hw->pt[i].iova = 0x1111222233334444;
hw->pt[i].pa = 0x5555666677778888;
}
dev_info(dev, "v2 device (%llx, %llx), header: %llx\n",
(u64)hw->pt, hw->pt_dma, readq(hw->io_base + DUMMY2_IO_TAG));
/* set page tables */
writeq(hw->pt_dma, hw->io_base + DUMMY2_IO_PTPA);
writeq(MAX_PT_ENTRIES, hw->io_base + DUMMY2_IO_PTSZ);
for (i = 0; i < RING_NUM; i++) {
hw->qs[i].used = false;
hw->qs[i].db_pa = (void __iomem *)res->start +
((i+1)<<DUMMY2_DMA_PAGE_SHIFT);
hw->qs[i].ring_io_base = hw->io_base + DUMMY2_IO_RING_BEGIN +
sizeof(struct ring_io) * i;
hw->qs[i].hw = hw;
hw->qs[i].q.priv = &hw->qs[i];
}
hw->dummy_wd2_dev.parent = dev;
hw->dummy_wd2_dev.bus = &dummy_wd2_bus_type;
hw->dummy_wd2_dev.release = dummy_wd2_dev_release;
/* assume I have only one device now */
dev_set_name(&hw->dummy_wd2_dev, "dummy_wd2-%d", 0);
dev_set_drvdata(&hw->dummy_wd2_dev, hw);
ret = device_register(&hw->dummy_wd2_dev);
if (ret) {
/* device_register has 2 steps; the first one always succeeds
* and sets the refcount to 1
*/
goto err_with_device;
}
hw->iommu_group = iommu_group_alloc();
if (IS_ERR(hw->iommu_group)) {
ret = -ENOMEM;
goto err_with_device;
}
iommu_group_set_name(hw->iommu_group, "dummy_hw2_iommu");
iommu_group_set_iommudata(hw->iommu_group, &hw, NULL);
ret = iommu_group_add_device(hw->iommu_group, &hw->dummy_wd2_dev);
if (ret)
goto err_with_group;
uacce = devm_kzalloc(&pdev->dev, sizeof(struct uacce), GFP_KERNEL);
if (!uacce) {
ret = -ENOMEM;
goto err_with_group;
}
platform_set_drvdata(pdev, uacce);
uacce->name = DUMMY2_WD;
uacce->pdev = &hw->dummy_wd2_dev;
uacce->priv = hw;
uacce->ops = &dummy_wd2_ops;
uacce->drv_name = DUMMY2_WD;
uacce->algs = "memcpy\n";
uacce->api_ver = "dummy_v2";
uacce->flags = 0;
uacce->qf_pg_start[UACCE_QFRT_MMIO] = 0;
uacce->qf_pg_start[UACCE_QFRT_DUS] = 1;
uacce->qf_pg_start[UACCE_QFRT_SS] = 2;
ret = uacce_register(uacce);
if (ret) {
dev_warn(uacce->pdev, "uacce register fail %d\n", ret);
goto err_with_group;
}
return 0;
err_with_group:
iommu_group_put(hw->iommu_group);
err_with_device:
put_device(&hw->dummy_wd2_dev);
return ret;
}
static int dummy_wd2_remove(struct platform_device *pdev)
{
struct uacce *uacce = (struct uacce *)pdev->dev.driver_data;
uacce_unregister(uacce);
return 0;
}
static struct platform_driver dummy_wd2_pdrv = {
.probe = dummy_wd2_probe,
.remove = dummy_wd2_remove,
.driver = {
.name = DUMMY2_WD,
},
};
static int __init dummy_wd2_init(void)
{
int ret;
ret = bus_register(&dummy_wd2_bus_type);
if (ret)
return ret;
ret = platform_driver_register(&dummy_wd2_pdrv);
if (ret) {
bus_unregister(&dummy_wd2_bus_type);
return ret;
}
return 0;
}
static void __exit dummy_wd2_exit(void)
{
platform_driver_unregister(&dummy_wd2_pdrv);
bus_unregister(&dummy_wd2_bus_type);
}
module_init(dummy_wd2_init);
module_exit(dummy_wd2_exit);
MODULE_AUTHOR("Kenneth Lee<liguozhu@hisilicon.com>");
MODULE_LICENSE("GPL v2");
/* SPDX-License-Identifier: GPL-2.0-or-later */
/* Copyright (c) 2018-2019 HiSilicon Limited. */
/*
* This file defines the dummy algo interface between the user and kernel space
*/
#ifndef __DUMMY_USR_IF_H
#define __DUMMY_USR_IF_H
/* Algorithm name */
#define AN_DUMMY_MEMCPY "memcopy"
#define AAN_AFLAGS "aflags"
#define AAN_MAX_COPY_SIZE "max_copy_size"
struct wd_dummy_cpy_param {
int flags;
int max_copy_size;
};
struct wd_dummy_cpy_msg {
char *src_addr;
char *tgt_addr;
size_t size;
void *ptr;
__u32 ret;
};
#endif
/* SPDX-License-Identifier: GPL-2.0-or-later */
/* Copyright (c) 2018-2019 HiSilicon Limited. */
-#ifndef __UACCE_H
-#define __UACCE_H
+#ifndef _LINUX_UACCE_H
+#define _LINUX_UACCE_H
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/fs.h>
#include <linux/list.h>
#include <linux/iommu.h>
-#include <uapi/linux/uacce.h>
+#include <uapi/misc/uacce/uacce.h>
struct uacce_queue;
struct uacce;
@@ -132,4 +132,4 @@ const char *uacce_qfrt_str(struct uacce_qfile_region *qfr);
struct uacce *dev_to_uacce(struct device *dev);
int uacce_hw_err_isolate(struct uacce *uacce);
-#endif
+#endif /* _LINUX_UACCE_H */
/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
#ifndef _UAPI_HISI_QM_H
#define _UAPI_HISI_QM_H
#include <linux/types.h>
#define QM_CQE_SIZE 16
/* default queue depth for sq/cq/eq */
#define QM_Q_DEPTH 1024
/* page number for queue file region */
#define QM_DOORBELL_PAGE_NR 1
#define QM_DKO_PAGE_NR 4
#define QM_DUS_PAGE_NR 36
#define QM_DOORBELL_PG_START 0
#define QM_DKO_PAGE_START (QM_DOORBELL_PG_START + QM_DOORBELL_PAGE_NR)
#define QM_DUS_PAGE_START (QM_DKO_PAGE_START + QM_DKO_PAGE_NR)
#define QM_SS_PAGE_START (QM_DUS_PAGE_START + QM_DUS_PAGE_NR)
#define QM_DOORBELL_OFFSET 0x340
#define QM_V2_DOORBELL_OFFSET 0x1000
struct cqe {
__le32 rsvd0;
__le16 cmd_id;
__le16 rsvd1;
__le16 sq_head;
__le16 sq_num;
__le16 rsvd2;
__le16 w7;
};
/**
* struct hisi_qp_ctx - User data for hisi qp.
* @id: qp_index return to user space
* @qc_type: Accelerator algorithm type
*/
struct hisi_qp_ctx {
__u16 id;
__u16 qc_type;
};
#define HISI_QM_API_VER_BASE "hisi_qm_v1"
#define HISI_QM_API_VER2_BASE "hisi_qm_v2"
/* UACCE_CMD_QM_SET_QP_CTX: Set qp algorithm type */
#define UACCE_CMD_QM_SET_QP_CTX _IOWR('H', 10, struct hisi_qp_ctx)
#endif
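A minimal user-space sketch of the UACCE_CMD_QM_SET_QP_CTX ioctl defined above; the device node path is an assumption and the qc_type value is driver-specific:
/* sketch only: bind an opened QM queue to an algorithm type */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include "hisi_qm.h"	/* the uapi header above */
int main(void)
{
	struct hisi_qp_ctx ctx = { .qc_type = 0 };	/* 0 is a placeholder type */
	int fd = open("/dev/uacce/hisi_zip-0", O_RDWR);	/* hypothetical node name */
	if (fd < 0)
		return 1;
	/* on success the driver writes the allocated qp index back into ctx.id */
	if (ioctl(fd, UACCE_CMD_QM_SET_QP_CTX, &ctx) < 0) {
		close(fd);
		return 1;
	}
	printf("qp index %u\n", ctx.id);
	close(fd);
	return 0;
}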