Commit bafa900a, authored by Zhen Lei, committed by Zheng Zengkai

iommu/arm-smmu-v3: Add arm_smmu_ecmdq_issue_cmdlist() for non-shared ECMDQ

hulk inclusion
category: feature
bugzilla: 174251
CVE: NA

-------------------------------------------------------------------------

When a core exclusively owns an ECMDQ, contention with other cores does
not need to be considered during command insertion. Therefore, the
multi-core contention handling in arm_smmu_cmdq_issue_cmdlist() can be
dropped, yielding a more efficient ECMDQ-specific function,
arm_smmu_ecmdq_issue_cmdlist() (a simplified sketch of the shared
reservation step appears after the diff below).
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Reviewed-by: Hanjun Guo <guohanjun@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Parent 9a1685b3
@@ -767,6 +767,87 @@ static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
 	}
 }
 
+/*
+ * The function is used when the current core exclusively occupies an ECMDQ.
+ * This is a reduced version of arm_smmu_cmdq_issue_cmdlist(), which eliminates
+ * a lot of unnecessary inter-core competition considerations.
+ */
+static int arm_smmu_ecmdq_issue_cmdlist(struct arm_smmu_device *smmu,
+					struct arm_smmu_cmdq *cmdq,
+					u64 *cmds, int n, bool sync)
+{
+	u32 prod;
+	unsigned long flags;
+	struct arm_smmu_ll_queue llq = {
+		.max_n_shift = cmdq->q.llq.max_n_shift,
+	}, head;
+	int ret = 0;
+
+	/* 1. Allocate some space in the queue */
+	local_irq_save(flags);
+	llq.val = READ_ONCE(cmdq->q.llq.val);
+	do {
+		u64 old;
+
+		while (!queue_has_space(&llq, n + sync)) {
+			local_irq_restore(flags);
+			if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
+				dev_err_ratelimited(smmu->dev, "ECMDQ timeout\n");
+			local_irq_save(flags);
+		}
+
+		head.cons = llq.cons;
+		head.prod = queue_inc_prod_n(&llq, n + sync);
+
+		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
+		if (old == llq.val)
+			break;
+
+		llq.val = old;
+	} while (1);
+
+	/* 2. Write our commands into the queue */
+	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
+	if (sync) {
+		u64 cmd_sync[CMDQ_ENT_DWORDS];
+
+		prod = queue_inc_prod_n(&llq, n);
+		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod);
+		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
+	}
+
+	/* 3. Ensuring commands are visible first */
+	dma_wmb();
+
+	/* 4. Advance the hardware prod pointer */
+	read_lock(&cmdq->q.ecmdq_lock);
+	writel_relaxed(head.prod | cmdq->q.ecmdq_prod, cmdq->q.prod_reg);
+	read_unlock(&cmdq->q.ecmdq_lock);
+
+	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
+	if (sync) {
+		llq.prod = queue_inc_prod_n(&llq, n);
+		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
+		if (ret) {
+			dev_err_ratelimited(smmu->dev,
+					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
+					    llq.prod,
+					    readl_relaxed(cmdq->q.prod_reg),
+					    readl_relaxed(cmdq->q.cons_reg));
+		}
+
+		/*
+		 * Update cmdq->q.llq.cons, to improve the success rate of
+		 * queue_has_space() when some new commands are inserted next
+		 * time.
+		 */
+		WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
+	}
+
+	local_irq_restore(flags);
+	return ret;
+}
+
 /*
  * This is the actual insertion function, and provides the following
  * ordering guarantees to callers:
@@ -796,6 +877,9 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
 	}, head = llq;
 	int ret = 0;
 
+	if (!cmdq->shared)
+		return arm_smmu_ecmdq_issue_cmdlist(smmu, cmdq, cmds, n, sync);
+
 	/* 1. Allocate some space in the queue */
 	local_irq_save(flags);
 	llq.val = READ_ONCE(cmdq->q.llq.val);
@@ -4036,6 +4120,7 @@ static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
 	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
 	atomic_long_t *bitmap;
 
+	cmdq->shared = 1;
 	atomic_set(&cmdq->owner_prod, 0);
 	atomic_set(&cmdq->lock, 0);
......
@@ -587,6 +587,7 @@ struct arm_smmu_cmdq {
 	atomic_long_t *valid_map;
 	atomic_t owner_prod;
 	atomic_t lock;
+	int shared;
 };
 
 struct arm_smmu_ecmdq {
......
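For readers unfamiliar with the queue layout, the following is a minimal user-space sketch, not the driver code, of the reservation step that both the shared path and the new non-shared path perform: prod and cons are packed into a single 64-bit word so that one compare-and-swap claims n slots atomically. All names here (llq_model, llq_reserve, has_space, Q_SHIFT) are hypothetical, the counters are free-running rather than wrap-bit encoded as in the kernel, and polling of the hardware CONS register is reduced to a comment.

/*
 * Minimal user-space sketch of the space-reservation step, under these
 * assumptions: names (llq_model, llq_reserve, has_space, Q_SHIFT) are
 * hypothetical; prod/cons are free-running counters instead of the
 * kernel's wrap-bit encoding; polling the hardware CONS register is
 * reduced to a comment.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define Q_SHIFT	4			/* 2^4 = 16 slots, illustrative only */
#define Q_ENTS	(1u << Q_SHIFT)

struct llq_model {
	_Atomic uint64_t val;		/* high 32 bits: prod, low 32 bits: cons */
};

static bool has_space(uint32_t prod, uint32_t cons, uint32_t n)
{
	/* Unsigned subtraction gives the number of occupied slots. */
	return Q_ENTS - (prod - cons) >= n;
}

/* Claim n slots; returns the prod value at which the caller may write. */
static uint32_t llq_reserve(struct llq_model *q, uint32_t n)
{
	uint64_t old = atomic_load_explicit(&q->val, memory_order_relaxed);

	for (;;) {
		uint32_t prod = (uint32_t)(old >> 32);
		uint32_t cons = (uint32_t)old;

		if (!has_space(prod, cons, n)) {
			/* Real code polls the hardware CONS register here. */
			old = atomic_load_explicit(&q->val, memory_order_relaxed);
			continue;
		}

		uint64_t new = ((uint64_t)(prod + n) << 32) | cons;

		/* Success means slots [prod, prod + n) now belong to us. */
		if (atomic_compare_exchange_weak_explicit(&q->val, &old, new,
							  memory_order_relaxed,
							  memory_order_relaxed))
			return prod;
		/* On failure, old was refreshed by the CAS; just retry. */
	}
}

int main(void)
{
	struct llq_model q = { .val = 0 };

	printf("first batch starts at prod=%u\n", llq_reserve(&q, 4));
	printf("second batch starts at prod=%u\n", llq_reserve(&q, 2));
	return 0;
}

Because a non-shared ECMDQ is only ever touched by one core, the cmpxchg in arm_smmu_ecmdq_issue_cmdlist() effectively never has to retry, and the owner/lock handover that the shared arm_smmu_cmdq_issue_cmdlist() path performs after reserving space is skipped entirely; that is the saving this patch targets.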