提交 9d127ad5 编写于 作者: O Ofir Bitton 提交者: Oded Gabbay

habanalabs: indicate to user that a cs is gone

We want to indicate to the user that a certain command submission
finished a long time ago and is no longer in the database.
This means no further information regarding this cs can be obtained.
Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
上级 64a9d5ab
...@@ -11,9 +11,22 @@ ...@@ -11,9 +11,22 @@
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <linux/slab.h> #include <linux/slab.h>
/**
 * enum hl_cs_wait_status - outcome of waiting on a command submission (cs)
 * @CS_WAIT_STATUS_BUSY: cs was not completed yet
 * @CS_WAIT_STATUS_COMPLETED: cs completed
 * @CS_WAIT_STATUS_GONE: cs completed but its fence is already gone, so no
 *                       further information about this cs can be obtained
 *
 * Written by _hl_cs_wait_ioctl() through its status output argument.
 * hl_cs_wait_ioctl() reports BUSY to the user as HL_WAIT_CS_STATUS_BUSY and
 * both COMPLETED and GONE as HL_WAIT_CS_STATUS_COMPLETED; for GONE it also
 * sets HL_WAIT_CS_STATUS_FLAG_GONE in the output flags.
 */
enum hl_cs_wait_status {
CS_WAIT_STATUS_BUSY,
CS_WAIT_STATUS_COMPLETED,
CS_WAIT_STATUS_GONE
};
static void job_wq_completion(struct work_struct *work); static void job_wq_completion(struct work_struct *work);
static long _hl_cs_wait_ioctl(struct hl_device *hdev, static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
struct hl_ctx *ctx, u64 timeout_us, u64 seq); u64 timeout_us, u64 seq,
enum hl_cs_wait_status *status);
static void cs_do_release(struct kref *ref); static void cs_do_release(struct kref *ref);
static void hl_sob_reset(struct kref *ref) static void hl_sob_reset(struct kref *ref)
...@@ -942,7 +955,7 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args, ...@@ -942,7 +955,7 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
int rc = 0, do_ctx_switch; int rc = 0, do_ctx_switch;
void __user *chunks; void __user *chunks;
u32 num_chunks, tmp; u32 num_chunks, tmp;
long ret; int ret;
do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0); do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);
...@@ -996,18 +1009,19 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args, ...@@ -996,18 +1009,19 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
/* Need to wait for restore completion before execution phase */ /* Need to wait for restore completion before execution phase */
if (num_chunks) { if (num_chunks) {
enum hl_cs_wait_status status;
wait_again: wait_again:
ret = _hl_cs_wait_ioctl(hdev, ctx, ret = _hl_cs_wait_ioctl(hdev, ctx,
jiffies_to_usecs(hdev->timeout_jiffies), jiffies_to_usecs(hdev->timeout_jiffies),
*cs_seq); *cs_seq, &status);
if (ret <= 0) { if (ret) {
if (ret == -ERESTARTSYS) { if (ret == -ERESTARTSYS) {
usleep_range(100, 200); usleep_range(100, 200);
goto wait_again; goto wait_again;
} }
dev_err(hdev->dev, dev_err(hdev->dev,
"Restore CS for context %d failed to complete %ld\n", "Restore CS for context %d failed to complete %d\n",
ctx->asid, ret); ctx->asid, ret);
rc = -ENOEXEC; rc = -ENOEXEC;
goto out; goto out;
...@@ -1337,12 +1351,14 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) ...@@ -1337,12 +1351,14 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
return rc; return rc;
} }
static long _hl_cs_wait_ioctl(struct hl_device *hdev, static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
struct hl_ctx *ctx, u64 timeout_us, u64 seq) u64 timeout_us, u64 seq,
enum hl_cs_wait_status *status)
{ {
struct hl_fence *fence; struct hl_fence *fence;
unsigned long timeout; unsigned long timeout;
long rc; int rc = 0;
long completion_rc;
if (timeout_us == MAX_SCHEDULE_TIMEOUT) if (timeout_us == MAX_SCHEDULE_TIMEOUT)
timeout = timeout_us; timeout = timeout_us;
...@@ -1360,11 +1376,17 @@ static long _hl_cs_wait_ioctl(struct hl_device *hdev, ...@@ -1360,11 +1376,17 @@ static long _hl_cs_wait_ioctl(struct hl_device *hdev,
seq, ctx->cs_sequence); seq, ctx->cs_sequence);
} else if (fence) { } else if (fence) {
if (!timeout_us) if (!timeout_us)
rc = completion_done(&fence->completion); completion_rc = completion_done(&fence->completion);
else else
rc = wait_for_completion_interruptible_timeout( completion_rc =
wait_for_completion_interruptible_timeout(
&fence->completion, timeout); &fence->completion, timeout);
if (completion_rc > 0)
*status = CS_WAIT_STATUS_COMPLETED;
else
*status = CS_WAIT_STATUS_BUSY;
if (fence->error == -ETIMEDOUT) if (fence->error == -ETIMEDOUT)
rc = -ETIMEDOUT; rc = -ETIMEDOUT;
else if (fence->error == -EIO) else if (fence->error == -EIO)
...@@ -1375,7 +1397,7 @@ static long _hl_cs_wait_ioctl(struct hl_device *hdev, ...@@ -1375,7 +1397,7 @@ static long _hl_cs_wait_ioctl(struct hl_device *hdev,
dev_dbg(hdev->dev, dev_dbg(hdev->dev,
"Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n", "Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n",
seq, ctx->cs_sequence); seq, ctx->cs_sequence);
rc = 1; *status = CS_WAIT_STATUS_GONE;
} }
hl_ctx_put(ctx); hl_ctx_put(ctx);
...@@ -1387,14 +1409,16 @@ int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) ...@@ -1387,14 +1409,16 @@ int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
{ {
struct hl_device *hdev = hpriv->hdev; struct hl_device *hdev = hpriv->hdev;
union hl_wait_cs_args *args = data; union hl_wait_cs_args *args = data;
enum hl_cs_wait_status status;
u64 seq = args->in.seq; u64 seq = args->in.seq;
long rc; int rc;
rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq); rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq,
&status);
memset(args, 0, sizeof(*args)); memset(args, 0, sizeof(*args));
if (rc < 0) { if (rc) {
if (rc == -ERESTARTSYS) { if (rc == -ERESTARTSYS) {
dev_err_ratelimited(hdev->dev, dev_err_ratelimited(hdev->dev,
"user process got signal while waiting for CS handle %llu\n", "user process got signal while waiting for CS handle %llu\n",
...@@ -1415,10 +1439,18 @@ int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) ...@@ -1415,10 +1439,18 @@ int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
return rc; return rc;
} }
if (rc == 0) switch (status) {
args->out.status = HL_WAIT_CS_STATUS_BUSY; case CS_WAIT_STATUS_GONE:
else args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE;
fallthrough;
case CS_WAIT_STATUS_COMPLETED:
args->out.status = HL_WAIT_CS_STATUS_COMPLETED; args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
break;
case CS_WAIT_STATUS_BUSY:
default:
args->out.status = HL_WAIT_CS_STATUS_BUSY;
break;
}
return 0; return 0;
} }
...@@ -662,10 +662,13 @@ struct hl_wait_cs_in { ...@@ -662,10 +662,13 @@ struct hl_wait_cs_in {
#define HL_WAIT_CS_STATUS_ABORTED 3 #define HL_WAIT_CS_STATUS_ABORTED 3
#define HL_WAIT_CS_STATUS_INTERRUPTED 4 #define HL_WAIT_CS_STATUS_INTERRUPTED 4
#define HL_WAIT_CS_STATUS_FLAG_GONE 0x1
struct hl_wait_cs_out { struct hl_wait_cs_out {
/* HL_WAIT_CS_STATUS_* */ /* HL_WAIT_CS_STATUS_* */
__u32 status; __u32 status;
__u32 pad; /* HL_WAIT_CS_STATUS_FLAG* */
__u32 flags;
}; };
union hl_wait_cs_args { union hl_wait_cs_args {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册