Commit 8e39e75a authored by Ofir Bitton, committed by Oded Gabbay

habanalabs: Init the VM module for kernel context

In order to reserve VA ranges for kernel memory, we need to
allow the VM module to be initialized with the kernel context.
Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Parent cb6ef0ee
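
Before the diff itself, a minimal standalone sketch of the ordering this commit establishes for the kernel context: VM init followed by the ASIC context hook, with teardown mirrored in reverse. The stub names below (vm_ctx_init() and friends) are illustrative stand-ins for hl_vm_ctx_init(), hdev->asic_funcs->ctx_init() and their fini counterparts, not driver code:

```c
/*
 * Standalone sketch (not driver code) of the kernel-context
 * init/teardown ordering introduced by this commit.
 */
#include <stdio.h>

static int vm_ctx_init(void)    { puts("vm ctx init");   return 0; }
static void vm_ctx_fini(void)   { puts("vm ctx fini"); }
static int asic_ctx_init(void)  { puts("asic ctx init"); return 0; }
static void asic_ctx_fini(void) { puts("asic ctx fini"); }

static int kernel_ctx_init(void)
{
        int rc;

        rc = vm_ctx_init();             /* replaces the old mmu-only init */
        if (rc)
                return rc;

        rc = asic_ctx_init();           /* ASIC hook now runs for ASID 0 too */
        if (rc) {
                vm_ctx_fini();          /* unwind in reverse order on failure */
                return rc;
        }

        return 0;
}

static void kernel_ctx_fini(void)
{
        asic_ctx_fini();                /* teardown mirrors init, reversed */
        vm_ctx_fini();
}

int main(void)
{
        if (!kernel_ctx_init())
                kernel_ctx_fini();
        return 0;
}
```
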
--- a/drivers/misc/habanalabs/common/context.c
+++ b/drivers/misc/habanalabs/common/context.c
@@ -56,6 +56,8 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
 				idle_mask);
 	} else {
 		dev_dbg(hdev->dev, "closing kernel context\n");
+		hdev->asic_funcs->ctx_fini(ctx);
+		hl_vm_ctx_fini(ctx);
 		hl_mmu_ctx_fini(ctx);
 	}
 }
@@ -151,11 +153,18 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
 	if (is_kernel_ctx) {
 		ctx->asid = HL_KERNEL_ASID_ID; /* Kernel driver gets ASID 0 */
 
-		rc = hl_mmu_ctx_init(ctx);
+		rc = hl_vm_ctx_init(ctx);
 		if (rc) {
-			dev_err(hdev->dev, "Failed to init mmu ctx module\n");
+			dev_err(hdev->dev, "Failed to init mem ctx module\n");
+			rc = -ENOMEM;
 			goto err_free_cs_pending;
 		}
+
+		rc = hdev->asic_funcs->ctx_init(ctx);
+		if (rc) {
+			dev_err(hdev->dev, "ctx_init failed\n");
+			goto err_vm_ctx_fini;
+		}
 	} else {
 		ctx->asid = hl_asid_alloc(hdev);
 		if (!ctx->asid) {
@@ -194,6 +203,7 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
 err_vm_ctx_fini:
 	hl_vm_ctx_fini(ctx);
 err_asid_free:
-	hl_asid_free(hdev, ctx->asid);
+	if (ctx->asid != HL_KERNEL_ASID_ID)
+		hl_asid_free(hdev, ctx->asid);
 err_free_cs_pending:
 	kfree(ctx->cs_pending);

--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -1312,11 +1312,16 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 
 	hdev->compute_ctx = NULL;
 
+	hl_debugfs_add_device(hdev);
+
+	/* debugfs nodes are created in hl_ctx_init so it must be called after
+	 * hl_debugfs_add_device.
+	 */
 	rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
 	if (rc) {
 		dev_err(hdev->dev, "failed to initialize kernel context\n");
 		kfree(hdev->kernel_ctx);
-		goto mmu_fini;
+		goto remove_device_from_debugfs;
 	}
 
 	rc = hl_cb_pool_init(hdev);
@@ -1325,8 +1330,6 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 		goto release_ctx;
 	}
 
-	hl_debugfs_add_device(hdev);
-
 	/*
 	 * From this point, in case of an error, add char devices and create
 	 * sysfs nodes as part of the error flow, to allow debugging.
@@ -1415,6 +1418,8 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 	if (hl_ctx_put(hdev->kernel_ctx) != 1)
 		dev_err(hdev->dev,
 			"kernel ctx is still alive on initialization failure\n");
+remove_device_from_debugfs:
+	hl_debugfs_remove_device(hdev);
 mmu_fini:
 	hl_mmu_fini(hdev);
 eq_fini:
@@ -1513,8 +1518,6 @@ void hl_device_fini(struct hl_device *hdev)
 	device_late_fini(hdev);
 
-	hl_debugfs_remove_device(hdev);
-
 	/*
 	 * Halt the engines and disable interrupts so we won't get any more
 	 * completions from H/W and we won't have any accesses from the
@@ -1546,6 +1549,8 @@ void hl_device_fini(struct hl_device *hdev)
 	if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1))
 		dev_err(hdev->dev, "kernel ctx is still alive\n");
 
+	hl_debugfs_remove_device(hdev);
+
 	hl_vm_fini(hdev);
 
 	hl_mmu_fini(hdev);
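
The hl_device_init()/hl_device_fini() hunks above move hl_debugfs_add_device() ahead of hl_ctx_init(), since the kernel context now creates debugfs nodes, and introduce a matching remove_device_from_debugfs unwind label. A minimal standalone model of that goto-unwind ordering follows; the stub names are illustrative, not the real driver API:

```c
/*
 * Standalone model (illustrative stubs): debugfs is set up before ctx
 * init, and the unwind label removes it again on failure.
 */
#include <stdio.h>

static void debugfs_add(void)    { puts("debugfs add"); }
static void debugfs_remove(void) { puts("debugfs remove"); }
static int mmu_init(void)        { puts("mmu init"); return 0; }
static void mmu_fini(void)       { puts("mmu fini"); }
static int ctx_init(int fail)    { puts("ctx init"); return fail ? -1 : 0; }

static int device_init(int fail_ctx)
{
        int rc;

        rc = mmu_init();
        if (rc)
                return rc;

        debugfs_add();

        /* ctx init creates debugfs nodes, so it must run after debugfs_add() */
        rc = ctx_init(fail_ctx);
        if (rc)
                goto remove_device_from_debugfs;

        return 0;

remove_device_from_debugfs:
        debugfs_remove();
        mmu_fini();
        return rc;
}

int main(void)
{
        printf("rc = %d\n", device_init(1));    /* exercise the error path */
        return 0;
}
```
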
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -1929,7 +1929,8 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
 	 * because the user notifies us on allocations. If the user is no more,
 	 * all DRAM is available
 	 */
-	if (!ctx->hdev->asic_prop.dram_supports_virtual_memory)
+	if (ctx->asid != HL_KERNEL_ASID_ID &&
+			!ctx->hdev->asic_prop.dram_supports_virtual_memory)
 		atomic64_set(&ctx->hdev->dram_used_mem, 0);
 }
 
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -7868,18 +7868,16 @@ static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
 
 static int gaudi_ctx_init(struct hl_ctx *ctx)
 {
+	if (ctx->asid == HL_KERNEL_ASID_ID)
+		return 0;
+
 	gaudi_mmu_prepare(ctx->hdev, ctx->asid);
 	return gaudi_internal_cb_pool_init(ctx->hdev, ctx);
 }
 
 static void gaudi_ctx_fini(struct hl_ctx *ctx)
 {
-	struct hl_device *hdev = ctx->hdev;
-
-	/* Gaudi will NEVER support more then a single compute context.
-	 * Therefore, don't clear anything unless it is the compute context
-	 */
-	if (hdev->compute_ctx != ctx)
+	if (ctx->asid == HL_KERNEL_ASID_ID)
 		return;
 
 	gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
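
The hl_vm_ctx_fini() hunk and the Gaudi hooks above rely on the same guard: compare ctx->asid against HL_KERNEL_ASID_ID instead of checking hdev->compute_ctx, so per-ASID work is skipped for the kernel context. A standalone model of the pattern, with hypothetical stub names:

```c
/*
 * Standalone model of the HL_KERNEL_ASID_ID guard: the kernel context
 * always owns ASID 0 and needs no per-ASID hardware setup.
 */
#include <stdio.h>

#define HL_KERNEL_ASID_ID 0

/* Stub standing in for per-ASID setup such as gaudi_mmu_prepare() */
static void mmu_prepare(unsigned int asid)
{
        printf("per-ASID setup for asid %u\n", asid);
}

static int ctx_init(unsigned int asid)
{
        if (asid == HL_KERNEL_ASID_ID)
                return 0;       /* kernel context: skip per-ASID setup */

        mmu_prepare(asid);
        return 0;
}

int main(void)
{
        ctx_init(HL_KERNEL_ASID_ID);    /* no output: guard short-circuits */
        ctx_init(3);                    /* user context: setup runs */
        return 0;
}
```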