diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index a9fd3d352ef00db9fdba4d9d339527263768e4e1..57b2b9392cb225b113372c4f2ca1bf4958513ccb 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -4253,7 +4253,7 @@ static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr, { struct packet_lin_dma *lin_dma_pkt; struct hl_cs_job *job; - u32 cb_size, ctl; + u32 cb_size, ctl, err_cause; struct hl_cb *cb; int rc; @@ -4282,6 +4282,15 @@ static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr, goto release_cb; } + /* Verify DMA is OK */ + err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE); + if (err_cause && !hdev->init_done) { + dev_dbg(hdev->dev, + "Clearing DMA0 engine from errors (cause 0x%x)\n", + err_cause); + WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause); + } + job->id = 0; job->user_cb = cb; job->user_cb->cs_cnt++; @@ -4293,11 +4302,23 @@ static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr, hl_debugfs_add_job(hdev, job); rc = gaudi_send_job_on_qman0(hdev, job); - hl_debugfs_remove_job(hdev, job); kfree(job); cb->cs_cnt--; + /* Verify DMA is OK */ + err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE); + if (err_cause) { + dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause); + rc = -EIO; + if (!hdev->init_done) { + dev_dbg(hdev->dev, + "Clearing DMA0 engine from errors (cause 0x%x)\n", + err_cause); + WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause); + } + } + release_cb: hl_cb_put(cb); hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);