diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c index 20413e350343adf2ef841837ed3b8e491231a7df..599d17dfd5429d9a7935d0eddbb78ab1271c4ed9 100644 --- a/drivers/misc/habanalabs/debugfs.c +++ b/drivers/misc/habanalabs/debugfs.c @@ -393,9 +393,10 @@ static int mmu_show(struct seq_file *s, void *data) } is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, - prop->va_space_dram_start_address, - prop->va_space_dram_end_address); + prop->dmmu.start_addr, + prop->dmmu.end_addr); + /* shifts and masks are the same in PMMU and HPMMU, use one of them */ mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; mutex_lock(&ctx->mmu_lock); @@ -547,12 +548,15 @@ static bool hl_is_device_va(struct hl_device *hdev, u64 addr) goto out; if (hdev->dram_supports_virtual_memory && - addr >= prop->va_space_dram_start_address && - addr < prop->va_space_dram_end_address) + (addr >= prop->dmmu.start_addr && addr < prop->dmmu.end_addr)) return true; - if (addr >= prop->va_space_host_start_address && - addr < prop->va_space_host_end_address) + if (addr >= prop->pmmu.start_addr && + addr < prop->pmmu.end_addr) + return true; + + if (addr >= prop->pmmu_huge.start_addr && + addr < prop->pmmu_huge.end_addr) return true; out: return false; @@ -575,9 +579,10 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, } is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, - prop->va_space_dram_start_address, - prop->va_space_dram_end_address); + prop->dmmu.start_addr, + prop->dmmu.end_addr); + /* shifts and masks are the same in PMMU and HPMMU, use one of them */ mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; mutex_lock(&ctx->mmu_lock); diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 3c6794883db1cbbea45fd0d5f8fff0bc1a87165a..74785ccd2cb1c72988b02126119248eb16a159f1 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -393,19 +393,21 @@ void goya_get_fixed_properties(struct hl_device *hdev) prop->dmmu.hop2_mask = HOP2_MASK; prop->dmmu.hop3_mask = HOP3_MASK; prop->dmmu.hop4_mask = HOP4_MASK; - prop->dmmu.huge_page_size = PAGE_SIZE_2MB; + prop->dmmu.start_addr = VA_DDR_SPACE_START; + prop->dmmu.end_addr = VA_DDR_SPACE_END; + prop->dmmu.page_size = PAGE_SIZE_2MB; - /* No difference between PMMU and DMMU except of page size */ + /* shifts and masks are the same in PMMU and DMMU */ memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu)); - prop->dmmu.page_size = PAGE_SIZE_2MB; + prop->pmmu.start_addr = VA_HOST_SPACE_START; + prop->pmmu.end_addr = VA_HOST_SPACE_END; prop->pmmu.page_size = PAGE_SIZE_4KB; - prop->va_space_host_start_address = VA_HOST_SPACE_START; - prop->va_space_host_end_address = VA_HOST_SPACE_END; - prop->va_space_dram_start_address = VA_DDR_SPACE_START; - prop->va_space_dram_end_address = VA_DDR_SPACE_END; - prop->dram_size_for_default_page_mapping = - prop->va_space_dram_end_address; + /* PMMU and HPMMU are the same except of page size */ + memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu)); + prop->pmmu_huge.page_size = PAGE_SIZE_2MB; + + prop->dram_size_for_default_page_mapping = VA_DDR_SPACE_END; prop->cfg_size = CFG_SIZE; prop->max_asid = MAX_ASID; prop->num_of_events = GOYA_ASYNC_EVENT_ID_SIZE; @@ -3443,12 +3445,13 @@ static int goya_validate_dma_pkt_mmu(struct hl_device *hdev, /* * WA for HW-23. * We can't allow user to read from Host using QMANs other than 1. + * PMMU and HPMMU addresses are equal, check only one of them. */ if (parser->hw_queue_id != GOYA_QUEUE_ID_DMA_1 && hl_mem_area_inside_range(le64_to_cpu(user_dma_pkt->src_addr), le32_to_cpu(user_dma_pkt->tsize), - hdev->asic_prop.va_space_host_start_address, - hdev->asic_prop.va_space_host_end_address)) { + hdev->asic_prop.pmmu.start_addr, + hdev->asic_prop.pmmu.end_addr)) { dev_err(hdev->dev, "Can't DMA from host on queue other then 1\n"); return -EFAULT; diff --git a/drivers/misc/habanalabs/goya/goya_coresight.c b/drivers/misc/habanalabs/goya/goya_coresight.c index c1ee6e2b5dfff495da19e68cba5753adf3c81d72..a1bc930d904f4435d9c789336c010cccfb87d71b 100644 --- a/drivers/misc/habanalabs/goya/goya_coresight.c +++ b/drivers/misc/habanalabs/goya/goya_coresight.c @@ -364,8 +364,8 @@ static int goya_etr_validate_address(struct hl_device *hdev, u64 addr, u64 range_start, range_end; if (hdev->mmu_enable) { - range_start = prop->va_space_dram_start_address; - range_end = prop->va_space_dram_end_address; + range_start = prop->dmmu.start_addr; + range_end = prop->dmmu.end_addr; } else { range_start = prop->dram_user_base_address; range_end = prop->dram_end_address; diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index df34227dea312f3234403ba450edc2496787f2e8..5c751b9517c00795b5f2e3bc6959dce3544497c0 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -132,6 +132,8 @@ enum hl_device_hw_state { /** * struct hl_mmu_properties - ASIC specific MMU address translation properties. + * @start_addr: virtual start address of the memory region. + * @end_addr: virtual end address of the memory region. * @hop0_shift: shift of hop 0 mask. * @hop1_shift: shift of hop 1 mask. * @hop2_shift: shift of hop 2 mask. @@ -143,9 +145,10 @@ enum hl_device_hw_state { * @hop3_mask: mask to get the PTE address in hop 3. * @hop4_mask: mask to get the PTE address in hop 4. * @page_size: default page size used to allocate memory. - * @huge_page_size: page size used to allocate memory with huge pages. */ struct hl_mmu_properties { + u64 start_addr; + u64 end_addr; u64 hop0_shift; u64 hop1_shift; u64 hop2_shift; @@ -157,7 +160,6 @@ struct hl_mmu_properties { u64 hop3_mask; u64 hop4_mask; u32 page_size; - u32 huge_page_size; }; /** @@ -169,6 +171,8 @@ struct hl_mmu_properties { * @preboot_ver: F/W Preboot version. * @dmmu: DRAM MMU address translation properties. * @pmmu: PCI (host) MMU address translation properties. + * @pmmu_huge: PCI (host) MMU address translation properties for memory + * allocated with huge pages. * @sram_base_address: SRAM physical start address. * @sram_end_address: SRAM physical end address. * @sram_user_base_address - SRAM physical start address for user access. @@ -178,14 +182,6 @@ struct hl_mmu_properties { * @dram_size: DRAM total size. * @dram_pci_bar_size: size of PCI bar towards DRAM. * @max_power_default: max power of the device after reset - * @va_space_host_start_address: base address of virtual memory range for - * mapping host memory. - * @va_space_host_end_address: end address of virtual memory range for - * mapping host memory. - * @va_space_dram_start_address: base address of virtual memory range for - * mapping DRAM memory. - * @va_space_dram_end_address: end address of virtual memory range for - * mapping DRAM memory. * @dram_size_for_default_page_mapping: DRAM size needed to map to avoid page * fault. * @pcie_dbi_base_address: Base address of the PCIE_DBI block. @@ -218,6 +214,7 @@ struct asic_fixed_properties { char preboot_ver[VERSION_MAX_LEN]; struct hl_mmu_properties dmmu; struct hl_mmu_properties pmmu; + struct hl_mmu_properties pmmu_huge; u64 sram_base_address; u64 sram_end_address; u64 sram_user_base_address; @@ -227,10 +224,6 @@ struct asic_fixed_properties { u64 dram_size; u64 dram_pci_bar_size; u64 max_power_default; - u64 va_space_host_start_address; - u64 va_space_host_end_address; - u64 va_space_dram_start_address; - u64 va_space_dram_end_address; u64 dram_size_for_default_page_mapping; u64 pcie_dbi_base_address; u64 pcie_aux_dbi_reg_addr; @@ -658,6 +651,8 @@ struct hl_va_range { * this hits 0l. It is incremented on CS and CS_WAIT. * @cs_pending: array of DMA fence objects representing pending CS. * @host_va_range: holds available virtual addresses for host mappings. + * @host_huge_va_range: holds available virtual addresses for host mappings + * with huge pages. * @dram_va_range: holds available virtual addresses for DRAM mappings. * @mem_hash_lock: protects the mem_hash. * @mmu_lock: protects the MMU page tables. Any change to the PGT, modifing the @@ -688,8 +683,9 @@ struct hl_ctx { struct hl_device *hdev; struct kref refcount; struct dma_fence *cs_pending[HL_MAX_PENDING_CS]; - struct hl_va_range host_va_range; - struct hl_va_range dram_va_range; + struct hl_va_range *host_va_range; + struct hl_va_range *host_huge_va_range; + struct hl_va_range *dram_va_range; struct mutex mem_hash_lock; struct mutex mmu_lock; struct list_head debugfs_list; @@ -1291,6 +1287,8 @@ struct hl_device_idle_busy_ts { * otherwise. * @dram_supports_virtual_memory: is MMU enabled towards DRAM. * @dram_default_page_mapping: is DRAM default page mapping enabled. + * @pmmu_huge_range: is a different virtual addresses range used for PMMU with + * huge pages. * @init_done: is the initialization of the device done. * @mmu_enable: is MMU enabled. * @device_cpu_disabled: is the device CPU disabled (due to timeouts) @@ -1372,6 +1370,7 @@ struct hl_device { u8 reset_on_lockup; u8 dram_supports_virtual_memory; u8 dram_default_page_mapping; + u8 pmmu_huge_range; u8 init_done; u8 device_cpu_disabled; u8 dma_mask; diff --git a/drivers/misc/habanalabs/memory.c b/drivers/misc/habanalabs/memory.c index b612b1ad0aacd625489d4ec01f84606df69c4fef..a72f766ca470148ac596dbb11055516a04470f31 100644 --- a/drivers/misc/habanalabs/memory.c +++ b/drivers/misc/habanalabs/memory.c @@ -530,7 +530,7 @@ static u64 get_va_block(struct hl_device *hdev, * or not, hence we continue with the biggest possible * granularity. */ - page_size = hdev->asic_prop.pmmu.huge_page_size; + page_size = hdev->asic_prop.pmmu_huge.page_size; else page_size = hdev->asic_prop.dmmu.page_size; @@ -638,13 +638,12 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx, struct hl_userptr *userptr, struct hl_vm_phys_pg_pack **pphys_pg_pack) { - struct hl_mmu_properties *mmu_prop = &ctx->hdev->asic_prop.pmmu; struct hl_vm_phys_pg_pack *phys_pg_pack; struct scatterlist *sg; dma_addr_t dma_addr; u64 page_mask, total_npages; u32 npages, page_size = PAGE_SIZE, - huge_page_size = mmu_prop->huge_page_size; + huge_page_size = ctx->hdev->asic_prop.pmmu_huge.page_size; bool first = true, is_huge_page_opt = true; int rc, i, j; u32 pgs_in_huge_page = huge_page_size >> __ffs(page_size); @@ -856,6 +855,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, struct hl_vm_phys_pg_pack *phys_pg_pack; struct hl_userptr *userptr = NULL; struct hl_vm_hash_node *hnode; + struct hl_va_range *va_range; enum vm_type_t *vm_type; u64 ret_vaddr, hint_addr; u32 handle = 0; @@ -927,9 +927,16 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, goto hnode_err; } - ret_vaddr = get_va_block(hdev, - is_userptr ? &ctx->host_va_range : &ctx->dram_va_range, - phys_pg_pack->total_size, hint_addr, is_userptr); + if (is_userptr) + if (phys_pg_pack->page_size == hdev->asic_prop.pmmu.page_size) + va_range = ctx->host_va_range; + else + va_range = ctx->host_huge_va_range; + else + va_range = ctx->dram_va_range; + + ret_vaddr = get_va_block(hdev, va_range, phys_pg_pack->total_size, + hint_addr, is_userptr); if (!ret_vaddr) { dev_err(hdev->dev, "no available va block for handle %u\n", handle); @@ -968,10 +975,8 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, return 0; map_err: - if (add_va_block(hdev, - is_userptr ? &ctx->host_va_range : &ctx->dram_va_range, - ret_vaddr, - ret_vaddr + phys_pg_pack->total_size - 1)) + if (add_va_block(hdev, va_range, ret_vaddr, + ret_vaddr + phys_pg_pack->total_size - 1)) dev_warn(hdev->dev, "release va block failed for handle 0x%x, vaddr: 0x%llx\n", handle, ret_vaddr); @@ -1033,7 +1038,6 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free) if (*vm_type == VM_TYPE_USERPTR) { is_userptr = true; - va_range = &ctx->host_va_range; userptr = hnode->ptr; rc = init_phys_pg_pack_from_userptr(ctx, userptr, &phys_pg_pack); @@ -1043,9 +1047,15 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free) vaddr); goto vm_type_err; } + + if (phys_pg_pack->page_size == + hdev->asic_prop.pmmu.page_size) + va_range = ctx->host_va_range; + else + va_range = ctx->host_huge_va_range; } else if (*vm_type == VM_TYPE_PHYS_PACK) { is_userptr = false; - va_range = &ctx->dram_va_range; + va_range = ctx->dram_va_range; phys_pg_pack = hnode->ptr; } else { dev_warn(hdev->dev, @@ -1441,19 +1451,18 @@ bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr, } /* - * hl_va_range_init - initialize virtual addresses range - * - * @hdev : pointer to the habanalabs device structure - * @va_range : pointer to the range to initialize - * @start : range start address - * @end : range end address + * va_range_init - initialize virtual addresses range + * @hdev: pointer to the habanalabs device structure + * @va_range: pointer to the range to initialize + * @start: range start address + * @end: range end address * * This function does the following: * - Initializes the virtual addresses list of the given range with the given * addresses. */ -static int hl_va_range_init(struct hl_device *hdev, - struct hl_va_range *va_range, u64 start, u64 end) +static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range, + u64 start, u64 end) { int rc; @@ -1488,47 +1497,105 @@ static int hl_va_range_init(struct hl_device *hdev, } /* - * hl_vm_ctx_init_with_ranges - initialize virtual memory for context + * va_range_fini() - clear a virtual addresses range + * @hdev: pointer to the habanalabs structure + * va_range: pointer to virtual addresses range * - * @ctx : pointer to the habanalabs context structure - * @host_range_start : host virtual addresses range start - * @host_range_end : host virtual addresses range end - * @dram_range_start : dram virtual addresses range start - * @dram_range_end : dram virtual addresses range end + * This function does the following: + * - Frees the virtual addresses block list and its lock + */ +static void va_range_fini(struct hl_device *hdev, + struct hl_va_range *va_range) +{ + mutex_lock(&va_range->lock); + clear_va_list_locked(hdev, &va_range->list); + mutex_unlock(&va_range->lock); + + mutex_destroy(&va_range->lock); + kfree(va_range); +} + +/* + * vm_ctx_init_with_ranges() - initialize virtual memory for context + * @ctx: pointer to the habanalabs context structure + * @host_range_start: host virtual addresses range start. + * @host_range_end: host virtual addresses range end. + * @host_huge_range_start: host virtual addresses range start for memory + * allocated with huge pages. + * @host_huge_range_end: host virtual addresses range end for memory allocated + * with huge pages. + * @dram_range_start: dram virtual addresses range start. + * @dram_range_end: dram virtual addresses range end. * * This function initializes the following: * - MMU for context * - Virtual address to area descriptor hashtable * - Virtual block list of available virtual memory */ -static int hl_vm_ctx_init_with_ranges(struct hl_ctx *ctx, u64 host_range_start, - u64 host_range_end, u64 dram_range_start, - u64 dram_range_end) +static int vm_ctx_init_with_ranges(struct hl_ctx *ctx, + u64 host_range_start, + u64 host_range_end, + u64 host_huge_range_start, + u64 host_huge_range_end, + u64 dram_range_start, + u64 dram_range_end) { struct hl_device *hdev = ctx->hdev; int rc; + ctx->host_va_range = kzalloc(sizeof(*ctx->host_va_range), GFP_KERNEL); + if (!ctx->host_va_range) + return -ENOMEM; + + ctx->host_huge_va_range = kzalloc(sizeof(*ctx->host_huge_va_range), + GFP_KERNEL); + if (!ctx->host_huge_va_range) { + rc = -ENOMEM; + goto host_huge_va_range_err; + } + + ctx->dram_va_range = kzalloc(sizeof(*ctx->dram_va_range), GFP_KERNEL); + if (!ctx->dram_va_range) { + rc = -ENOMEM; + goto dram_va_range_err; + } + rc = hl_mmu_ctx_init(ctx); if (rc) { dev_err(hdev->dev, "failed to init context %d\n", ctx->asid); - return rc; + goto mmu_ctx_err; } mutex_init(&ctx->mem_hash_lock); hash_init(ctx->mem_hash); - mutex_init(&ctx->host_va_range.lock); + mutex_init(&ctx->host_va_range->lock); - rc = hl_va_range_init(hdev, &ctx->host_va_range, host_range_start, - host_range_end); + rc = va_range_init(hdev, ctx->host_va_range, host_range_start, + host_range_end); if (rc) { dev_err(hdev->dev, "failed to init host vm range\n"); - goto host_vm_err; + goto host_page_range_err; + } + + if (hdev->pmmu_huge_range) { + mutex_init(&ctx->host_huge_va_range->lock); + + rc = va_range_init(hdev, ctx->host_huge_va_range, + host_huge_range_start, + host_huge_range_end); + if (rc) { + dev_err(hdev->dev, + "failed to init host huge vm range\n"); + goto host_hpage_range_err; + } + } else { + ctx->host_huge_va_range = ctx->host_va_range; } - mutex_init(&ctx->dram_va_range.lock); + mutex_init(&ctx->dram_va_range->lock); - rc = hl_va_range_init(hdev, &ctx->dram_va_range, dram_range_start, + rc = va_range_init(hdev, ctx->dram_va_range, dram_range_start, dram_range_end); if (rc) { dev_err(hdev->dev, "failed to init dram vm range\n"); @@ -1540,15 +1607,29 @@ static int hl_vm_ctx_init_with_ranges(struct hl_ctx *ctx, u64 host_range_start, return 0; dram_vm_err: - mutex_destroy(&ctx->dram_va_range.lock); + mutex_destroy(&ctx->dram_va_range->lock); - mutex_lock(&ctx->host_va_range.lock); - clear_va_list_locked(hdev, &ctx->host_va_range.list); - mutex_unlock(&ctx->host_va_range.lock); -host_vm_err: - mutex_destroy(&ctx->host_va_range.lock); + if (hdev->pmmu_huge_range) { + mutex_lock(&ctx->host_huge_va_range->lock); + clear_va_list_locked(hdev, &ctx->host_huge_va_range->list); + mutex_unlock(&ctx->host_huge_va_range->lock); + } +host_hpage_range_err: + if (hdev->pmmu_huge_range) + mutex_destroy(&ctx->host_huge_va_range->lock); + mutex_lock(&ctx->host_va_range->lock); + clear_va_list_locked(hdev, &ctx->host_va_range->list); + mutex_unlock(&ctx->host_va_range->lock); +host_page_range_err: + mutex_destroy(&ctx->host_va_range->lock); mutex_destroy(&ctx->mem_hash_lock); hl_mmu_ctx_fini(ctx); +mmu_ctx_err: + kfree(ctx->dram_va_range); +dram_va_range_err: + kfree(ctx->host_huge_va_range); +host_huge_va_range_err: + kfree(ctx->host_va_range); return rc; } @@ -1556,8 +1637,8 @@ static int hl_vm_ctx_init_with_ranges(struct hl_ctx *ctx, u64 host_range_start, int hl_vm_ctx_init(struct hl_ctx *ctx) { struct asic_fixed_properties *prop = &ctx->hdev->asic_prop; - u64 host_range_start, host_range_end, dram_range_start, - dram_range_end; + u64 host_range_start, host_range_end, host_huge_range_start, + host_huge_range_end, dram_range_start, dram_range_end; atomic64_set(&ctx->dram_phys_mem, 0); @@ -1569,38 +1650,26 @@ int hl_vm_ctx_init(struct hl_ctx *ctx) * address of the memory related to the given handle. */ if (ctx->hdev->mmu_enable) { - dram_range_start = prop->va_space_dram_start_address; - dram_range_end = prop->va_space_dram_end_address; - host_range_start = prop->va_space_host_start_address; - host_range_end = prop->va_space_host_end_address; + dram_range_start = prop->dmmu.start_addr; + dram_range_end = prop->dmmu.end_addr; + host_range_start = prop->pmmu.start_addr; + host_range_end = prop->pmmu.end_addr; + host_huge_range_start = prop->pmmu_huge.start_addr; + host_huge_range_end = prop->pmmu_huge.end_addr; } else { dram_range_start = prop->dram_user_base_address; dram_range_end = prop->dram_end_address; host_range_start = prop->dram_user_base_address; host_range_end = prop->dram_end_address; + host_huge_range_start = prop->dram_user_base_address; + host_huge_range_end = prop->dram_end_address; } - return hl_vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end, - dram_range_start, dram_range_end); -} - -/* - * hl_va_range_fini - clear a virtual addresses range - * - * @hdev : pointer to the habanalabs structure - * va_range : pointer to virtual addresses range - * - * This function does the following: - * - Frees the virtual addresses block list and its lock - */ -static void hl_va_range_fini(struct hl_device *hdev, - struct hl_va_range *va_range) -{ - mutex_lock(&va_range->lock); - clear_va_list_locked(hdev, &va_range->list); - mutex_unlock(&va_range->lock); - - mutex_destroy(&va_range->lock); + return vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end, + host_huge_range_start, + host_huge_range_end, + dram_range_start, + dram_range_end); } /* @@ -1667,8 +1736,10 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx) } spin_unlock(&vm->idr_lock); - hl_va_range_fini(hdev, &ctx->dram_va_range); - hl_va_range_fini(hdev, &ctx->host_va_range); + va_range_fini(hdev, ctx->dram_va_range); + if (hdev->pmmu_huge_range) + va_range_fini(hdev, ctx->host_huge_va_range); + va_range_fini(hdev, ctx->host_va_range); mutex_destroy(&ctx->mem_hash_lock); hl_mmu_ctx_fini(ctx); diff --git a/drivers/misc/habanalabs/mmu.c b/drivers/misc/habanalabs/mmu.c index 006eee47909d4667a0fc9c6f085738a3db22db2b..a290d6b49d78850f98b823c78015ec33c533f59f 100644 --- a/drivers/misc/habanalabs/mmu.c +++ b/drivers/misc/habanalabs/mmu.c @@ -254,6 +254,15 @@ static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr) return phys_hop_addr + pte_offset; } +static bool is_dram_va(struct hl_device *hdev, u64 virt_addr) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + + return hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, + prop->dmmu.start_addr, + prop->dmmu.end_addr); +} + static int dram_default_mapping_init(struct hl_ctx *ctx) { struct hl_device *hdev = ctx->hdev; @@ -548,6 +557,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr) curr_pte; bool is_huge, clear_hop3 = true; + /* shifts and masks are the same in PMMU and HPMMU, use one of them */ mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; hop0_addr = get_hop0_addr(ctx); @@ -702,26 +712,25 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, if (!hdev->mmu_enable) return 0; - is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, - prop->va_space_dram_start_address, - prop->va_space_dram_end_address); + is_dram_addr = is_dram_va(hdev, virt_addr); - mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; + if (is_dram_addr) + mmu_prop = &prop->dmmu; + else if ((page_size % prop->pmmu_huge.page_size) == 0) + mmu_prop = &prop->pmmu_huge; + else + mmu_prop = &prop->pmmu; /* * The H/W handles mapping of specific page sizes. Hence if the page * size is bigger, we break it to sub-pages and unmap them separately. */ - if ((page_size % mmu_prop->huge_page_size) == 0) { - real_page_size = mmu_prop->huge_page_size; - } else if ((page_size % mmu_prop->page_size) == 0) { + if ((page_size % mmu_prop->page_size) == 0) { real_page_size = mmu_prop->page_size; } else { dev_err(hdev->dev, - "page size of %u is not %uKB nor %uMB aligned, can't unmap\n", - page_size, - mmu_prop->page_size >> 10, - mmu_prop->huge_page_size >> 20); + "page size of %u is not %uKB aligned, can't unmap\n", + page_size, mmu_prop->page_size >> 10); return -EFAULT; } @@ -759,8 +768,6 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, hop4_new = false, is_huge; int rc = -ENOMEM; - mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; - /* * This mapping function can map a page or a huge page. For huge page * there are only 3 hops rather than 4. Currently the DRAM allocation @@ -768,11 +775,15 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, * one of the two page sizes. Since this is a common code for all the * three cases, we need this hugs page check. */ - is_huge = page_size == mmu_prop->huge_page_size; - - if (is_dram_addr && !is_huge) { - dev_err(hdev->dev, "DRAM mapping should use huge pages only\n"); - return -EFAULT; + if (is_dram_addr) { + mmu_prop = &prop->dmmu; + is_huge = true; + } else if (page_size == prop->pmmu_huge.page_size) { + mmu_prop = &prop->pmmu_huge; + is_huge = true; + } else { + mmu_prop = &prop->pmmu; + is_huge = false; } hop0_addr = get_hop0_addr(ctx); @@ -942,26 +953,25 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size, if (!hdev->mmu_enable) return 0; - is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, - prop->va_space_dram_start_address, - prop->va_space_dram_end_address); + is_dram_addr = is_dram_va(hdev, virt_addr); - mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; + if (is_dram_addr) + mmu_prop = &prop->dmmu; + else if ((page_size % prop->pmmu_huge.page_size) == 0) + mmu_prop = &prop->pmmu_huge; + else + mmu_prop = &prop->pmmu; /* * The H/W handles mapping of specific page sizes. Hence if the page * size is bigger, we break it to sub-pages and map them separately. */ - if ((page_size % mmu_prop->huge_page_size) == 0) { - real_page_size = mmu_prop->huge_page_size; - } else if ((page_size % mmu_prop->page_size) == 0) { + if ((page_size % mmu_prop->page_size) == 0) { real_page_size = mmu_prop->page_size; } else { dev_err(hdev->dev, - "page size of %u is not %dKB nor %dMB aligned, can't unmap\n", - page_size, - mmu_prop->page_size >> 10, - mmu_prop->huge_page_size >> 20); + "page size of %u is not %uKB aligned, can't unmap\n", + page_size, mmu_prop->page_size >> 10); return -EFAULT; }