/*
 * Copyright (c) 2016 Hisilicon Limited.
 * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/platform_device.h>
#include <linux/vmalloc.h>
#include <rdma/ib_umem.h>
#include "hns_roce_device.h"
#include "hns_roce_cmd.h"
#include "hns_roce_hem.h"

/* The hardware key is the MTPT index rotated left by 8 bits. */
static u32 hw_index_to_key(unsigned long ind)
{
	return (u32)(ind >> 24) | (ind << 8);
}

unsigned long key_to_hw_index(u32 key)
{
	return (key << 24) | (key >> 8);
}

static int hns_roce_hw_create_mpt(struct hns_roce_dev *hr_dev,
				  struct hns_roce_cmd_mailbox *mailbox,
				  unsigned long mpt_index)
{
	return hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, mpt_index, 0,
				 HNS_ROCE_CMD_CREATE_MPT,
				 HNS_ROCE_CMD_TIMEOUT_MSECS);
}

int hns_roce_hw_destroy_mpt(struct hns_roce_dev *hr_dev,
			    struct hns_roce_cmd_mailbox *mailbox,
			    unsigned long mpt_index)
{
	return hns_roce_cmd_mbox(hr_dev, 0, mailbox ?
mailbox->dma : 0, mpt_index, !mailbox, HNS_ROCE_CMD_DESTROY_MPT, HNS_ROCE_CMD_TIMEOUT_MSECS); } static int hns_roce_buddy_alloc(struct hns_roce_buddy *buddy, int order, unsigned long *seg) { int o; u32 m; spin_lock(&buddy->lock); for (o = order; o <= buddy->max_order; ++o) { if (buddy->num_free[o]) { m = 1 << (buddy->max_order - o); *seg = find_first_bit(buddy->bits[o], m); if (*seg < m) goto found; } } spin_unlock(&buddy->lock); return -EINVAL; found: clear_bit(*seg, buddy->bits[o]); --buddy->num_free[o]; while (o > order) { --o; *seg <<= 1; set_bit(*seg ^ 1, buddy->bits[o]); ++buddy->num_free[o]; } spin_unlock(&buddy->lock); *seg <<= order; return 0; } static void hns_roce_buddy_free(struct hns_roce_buddy *buddy, unsigned long seg, int order) { seg >>= order; spin_lock(&buddy->lock); while (test_bit(seg ^ 1, buddy->bits[order])) { clear_bit(seg ^ 1, buddy->bits[order]); --buddy->num_free[order]; seg >>= 1; ++order; } set_bit(seg, buddy->bits[order]); ++buddy->num_free[order]; spin_unlock(&buddy->lock); } static int hns_roce_buddy_init(struct hns_roce_buddy *buddy, int max_order) { int i, s; buddy->max_order = max_order; spin_lock_init(&buddy->lock); buddy->bits = kcalloc(buddy->max_order + 1, sizeof(*buddy->bits), GFP_KERNEL); buddy->num_free = kcalloc(buddy->max_order + 1, sizeof(*buddy->num_free), GFP_KERNEL); if (!buddy->bits || !buddy->num_free) goto err_out; for (i = 0; i <= buddy->max_order; ++i) { s = BITS_TO_LONGS(1 << (buddy->max_order - i)); buddy->bits[i] = kcalloc(s, sizeof(long), GFP_KERNEL | __GFP_NOWARN); if (!buddy->bits[i]) { buddy->bits[i] = vzalloc(array_size(s, sizeof(long))); if (!buddy->bits[i]) goto err_out_free; } } set_bit(0, buddy->bits[buddy->max_order]); buddy->num_free[buddy->max_order] = 1; return 0; err_out_free: for (i = 0; i <= buddy->max_order; ++i) kvfree(buddy->bits[i]); err_out: kfree(buddy->bits); kfree(buddy->num_free); return -ENOMEM; } static void hns_roce_buddy_cleanup(struct hns_roce_buddy *buddy) { int i; for (i = 0; i <= buddy->max_order; ++i) kvfree(buddy->bits[i]); kfree(buddy->bits); kfree(buddy->num_free); } static int hns_roce_alloc_mtt_range(struct hns_roce_dev *hr_dev, int order, unsigned long *seg, u32 mtt_type) { struct hns_roce_mr_table *mr_table = &hr_dev->mr_table; struct hns_roce_hem_table *table; struct hns_roce_buddy *buddy; int ret; switch (mtt_type) { case MTT_TYPE_WQE: buddy = &mr_table->mtt_buddy; table = &mr_table->mtt_table; break; case MTT_TYPE_CQE: buddy = &mr_table->mtt_cqe_buddy; table = &mr_table->mtt_cqe_table; break; case MTT_TYPE_SRQWQE: buddy = &mr_table->mtt_srqwqe_buddy; table = &mr_table->mtt_srqwqe_table; break; case MTT_TYPE_IDX: buddy = &mr_table->mtt_idx_buddy; table = &mr_table->mtt_idx_table; break; default: dev_err(hr_dev->dev, "Unsupport MTT table type: %d\n", mtt_type); return -EINVAL; } ret = hns_roce_buddy_alloc(buddy, order, seg); if (ret) return ret; ret = hns_roce_table_get_range(hr_dev, table, *seg, *seg + (1 << order) - 1); if (ret) { hns_roce_buddy_free(buddy, *seg, order); return ret; } return 0; } int hns_roce_mtt_init(struct hns_roce_dev *hr_dev, int npages, int page_shift, struct hns_roce_mtt *mtt) { int ret; int i; /* Page num is zero, correspond to DMA memory register */ if (!npages) { mtt->order = -1; mtt->page_shift = HNS_ROCE_HEM_PAGE_SHIFT; return 0; } /* Note: if page_shift is zero, FAST memory register */ mtt->page_shift = page_shift; /* Compute MTT entry necessary */ for (mtt->order = 0, i = HNS_ROCE_MTT_ENTRY_PER_SEG; i < npages; i <<= 1) ++mtt->order; /* Allocate MTT entry */ ret 
= hns_roce_alloc_mtt_range(hr_dev, mtt->order, &mtt->first_seg, mtt->mtt_type); if (ret) return -ENOMEM; return 0; } void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt) { struct hns_roce_mr_table *mr_table = &hr_dev->mr_table; if (mtt->order < 0) return; switch (mtt->mtt_type) { case MTT_TYPE_WQE: hns_roce_buddy_free(&mr_table->mtt_buddy, mtt->first_seg, mtt->order); hns_roce_table_put_range(hr_dev, &mr_table->mtt_table, mtt->first_seg, mtt->first_seg + (1 << mtt->order) - 1); break; case MTT_TYPE_CQE: hns_roce_buddy_free(&mr_table->mtt_cqe_buddy, mtt->first_seg, mtt->order); hns_roce_table_put_range(hr_dev, &mr_table->mtt_cqe_table, mtt->first_seg, mtt->first_seg + (1 << mtt->order) - 1); break; case MTT_TYPE_SRQWQE: hns_roce_buddy_free(&mr_table->mtt_srqwqe_buddy, mtt->first_seg, mtt->order); hns_roce_table_put_range(hr_dev, &mr_table->mtt_srqwqe_table, mtt->first_seg, mtt->first_seg + (1 << mtt->order) - 1); break; case MTT_TYPE_IDX: hns_roce_buddy_free(&mr_table->mtt_idx_buddy, mtt->first_seg, mtt->order); hns_roce_table_put_range(hr_dev, &mr_table->mtt_idx_table, mtt->first_seg, mtt->first_seg + (1 << mtt->order) - 1); break; default: dev_err(hr_dev->dev, "Unsupport mtt type %d, clean mtt failed\n", mtt->mtt_type); break; } } static void hns_roce_loop_free(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, int err_loop_index, int loop_i, int loop_j) { struct device *dev = hr_dev->dev; u32 mhop_num; u32 pbl_bt_sz; u64 bt_idx; int i, j; pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT); mhop_num = hr_dev->caps.pbl_hop_num; i = loop_i; if (mhop_num == 3 && err_loop_index == 2) { for (; i >= 0; i--) { dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i], mr->pbl_l1_dma_addr[i]); for (j = 0; j < pbl_bt_sz / BA_BYTE_LEN; j++) { if (i == loop_i && j >= loop_j) break; bt_idx = i * pbl_bt_sz / BA_BYTE_LEN + j; dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l2[bt_idx], mr->pbl_l2_dma_addr[bt_idx]); } } } else if (mhop_num == 3 && err_loop_index == 1) { for (i -= 1; i >= 0; i--) { dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i], mr->pbl_l1_dma_addr[i]); for (j = 0; j < pbl_bt_sz / BA_BYTE_LEN; j++) { bt_idx = i * pbl_bt_sz / BA_BYTE_LEN + j; dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l2[bt_idx], mr->pbl_l2_dma_addr[bt_idx]); } } } else if (mhop_num == 2 && err_loop_index == 1) { for (i -= 1; i >= 0; i--) dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i], mr->pbl_l1_dma_addr[i]); } else { dev_warn(dev, "not support: mhop_num=%d, err_loop_index=%d.", mhop_num, err_loop_index); return; } dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l0, mr->pbl_l0_dma_addr); mr->pbl_bt_l0 = NULL; mr->pbl_l0_dma_addr = 0; } static int pbl_1hop_alloc(struct hns_roce_dev *hr_dev, int npages, struct hns_roce_mr *mr, u32 pbl_bt_sz) { struct device *dev = hr_dev->dev; if (npages > pbl_bt_sz / 8) { dev_err(dev, "npages %d is larger than buf_pg_sz!", npages); return -EINVAL; } mr->pbl_buf = dma_alloc_coherent(dev, npages * 8, &(mr->pbl_dma_addr), GFP_KERNEL); if (!mr->pbl_buf) return -ENOMEM; mr->pbl_size = npages; mr->pbl_ba = mr->pbl_dma_addr; mr->pbl_hop_num = 1; mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz; mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz; return 0; } static int pbl_2hop_alloc(struct hns_roce_dev *hr_dev, int npages, struct hns_roce_mr *mr, u32 pbl_bt_sz) { struct device *dev = hr_dev->dev; int npages_allocated; u64 pbl_last_bt_num; u64 pbl_bt_cnt = 0; u64 size; int i; pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8); /* alloc L1 BT */ for (i = 0; i 
< pbl_bt_sz / 8; i++) { if (pbl_bt_cnt + 1 < pbl_last_bt_num) { size = pbl_bt_sz; } else { npages_allocated = i * (pbl_bt_sz / 8); size = (npages - npages_allocated) * 8; } mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, size, &(mr->pbl_l1_dma_addr[i]), GFP_KERNEL); if (!mr->pbl_bt_l1[i]) { hns_roce_loop_free(hr_dev, mr, 1, i, 0); return -ENOMEM; } *(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i]; pbl_bt_cnt++; if (pbl_bt_cnt >= pbl_last_bt_num) break; } mr->l0_chunk_last_num = i + 1; return 0; } static int pbl_3hop_alloc(struct hns_roce_dev *hr_dev, int npages, struct hns_roce_mr *mr, u32 pbl_bt_sz) { struct device *dev = hr_dev->dev; int mr_alloc_done = 0; int npages_allocated; u64 pbl_last_bt_num; u64 pbl_bt_cnt = 0; u64 bt_idx; u64 size; int i; int j = 0; pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8); mr->pbl_l2_dma_addr = kcalloc(pbl_last_bt_num, sizeof(*mr->pbl_l2_dma_addr), GFP_KERNEL); if (!mr->pbl_l2_dma_addr) return -ENOMEM; mr->pbl_bt_l2 = kcalloc(pbl_last_bt_num, sizeof(*mr->pbl_bt_l2), GFP_KERNEL); if (!mr->pbl_bt_l2) goto err_kcalloc_bt_l2; /* alloc L1, L2 BT */ for (i = 0; i < pbl_bt_sz / 8; i++) { mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, pbl_bt_sz, &(mr->pbl_l1_dma_addr[i]), GFP_KERNEL); if (!mr->pbl_bt_l1[i]) { hns_roce_loop_free(hr_dev, mr, 1, i, 0); goto err_dma_alloc_l0; } *(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i]; for (j = 0; j < pbl_bt_sz / 8; j++) { bt_idx = i * pbl_bt_sz / 8 + j; if (pbl_bt_cnt + 1 < pbl_last_bt_num) { size = pbl_bt_sz; } else { npages_allocated = bt_idx * (pbl_bt_sz / 8); size = (npages - npages_allocated) * 8; } mr->pbl_bt_l2[bt_idx] = dma_alloc_coherent( dev, size, &(mr->pbl_l2_dma_addr[bt_idx]), GFP_KERNEL); if (!mr->pbl_bt_l2[bt_idx]) { hns_roce_loop_free(hr_dev, mr, 2, i, j); goto err_dma_alloc_l0; } *(mr->pbl_bt_l1[i] + j) = mr->pbl_l2_dma_addr[bt_idx]; pbl_bt_cnt++; if (pbl_bt_cnt >= pbl_last_bt_num) { mr_alloc_done = 1; break; } } if (mr_alloc_done) break; } mr->l0_chunk_last_num = i + 1; mr->l1_chunk_last_num = j + 1; return 0; err_dma_alloc_l0: kfree(mr->pbl_bt_l2); mr->pbl_bt_l2 = NULL; err_kcalloc_bt_l2: kfree(mr->pbl_l2_dma_addr); mr->pbl_l2_dma_addr = NULL; return -ENOMEM; } /* PBL multi hop addressing */ static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages, struct hns_roce_mr *mr) { struct device *dev = hr_dev->dev; u32 pbl_bt_sz; u32 mhop_num; mhop_num = (mr->type == MR_TYPE_FRMR ? 
1 : hr_dev->caps.pbl_hop_num); pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT); if (mhop_num == HNS_ROCE_HOP_NUM_0) return 0; if (mhop_num == 1) return pbl_1hop_alloc(hr_dev, npages, mr, pbl_bt_sz); mr->pbl_l1_dma_addr = kcalloc(pbl_bt_sz / 8, sizeof(*mr->pbl_l1_dma_addr), GFP_KERNEL); if (!mr->pbl_l1_dma_addr) return -ENOMEM; mr->pbl_bt_l1 = kcalloc(pbl_bt_sz / 8, sizeof(*mr->pbl_bt_l1), GFP_KERNEL); if (!mr->pbl_bt_l1) goto err_kcalloc_bt_l1; /* alloc L0 BT */ mr->pbl_bt_l0 = dma_alloc_coherent(dev, pbl_bt_sz, &(mr->pbl_l0_dma_addr), GFP_KERNEL); if (!mr->pbl_bt_l0) goto err_kcalloc_l2_dma; if (mhop_num == 2) { if (pbl_2hop_alloc(hr_dev, npages, mr, pbl_bt_sz)) goto err_kcalloc_l2_dma; } if (mhop_num == 3) { if (pbl_3hop_alloc(hr_dev, npages, mr, pbl_bt_sz)) goto err_kcalloc_l2_dma; } mr->pbl_size = npages; mr->pbl_ba = mr->pbl_l0_dma_addr; mr->pbl_hop_num = hr_dev->caps.pbl_hop_num; mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz; mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz; return 0; err_kcalloc_l2_dma: kfree(mr->pbl_bt_l1); mr->pbl_bt_l1 = NULL; err_kcalloc_bt_l1: kfree(mr->pbl_l1_dma_addr); mr->pbl_l1_dma_addr = NULL; return -ENOMEM; } static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova, u64 size, u32 access, int npages, struct hns_roce_mr *mr) { struct device *dev = hr_dev->dev; unsigned long index = 0; int ret; /* Allocate a key for mr from mr_table */ ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index); if (ret) return -ENOMEM; mr->iova = iova; /* MR va starting addr */ mr->size = size; /* MR addr range */ mr->pd = pd; /* MR num */ mr->access = access; /* MR access permit */ mr->enabled = 0; /* MR active status */ mr->key = hw_index_to_key(index); /* MR key */ if (size == ~0ull) { mr->pbl_buf = NULL; mr->pbl_dma_addr = 0; /* PBL multi-hop addressing parameters */ mr->pbl_bt_l2 = NULL; mr->pbl_bt_l1 = NULL; mr->pbl_bt_l0 = NULL; mr->pbl_l2_dma_addr = NULL; mr->pbl_l1_dma_addr = NULL; mr->pbl_l0_dma_addr = 0; } else { if (!hr_dev->caps.pbl_hop_num) { mr->pbl_buf = dma_alloc_coherent(dev, npages * BA_BYTE_LEN, &(mr->pbl_dma_addr), GFP_KERNEL); if (!mr->pbl_buf) return -ENOMEM; } else { ret = hns_roce_mhop_alloc(hr_dev, npages, mr); } } return ret; } static void hns_roce_mhop_free(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr) { struct device *dev = hr_dev->dev; int npages_allocated; int npages; int i, j; u32 pbl_bt_sz; u32 mhop_num; u64 bt_idx; npages = mr->pbl_size; pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT); mhop_num = (mr->type == MR_TYPE_FRMR) ? 
1 : hr_dev->caps.pbl_hop_num; if (mhop_num == HNS_ROCE_HOP_NUM_0) return; if (mhop_num == 1) { dma_free_coherent(dev, (unsigned int)(npages * BA_BYTE_LEN), mr->pbl_buf, mr->pbl_dma_addr); return; } dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l0, mr->pbl_l0_dma_addr); if (mhop_num == 2) { for (i = 0; i < mr->l0_chunk_last_num; i++) { if (i == mr->l0_chunk_last_num - 1) { npages_allocated = i * (pbl_bt_sz / BA_BYTE_LEN); dma_free_coherent(dev, (npages - npages_allocated) * BA_BYTE_LEN, mr->pbl_bt_l1[i], mr->pbl_l1_dma_addr[i]); break; } dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i], mr->pbl_l1_dma_addr[i]); } } else if (mhop_num == 3) { for (i = 0; i < mr->l0_chunk_last_num; i++) { dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i], mr->pbl_l1_dma_addr[i]); for (j = 0; j < pbl_bt_sz / BA_BYTE_LEN; j++) { bt_idx = i * (pbl_bt_sz / BA_BYTE_LEN) + j; if ((i == mr->l0_chunk_last_num - 1) && j == mr->l1_chunk_last_num - 1) { npages_allocated = bt_idx * (pbl_bt_sz / BA_BYTE_LEN); dma_free_coherent(dev, (npages - npages_allocated) * BA_BYTE_LEN, mr->pbl_bt_l2[bt_idx], mr->pbl_l2_dma_addr[bt_idx]); break; } dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l2[bt_idx], mr->pbl_l2_dma_addr[bt_idx]); } } } kfree(mr->pbl_bt_l1); kfree(mr->pbl_l1_dma_addr); mr->pbl_bt_l1 = NULL; mr->pbl_l1_dma_addr = NULL; if (mhop_num == 3) { kfree(mr->pbl_bt_l2); kfree(mr->pbl_l2_dma_addr); mr->pbl_bt_l2 = NULL; mr->pbl_l2_dma_addr = NULL; } } static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr) { struct device *dev = hr_dev->dev; int npages = 0; int ret; if (mr->enabled) { ret = hns_roce_hw_destroy_mpt(hr_dev, NULL, key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1)); if (ret) dev_warn(dev, "DESTROY_MPT failed (%d)\n", ret); } if (mr->size != ~0ULL) { if (mr->type == MR_TYPE_MR) npages = ib_umem_page_count(mr->umem); if (!hr_dev->caps.pbl_hop_num) dma_free_coherent(dev, (unsigned int)(npages * BA_BYTE_LEN), mr->pbl_buf, mr->pbl_dma_addr); else hns_roce_mhop_free(hr_dev, mr); } if (mr->enabled) hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table, key_to_hw_index(mr->key)); hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap, key_to_hw_index(mr->key), BITMAP_NO_RR); } static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr) { int ret; unsigned long mtpt_idx = key_to_hw_index(mr->key); struct device *dev = hr_dev->dev; struct hns_roce_cmd_mailbox *mailbox; struct hns_roce_mr_table *mr_table = &hr_dev->mr_table; /* Prepare HEM entry memory */ ret = hns_roce_table_get(hr_dev, &mr_table->mtpt_table, mtpt_idx); if (ret) return ret; /* Allocate mailbox memory */ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); if (IS_ERR(mailbox)) { ret = PTR_ERR(mailbox); goto err_table; } if (mr->type != MR_TYPE_FRMR) ret = hr_dev->hw->write_mtpt(mailbox->buf, mr, mtpt_idx); else ret = hr_dev->hw->frmr_write_mtpt(mailbox->buf, mr); if (ret) { dev_err(dev, "Write mtpt fail!\n"); goto err_page; } ret = hns_roce_hw_create_mpt(hr_dev, mailbox, mtpt_idx & (hr_dev->caps.num_mtpts - 1)); if (ret) { dev_err(dev, "CREATE_MPT failed (%d)\n", ret); goto err_page; } mr->enabled = 1; hns_roce_free_cmd_mailbox(hr_dev, mailbox); return 0; err_page: hns_roce_free_cmd_mailbox(hr_dev, mailbox); err_table: hns_roce_table_put(hr_dev, &mr_table->mtpt_table, mtpt_idx); return ret; } static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt, u32 start_index, u32 npages, u64 *page_list) { struct hns_roce_hem_table *table; dma_addr_t dma_handle; __le64 *mtts; u32 
bt_page_size; u32 i; switch (mtt->mtt_type) { case MTT_TYPE_WQE: table = &hr_dev->mr_table.mtt_table; bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT); break; case MTT_TYPE_CQE: table = &hr_dev->mr_table.mtt_cqe_table; bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT); break; case MTT_TYPE_SRQWQE: table = &hr_dev->mr_table.mtt_srqwqe_table; bt_page_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT); break; case MTT_TYPE_IDX: table = &hr_dev->mr_table.mtt_idx_table; bt_page_size = 1 << (hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT); break; default: return -EINVAL; } /* All MTTs must fit in the same page */ if (start_index / (bt_page_size / sizeof(u64)) != (start_index + npages - 1) / (bt_page_size / sizeof(u64))) return -EINVAL; if (start_index & (HNS_ROCE_MTT_ENTRY_PER_SEG - 1)) return -EINVAL; mtts = hns_roce_table_find(hr_dev, table, mtt->first_seg + start_index / HNS_ROCE_MTT_ENTRY_PER_SEG, &dma_handle); if (!mtts) return -ENOMEM; /* Save page addr, low 12 bits : 0 */ for (i = 0; i < npages; ++i) { if (!hr_dev->caps.mtt_hop_num) mtts[i] = cpu_to_le64(page_list[i] >> PAGE_ADDR_SHIFT); else mtts[i] = cpu_to_le64(page_list[i]); } return 0; } static int hns_roce_write_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt, u32 start_index, u32 npages, u64 *page_list) { int chunk; int ret; u32 bt_page_size; if (mtt->order < 0) return -EINVAL; switch (mtt->mtt_type) { case MTT_TYPE_WQE: bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT); break; case MTT_TYPE_CQE: bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT); break; case MTT_TYPE_SRQWQE: bt_page_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT); break; case MTT_TYPE_IDX: bt_page_size = 1 << (hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT); break; default: dev_err(hr_dev->dev, "Unsupport mtt type %d, write mtt failed\n", mtt->mtt_type); return -EINVAL; } while (npages > 0) { chunk = min_t(int, bt_page_size / sizeof(u64), npages); ret = hns_roce_write_mtt_chunk(hr_dev, mtt, start_index, chunk, page_list); if (ret) return ret; npages -= chunk; start_index += chunk; page_list += chunk; } return 0; } int hns_roce_buf_write_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt, struct hns_roce_buf *buf) { u64 *page_list; int ret; u32 i; page_list = kmalloc_array(buf->npages, sizeof(*page_list), GFP_KERNEL); if (!page_list) return -ENOMEM; for (i = 0; i < buf->npages; ++i) page_list[i] = hns_roce_buf_page(buf, i); ret = hns_roce_write_mtt(hr_dev, mtt, 0, buf->npages, page_list); kfree(page_list); return ret; } int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev) { struct hns_roce_mr_table *mr_table = &hr_dev->mr_table; int ret; ret = hns_roce_bitmap_init(&mr_table->mtpt_bitmap, hr_dev->caps.num_mtpts, hr_dev->caps.num_mtpts - 1, hr_dev->caps.reserved_mrws, 0); if (ret) return ret; ret = hns_roce_buddy_init(&mr_table->mtt_buddy, ilog2(hr_dev->caps.num_mtt_segs)); if (ret) goto err_buddy; if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) { ret = hns_roce_buddy_init(&mr_table->mtt_cqe_buddy, ilog2(hr_dev->caps.num_cqe_segs)); if (ret) goto err_buddy_cqe; } if (hr_dev->caps.num_srqwqe_segs) { ret = hns_roce_buddy_init(&mr_table->mtt_srqwqe_buddy, ilog2(hr_dev->caps.num_srqwqe_segs)); if (ret) goto err_buddy_srqwqe; } if (hr_dev->caps.num_idx_segs) { ret = hns_roce_buddy_init(&mr_table->mtt_idx_buddy, ilog2(hr_dev->caps.num_idx_segs)); if (ret) goto err_buddy_idx; } return 0; err_buddy_idx: if (hr_dev->caps.num_srqwqe_segs) hns_roce_buddy_cleanup(&mr_table->mtt_srqwqe_buddy); err_buddy_srqwqe: 
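	/* Tear down in the reverse order of hns_roce_init_mr_table(). */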
if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) hns_roce_buddy_cleanup(&mr_table->mtt_cqe_buddy); err_buddy_cqe: hns_roce_buddy_cleanup(&mr_table->mtt_buddy); err_buddy: hns_roce_bitmap_cleanup(&mr_table->mtpt_bitmap); return ret; } void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev) { struct hns_roce_mr_table *mr_table = &hr_dev->mr_table; if (hr_dev->caps.num_idx_segs) hns_roce_buddy_cleanup(&mr_table->mtt_idx_buddy); if (hr_dev->caps.num_srqwqe_segs) hns_roce_buddy_cleanup(&mr_table->mtt_srqwqe_buddy); hns_roce_buddy_cleanup(&mr_table->mtt_buddy); if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) hns_roce_buddy_cleanup(&mr_table->mtt_cqe_buddy); hns_roce_bitmap_cleanup(&mr_table->mtpt_bitmap); } struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc) { struct hns_roce_mr *mr; int ret; mr = kmalloc(sizeof(*mr), GFP_KERNEL); if (mr == NULL) return ERR_PTR(-ENOMEM); mr->type = MR_TYPE_DMA; /* Allocate memory region key */ ret = hns_roce_mr_alloc(to_hr_dev(pd->device), to_hr_pd(pd)->pdn, 0, ~0ULL, acc, 0, mr); if (ret) goto err_free; ret = hns_roce_mr_enable(to_hr_dev(pd->device), mr); if (ret) goto err_mr; mr->ibmr.rkey = mr->ibmr.lkey = mr->key; mr->umem = NULL; return &mr->ibmr; err_mr: hns_roce_mr_free(to_hr_dev(pd->device), mr); err_free: kfree(mr); return ERR_PTR(ret); } int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt, struct ib_umem *umem) { struct device *dev = hr_dev->dev; struct sg_dma_page_iter sg_iter; unsigned int order; int npage = 0; int ret = 0; int i; u64 page_addr; u64 *pages; u32 bt_page_size; u32 n; switch (mtt->mtt_type) { case MTT_TYPE_WQE: order = hr_dev->caps.mtt_ba_pg_sz; break; case MTT_TYPE_CQE: order = hr_dev->caps.cqe_ba_pg_sz; break; case MTT_TYPE_SRQWQE: order = hr_dev->caps.srqwqe_ba_pg_sz; break; case MTT_TYPE_IDX: order = hr_dev->caps.idx_ba_pg_sz; break; default: dev_err(dev, "Unsupport mtt type %d, write mtt failed\n", mtt->mtt_type); return -EINVAL; } bt_page_size = 1 << (order + PAGE_SHIFT); pages = (u64 *) __get_free_pages(GFP_KERNEL, order); if (!pages) return -ENOMEM; i = n = 0; for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) { page_addr = sg_page_iter_dma_address(&sg_iter); if (!(npage % (1 << (mtt->page_shift - PAGE_SHIFT)))) { if (page_addr & ((1 << mtt->page_shift) - 1)) { dev_err(dev, "page_addr is not page_shift %d alignment!\n", mtt->page_shift); ret = -EINVAL; goto out; } pages[i++] = page_addr; } npage++; if (i == bt_page_size / sizeof(u64)) { ret = hns_roce_write_mtt(hr_dev, mtt, n, i, pages); if (ret) goto out; n += i; i = 0; } } if (i) ret = hns_roce_write_mtt(hr_dev, mtt, n, i, pages); out: free_pages((unsigned long) pages, order); return ret; } static int hns_roce_ib_umem_write_mr(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, struct ib_umem *umem) { struct sg_dma_page_iter sg_iter; int i = 0, j = 0; u64 page_addr; u32 pbl_bt_sz; if (hr_dev->caps.pbl_hop_num == HNS_ROCE_HOP_NUM_0) return 0; pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT); for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) { page_addr = sg_page_iter_dma_address(&sg_iter); if (!hr_dev->caps.pbl_hop_num) { /* for hip06, page addr is aligned to 4K */ mr->pbl_buf[i++] = page_addr >> 12; } else if (hr_dev->caps.pbl_hop_num == 1) { mr->pbl_buf[i++] = page_addr; } else { if (hr_dev->caps.pbl_hop_num == 2) mr->pbl_bt_l1[i][j] = page_addr; else if (hr_dev->caps.pbl_hop_num == 3) mr->pbl_bt_l2[i][j] = page_addr; j++; if (j >= (pbl_bt_sz / BA_BYTE_LEN)) { i++; j = 0; } } } 
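	/*
	 * At this point every PBL entry visible to hardware has been written:
	 * either the single-hop pbl_buf, or the L1/L2 base-address tables for
	 * the 2/3-hop configurations. The barrier below orders these writes
	 * before the MPT is created in hns_roce_mr_enable().
	 */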
/* Memory barrier */ mb(); return 0; } struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); struct device *dev = hr_dev->dev; struct hns_roce_mr *mr; int bt_size; int ret; int n; int i; mr = kmalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); mr->umem = ib_umem_get(pd->device, start, length, access_flags); if (IS_ERR(mr->umem)) { ret = PTR_ERR(mr->umem); goto err_free; } n = ib_umem_page_count(mr->umem); if (!hr_dev->caps.pbl_hop_num) { if (n > HNS_ROCE_MAX_MTPT_PBL_NUM) { dev_err(dev, " MR len %lld err. MR is limited to 4G at most!\n", length); ret = -EINVAL; goto err_umem; } } else { u64 pbl_size = 1; bt_size = (1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT)) / BA_BYTE_LEN; for (i = 0; i < hr_dev->caps.pbl_hop_num; i++) pbl_size *= bt_size; if (n > pbl_size) { dev_err(dev, " MR len %lld err. MR page num is limited to %lld!\n", length, pbl_size); ret = -EINVAL; goto err_umem; } } mr->type = MR_TYPE_MR; ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, virt_addr, length, access_flags, n, mr); if (ret) goto err_umem; ret = hns_roce_ib_umem_write_mr(hr_dev, mr, mr->umem); if (ret) goto err_mr; ret = hns_roce_mr_enable(hr_dev, mr); if (ret) goto err_mr; mr->ibmr.rkey = mr->ibmr.lkey = mr->key; return &mr->ibmr; err_mr: hns_roce_mr_free(hr_dev, mr); err_umem: ib_umem_release(mr->umem); err_free: kfree(mr); return ERR_PTR(ret); } static int rereg_mr_trans(struct ib_mr *ibmr, int flags, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct hns_roce_cmd_mailbox *mailbox, u32 pdn, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device); struct hns_roce_mr *mr = to_hr_mr(ibmr); struct device *dev = hr_dev->dev; int npages; int ret; if (mr->size != ~0ULL) { npages = ib_umem_page_count(mr->umem); if (hr_dev->caps.pbl_hop_num) hns_roce_mhop_free(hr_dev, mr); else dma_free_coherent(dev, npages * 8, mr->pbl_buf, mr->pbl_dma_addr); } ib_umem_release(mr->umem); mr->umem = ib_umem_get(ibmr->device, start, length, mr_access_flags); if (IS_ERR(mr->umem)) { ret = PTR_ERR(mr->umem); mr->umem = NULL; return -ENOMEM; } npages = ib_umem_page_count(mr->umem); if (hr_dev->caps.pbl_hop_num) { ret = hns_roce_mhop_alloc(hr_dev, npages, mr); if (ret) goto release_umem; } else { mr->pbl_buf = dma_alloc_coherent(dev, npages * 8, &(mr->pbl_dma_addr), GFP_KERNEL); if (!mr->pbl_buf) { ret = -ENOMEM; goto release_umem; } } ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn, mr_access_flags, virt_addr, length, mailbox->buf); if (ret) goto release_umem; ret = hns_roce_ib_umem_write_mr(hr_dev, mr, mr->umem); if (ret) { if (mr->size != ~0ULL) { npages = ib_umem_page_count(mr->umem); if (hr_dev->caps.pbl_hop_num) hns_roce_mhop_free(hr_dev, mr); else dma_free_coherent(dev, npages * 8, mr->pbl_buf, mr->pbl_dma_addr); } goto release_umem; } return 0; release_umem: ib_umem_release(mr->umem); return ret; } int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_pd *pd, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device); struct hns_roce_mr *mr = to_hr_mr(ibmr); struct hns_roce_cmd_mailbox *mailbox; struct device *dev = hr_dev->dev; unsigned long mtpt_idx; u32 pdn = 0; int ret; if (!mr->enabled) return -EINVAL; mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); mtpt_idx = key_to_hw_index(mr->key) & 
(hr_dev->caps.num_mtpts - 1); ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, mtpt_idx, 0, HNS_ROCE_CMD_QUERY_MPT, HNS_ROCE_CMD_TIMEOUT_MSECS); if (ret) goto free_cmd_mbox; ret = hns_roce_hw_destroy_mpt(hr_dev, NULL, mtpt_idx); if (ret) dev_warn(dev, "DESTROY_MPT failed (%d)\n", ret); mr->enabled = 0; if (flags & IB_MR_REREG_PD) pdn = to_hr_pd(pd)->pdn; if (flags & IB_MR_REREG_TRANS) { ret = rereg_mr_trans(ibmr, flags, start, length, virt_addr, mr_access_flags, mailbox, pdn, udata); if (ret) goto free_cmd_mbox; } else { ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn, mr_access_flags, virt_addr, length, mailbox->buf); if (ret) goto free_cmd_mbox; } ret = hns_roce_hw_create_mpt(hr_dev, mailbox, mtpt_idx); if (ret) { dev_err(dev, "CREATE_MPT failed (%d)\n", ret); ib_umem_release(mr->umem); goto free_cmd_mbox; } mr->enabled = 1; if (flags & IB_MR_REREG_ACCESS) mr->access = mr_access_flags; hns_roce_free_cmd_mailbox(hr_dev, mailbox); return 0; free_cmd_mbox: hns_roce_free_cmd_mailbox(hr_dev, mailbox); return ret; } int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device); struct hns_roce_mr *mr = to_hr_mr(ibmr); int ret = 0; if (hr_dev->hw->dereg_mr) { ret = hr_dev->hw->dereg_mr(hr_dev, mr, udata); } else { hns_roce_mr_free(hr_dev, mr); ib_umem_release(mr->umem); kfree(mr); } return ret; } struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); struct device *dev = hr_dev->dev; struct hns_roce_mr *mr; u64 length; u32 page_size; int ret; page_size = 1 << (hr_dev->caps.pbl_buf_pg_sz + PAGE_SHIFT); length = max_num_sg * page_size; if (mr_type != IB_MR_TYPE_MEM_REG) return ERR_PTR(-EINVAL); if (max_num_sg > HNS_ROCE_FRMR_MAX_PA) { dev_err(dev, "max_num_sg larger than %d\n", HNS_ROCE_FRMR_MAX_PA); return ERR_PTR(-EINVAL); } mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); mr->type = MR_TYPE_FRMR; /* Allocate memory region key */ ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, 0, length, 0, max_num_sg, mr); if (ret) goto err_free; ret = hns_roce_mr_enable(hr_dev, mr); if (ret) goto err_mr; mr->ibmr.rkey = mr->ibmr.lkey = mr->key; mr->umem = NULL; return &mr->ibmr; err_mr: hns_roce_mr_free(to_hr_dev(pd->device), mr); err_free: kfree(mr); return ERR_PTR(ret); } static int hns_roce_set_page(struct ib_mr *ibmr, u64 addr) { struct hns_roce_mr *mr = to_hr_mr(ibmr); mr->pbl_buf[mr->npages++] = addr; return 0; } int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset) { struct hns_roce_mr *mr = to_hr_mr(ibmr); mr->npages = 0; return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page); } static void hns_roce_mw_free(struct hns_roce_dev *hr_dev, struct hns_roce_mw *mw) { struct device *dev = hr_dev->dev; int ret; if (mw->enabled) { ret = hns_roce_hw_destroy_mpt(hr_dev, NULL, key_to_hw_index(mw->rkey) & (hr_dev->caps.num_mtpts - 1)); if (ret) dev_warn(dev, "MW DESTROY_MPT failed (%d)\n", ret); hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table, key_to_hw_index(mw->rkey)); } hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap, key_to_hw_index(mw->rkey), BITMAP_NO_RR); } static int hns_roce_mw_enable(struct hns_roce_dev *hr_dev, struct hns_roce_mw *mw) { struct hns_roce_mr_table *mr_table = &hr_dev->mr_table; struct hns_roce_cmd_mailbox *mailbox; struct device *dev = hr_dev->dev; unsigned long mtpt_idx = key_to_hw_index(mw->rkey); int 
ret;

	/* prepare HEM entry memory */
	ret = hns_roce_table_get(hr_dev, &mr_table->mtpt_table, mtpt_idx);
	if (ret)
		return ret;

	mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
	if (IS_ERR(mailbox)) {
		ret = PTR_ERR(mailbox);
		goto err_table;
	}

	ret = hr_dev->hw->mw_write_mtpt(mailbox->buf, mw);
	if (ret) {
		dev_err(dev, "MW write mtpt fail!\n");
		goto err_page;
	}

	ret = hns_roce_hw_create_mpt(hr_dev, mailbox,
				     mtpt_idx & (hr_dev->caps.num_mtpts - 1));
	if (ret) {
		dev_err(dev, "MW CREATE_MPT failed (%d)\n", ret);
		goto err_page;
	}

	mw->enabled = 1;

	hns_roce_free_cmd_mailbox(hr_dev, mailbox);

	return 0;

err_page:
	hns_roce_free_cmd_mailbox(hr_dev, mailbox);

err_table:
	hns_roce_table_put(hr_dev, &mr_table->mtpt_table, mtpt_idx);

	return ret;
}

struct ib_mw *hns_roce_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
				struct ib_udata *udata)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(ib_pd->device);
	struct hns_roce_mw *mw;
	unsigned long index = 0;
	int ret;

	mw = kmalloc(sizeof(*mw), GFP_KERNEL);
	if (!mw)
		return ERR_PTR(-ENOMEM);

	/* Allocate a key for mw from bitmap */
	ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index);
	if (ret)
		goto err_bitmap;

	mw->rkey = hw_index_to_key(index);

	mw->ibmw.rkey = mw->rkey;
	mw->ibmw.type = type;
	mw->pdn = to_hr_pd(ib_pd)->pdn;
	mw->pbl_hop_num = hr_dev->caps.pbl_hop_num;
	mw->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
	mw->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;

	ret = hns_roce_mw_enable(hr_dev, mw);
	if (ret)
		goto err_mw;

	return &mw->ibmw;

err_mw:
	hns_roce_mw_free(hr_dev, mw);

err_bitmap:
	kfree(mw);

	return ERR_PTR(ret);
}

int hns_roce_dealloc_mw(struct ib_mw *ibmw)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(ibmw->device);
	struct hns_roce_mw *mw = to_hr_mw(ibmw);

	hns_roce_mw_free(hr_dev, mw);
	kfree(mw);

	return 0;
}

void hns_roce_mtr_init(struct hns_roce_mtr *mtr, int bt_pg_shift,
		       int buf_pg_shift)
{
	hns_roce_hem_list_init(&mtr->hem_list);
	mtr->hem_cfg.buf_pg_shift = buf_pg_shift;
	mtr->hem_cfg.ba_pg_shift = bt_pg_shift;
}

void hns_roce_mtr_cleanup(struct hns_roce_dev *hr_dev,
			  struct hns_roce_mtr *mtr)
{
	hns_roce_hem_list_release(hr_dev, &mtr->hem_list);
}

static int mtr_map_region(struct hns_roce_dev *hr_dev,
			  struct hns_roce_mtr *mtr, dma_addr_t *pages,
			  struct hns_roce_buf_region *region)
{
	__le64 *mtts;
	int offset;
	int count;
	int npage;
	u64 addr;
	int end;
	int i;

	/* if hopnum is 0, buffer cannot store BAs, so skip write mtt */
	if (!region->hopnum)
		return 0;

	offset = region->offset;
	end = offset + region->count;
	npage = 0;
	while (offset < end) {
		mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
						  offset, &count, NULL);
		if (!mtts)
			return -ENOBUFS;

		for (i = 0; i < count; i++) {
			if (hr_dev->hw_rev == HNS_ROCE_HW_VER1)
				addr = to_hr_hw_page_addr(pages[npage]);
			else
				addr = pages[npage];

			mtts[i] = cpu_to_le64(addr);
			npage++;
		}
		offset += count;
	}

	return 0;
}

int hns_roce_mtr_attach(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
			dma_addr_t **bufs, struct hns_roce_buf_region *regions,
			int region_cnt)
{
	struct hns_roce_buf_region *r;
	int ret;
	int i;

	ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list, regions,
					region_cnt, mtr->hem_cfg.ba_pg_shift);
	if (ret)
		return ret;

	mtr->hem_cfg.root_ba = mtr->hem_list.root_ba;
	for (i = 0; i < region_cnt; i++) {
		r = &regions[i];
		ret = mtr_map_region(hr_dev, mtr, bufs[i], r);
		if (ret) {
			dev_err(hr_dev->dev,
				"write mtr[%d/%d] err %d,offset=%d.\n",
				i, region_cnt, ret, r->offset);
			goto err_write;
		}
	}

	return 0;

err_write:
	hns_roce_hem_list_release(hr_dev, &mtr->hem_list);

	return ret;
}

static inline bool mtr_has_mtt(struct hns_roce_buf_attr
*attr) { int i; for (i = 0; i < attr->region_count; i++) if (attr->region[i].hopnum != HNS_ROCE_HOP_NUM_0 && attr->region[i].hopnum > 0) return true; /* because the mtr only one root base address, when hopnum is 0 means * root base address equals the first buffer address, thus all alloced * memory must in a continuous space accessed by direct mode. */ return false; } static inline size_t mtr_bufs_size(struct hns_roce_buf_attr *attr) { size_t size = 0; int i; for (i = 0; i < attr->region_count; i++) size += attr->region[i].size; return size; } static inline int mtr_umem_page_count(struct ib_umem *umem, int page_shift) { int count = ib_umem_page_count(umem); if (page_shift >= PAGE_SHIFT) count >>= page_shift - PAGE_SHIFT; else count <<= PAGE_SHIFT - page_shift; return count; } static inline size_t mtr_kmem_direct_size(bool is_direct, size_t alloc_size, int page_shift) { if (is_direct) return ALIGN(alloc_size, 1 << page_shift); else return HNS_HW_DIRECT_PAGE_COUNT << page_shift; } /* * check the given pages in continuous address space * Returns 0 on success, or the error page num. */ static inline int mtr_check_direct_pages(dma_addr_t *pages, int page_count, int page_shift) { size_t page_size = 1 << page_shift; int i; for (i = 1; i < page_count; i++) if (pages[i] - pages[i - 1] != page_size) return i; return 0; } static void mtr_free_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) { /* release user buffers */ if (mtr->umem) { ib_umem_release(mtr->umem); mtr->umem = NULL; } /* release kernel buffers */ if (mtr->kmem) { hns_roce_buf_free(hr_dev, mtr->kmem); kfree(mtr->kmem); mtr->kmem = NULL; } } static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, struct hns_roce_buf_attr *buf_attr, bool is_direct, struct ib_udata *udata, unsigned long user_addr) { struct ib_device *ibdev = &hr_dev->ib_dev; int max_pg_shift = buf_attr->page_shift; int best_pg_shift = 0; int all_pg_count = 0; size_t direct_size; size_t total_size; unsigned long tmp; int ret = 0; total_size = mtr_bufs_size(buf_attr); if (total_size < 1) { ibdev_err(ibdev, "Failed to check mtr size\n"); return -EINVAL; } if (udata) { mtr->kmem = NULL; mtr->umem = ib_umem_get(ibdev, user_addr, total_size, buf_attr->user_access); if (IS_ERR_OR_NULL(mtr->umem)) { ibdev_err(ibdev, "Failed to get umem, ret %ld\n", PTR_ERR(mtr->umem)); return -ENOMEM; } if (buf_attr->fixed_page) { best_pg_shift = max_pg_shift; } else { tmp = GENMASK(max_pg_shift, 0); ret = ib_umem_find_best_pgsz(mtr->umem, tmp, user_addr); best_pg_shift = (ret <= PAGE_SIZE) ? 
PAGE_SHIFT : ilog2(ret);
		}
		all_pg_count = mtr_umem_page_count(mtr->umem, best_pg_shift);
		ret = 0;
	} else {
		mtr->umem = NULL;
		mtr->kmem = kzalloc(sizeof(*mtr->kmem), GFP_KERNEL);
		if (!mtr->kmem) {
			ibdev_err(ibdev, "Failed to alloc kmem\n");
			return -ENOMEM;
		}
		direct_size = mtr_kmem_direct_size(is_direct, total_size,
						   max_pg_shift);
		ret = hns_roce_buf_alloc(hr_dev, total_size, direct_size,
					 mtr->kmem, max_pg_shift);
		if (ret) {
			ibdev_err(ibdev, "Failed to alloc kmem, ret %d\n", ret);
			goto err_alloc_mem;
		} else {
			best_pg_shift = max_pg_shift;
			all_pg_count = mtr->kmem->npages;
		}
	}

	/* must be bigger than the minimum hardware page shift */
	if (best_pg_shift < PAGE_ADDR_SHIFT || all_pg_count < 1) {
		ret = -EINVAL;
		ibdev_err(ibdev, "Failed to check mtr page shift %d count %d\n",
			  best_pg_shift, all_pg_count);
		goto err_alloc_mem;
	}

	mtr->hem_cfg.buf_pg_shift = best_pg_shift;
	mtr->hem_cfg.buf_pg_count = all_pg_count;

	return 0;

err_alloc_mem:
	mtr_free_bufs(hr_dev, mtr);
	return ret;
}

static int mtr_get_pages(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
			 dma_addr_t *pages, int count, int page_shift)
{
	struct ib_device *ibdev = &hr_dev->ib_dev;
	int npage;
	int err;

	if (mtr->umem)
		npage = hns_roce_get_umem_bufs(hr_dev, pages, count, 0,
					       mtr->umem, page_shift);
	else
		npage = hns_roce_get_kmem_bufs(hr_dev, pages, count, 0,
					       mtr->kmem);

	if (mtr->hem_cfg.is_direct && npage > 1) {
		err = mtr_check_direct_pages(pages, npage, page_shift);
		if (err) {
			ibdev_err(ibdev, "Failed to check %s direct page-%d\n",
				  mtr->umem ? "user" : "kernel", err);
			npage = err;
		}
	}

	return npage;
}

int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
		     struct hns_roce_buf_region *regions, int region_cnt,
		     dma_addr_t *pages, int page_cnt)
{
	struct ib_device *ibdev = &hr_dev->ib_dev;
	struct hns_roce_buf_region *r;
	int err;
	int i;

	for (i = 0; i < region_cnt; i++) {
		r = &regions[i];
		if (r->offset + r->count > page_cnt) {
			err = -EINVAL;
			ibdev_err(ibdev,
				  "Failed to check mtr%d end %d + %d, max %d\n",
				  i, r->offset, r->count, page_cnt);
			return err;
		}

		err = mtr_map_region(hr_dev, mtr, &pages[r->offset], r);
		if (err) {
			ibdev_err(ibdev,
				  "Failed to map mtr%d offset %d, err %d\n",
				  i, r->offset, err);
			return err;
		}
	}

	return 0;
}

int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
		      int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr)
{
	int mtt_count;
	int total = 0;
	__le64 *mtts;
	int npage;
	u64 addr;
	int left;

	if (!mtt_buf || mtt_max < 1)
		goto done;

	/* no mtt memory in direct mode, so just return the buffer address */
	if (mtr->hem_cfg.is_direct) {
		npage = offset;
		for (total = 0; total < mtt_max; total++, npage++) {
			addr = mtr->hem_cfg.root_ba +
			       (npage << mtr->hem_cfg.buf_pg_shift);
			if (hr_dev->hw_rev == HNS_ROCE_HW_VER1)
				mtt_buf[total] = to_hr_hw_page_addr(addr);
			else
				mtt_buf[total] = addr;
		}

		goto done;
	}

	left = mtt_max;
	while (left > 0) {
		mtt_count = 0;
		mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
						  offset + total,
						  &mtt_count, NULL);
		if (!mtts || !mtt_count)
			goto done;

		npage = min(mtt_count, left);
		left -= npage;
		for (mtt_count = 0; mtt_count < npage; mtt_count++)
			mtt_buf[total++] = le64_to_cpu(mtts[mtt_count]);
	}

done:
	if (base_addr)
		*base_addr = mtr->hem_cfg.root_ba;

	return total;
}

/* convert buffer size to page index and page count */
static int mtr_init_region(struct hns_roce_buf_attr *attr, int page_cnt,
			   struct hns_roce_buf_region *regions, int region_cnt,
			   int page_shift)
{
	unsigned int page_size = 1 << page_shift;
	int max_region = attr->region_count;
	struct hns_roce_buf_region *r;
	int page_idx = 0;
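	/*
	 * Split the flat page list among the caller's regions: each region
	 * gets a starting page index (offset) and a page count rounded up
	 * from its byte size, until page_cnt pages have been handed out.
	 */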
int i = 0; for (; i < region_cnt && i < max_region && page_idx < page_cnt; i++) { r = ®ions[i]; r->hopnum = attr->region[i].hopnum == HNS_ROCE_HOP_NUM_0 ? 0 : attr->region[i].hopnum; r->offset = page_idx; r->count = DIV_ROUND_UP(attr->region[i].size, page_size); page_idx += r->count; } return i; } /** * hns_roce_mtr_create - Create hns memory translate region. * * @mtr: memory translate region * @init_attr: init attribute for creating mtr * @page_shift: page shift for multi-hop base address table * @udata: user space context, if it's NULL, means kernel space * @user_addr: userspace virtual address to start at * @buf_alloced: mtr has private buffer, true means need to alloc */ int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, struct hns_roce_buf_attr *buf_attr, int page_shift, struct ib_udata *udata, unsigned long user_addr) { struct hns_roce_buf_region regions[HNS_ROCE_MAX_BT_REGION] = {}; struct ib_device *ibdev = &hr_dev->ib_dev; dma_addr_t *pages = NULL; int region_cnt = 0; int all_pg_cnt; int get_pg_cnt; bool has_mtt; int err = 0; has_mtt = mtr_has_mtt(buf_attr); /* if buffer only need mtt, just init the hem cfg */ if (buf_attr->mtt_only) { mtr->hem_cfg.buf_pg_shift = buf_attr->page_shift; mtr->hem_cfg.buf_pg_count = mtr_bufs_size(buf_attr) >> buf_attr->page_shift; mtr->umem = NULL; mtr->kmem = NULL; } else { err = mtr_alloc_bufs(hr_dev, mtr, buf_attr, !has_mtt, udata, user_addr); if (err) { ibdev_err(ibdev, "Failed to alloc mtr bufs, err %d\n", err); return err; } } /* alloc mtt memory */ all_pg_cnt = mtr->hem_cfg.buf_pg_count; hns_roce_hem_list_init(&mtr->hem_list); mtr->hem_cfg.is_direct = !has_mtt; mtr->hem_cfg.ba_pg_shift = page_shift; if (has_mtt) { region_cnt = mtr_init_region(buf_attr, all_pg_cnt, regions, ARRAY_SIZE(regions), mtr->hem_cfg.buf_pg_shift); if (region_cnt < 1) { err = -ENOBUFS; ibdev_err(ibdev, "Failed to init mtr region %d\n", region_cnt); goto err_alloc_bufs; } err = hns_roce_hem_list_request(hr_dev, &mtr->hem_list, regions, region_cnt, page_shift); if (err) { ibdev_err(ibdev, "Failed to request mtr hem, err %d\n", err); goto err_alloc_bufs; } mtr->hem_cfg.root_ba = mtr->hem_list.root_ba; } /* no buffer to map */ if (buf_attr->mtt_only) return 0; /* alloc a tmp array to store buffer's dma address */ pages = kvcalloc(all_pg_cnt, sizeof(dma_addr_t), GFP_KERNEL); if (!pages) { err = -ENOMEM; ibdev_err(ibdev, "Failed to alloc mtr page list %d\n", all_pg_cnt); goto err_alloc_hem_list; } get_pg_cnt = mtr_get_pages(hr_dev, mtr, pages, all_pg_cnt, mtr->hem_cfg.buf_pg_shift); if (get_pg_cnt != all_pg_cnt) { ibdev_err(ibdev, "Failed to get mtr page %d != %d\n", get_pg_cnt, all_pg_cnt); err = -ENOBUFS; goto err_alloc_page_list; } if (!has_mtt) { mtr->hem_cfg.root_ba = pages[0]; } else { /* write buffer's dma address to BA table */ err = hns_roce_mtr_map(hr_dev, mtr, regions, region_cnt, pages, all_pg_cnt); if (err) { ibdev_err(ibdev, "Failed to map mtr pages, err %d\n", err); goto err_alloc_page_list; } } /* drop tmp array */ kvfree(pages); return 0; err_alloc_page_list: kvfree(pages); err_alloc_hem_list: hns_roce_hem_list_release(hr_dev, &mtr->hem_list); err_alloc_bufs: mtr_free_bufs(hr_dev, mtr); return err; } void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) { /* release multi-hop addressing resource */ hns_roce_hem_list_release(hr_dev, &mtr->hem_list); /* free buffers */ mtr_free_bufs(hr_dev, mtr); }
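/*
 * Illustrative use of the MTR interface above (a sketch inside a comment,
 * not compiled code; "buf_size", "ba" and the chosen shifts are hypothetical
 * values a caller such as a QP or CQ setup path might pass):
 *
 *	struct hns_roce_buf_attr buf_attr = {};
 *	struct hns_roce_mtr mtr = {};
 *	u64 ba[8];
 *	int ret, n;
 *
 *	buf_attr.page_shift = PAGE_ADDR_SHIFT;
 *	buf_attr.region_count = 1;
 *	buf_attr.region[0].size = buf_size;
 *	buf_attr.region[0].hopnum = hr_dev->caps.mtt_hop_num;
 *
 *	ret = hns_roce_mtr_create(hr_dev, &mtr, &buf_attr,
 *				  hr_dev->caps.mtt_ba_pg_sz + PAGE_ADDR_SHIFT,
 *				  udata, user_addr);
 *	if (!ret)
 *		n = hns_roce_mtr_find(hr_dev, &mtr, 0, ba, ARRAY_SIZE(ba),
 *				      NULL);
 *	...
 *	hns_roce_mtr_destroy(hr_dev, &mtr);
 */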