提交 f65e192a 编写于 作者: S Stephen Warren 提交者: David S. Miller

net/mlx4: Get rid of page operation after dma_alloc_coherent

This patch solves a crash at the time of mlx4 driver unload or system
shutdown. The crash occurs because dma_alloc_coherent() returns one
value in mlx4_alloc_icm_coherent(), but a different value is passed to
dma_free_coherent() in mlx4_free_icm_coherent(). In turn this is because
when allocated, that pointer is passed to sg_set_buf() to record it,
then when freed it is re-calculated by calling
lowmem_page_address(sg_page()) which returns a different value. Solve
this by recording the value that dma_alloc_coherent() returns, and
passing this to dma_free_coherent().

This patch is roughly equivalent to commit 378efe79 ("RDMA/hns: Get
rid of page operation after dma_alloc_coherent").

Based-on-code-from: Christoph Hellwig <hch@lst.de>
Signed-off-by: NStephen Warren <swarren@nvidia.com>
Reviewed-by: NTariq Toukan <tariqt@mellanox.com>
Signed-off-by: NDavid S. Miller <davem@davemloft.net>
上级 d4a7e9bb
...@@ -57,12 +57,12 @@ static void mlx4_free_icm_pages(struct mlx4_dev *dev, struct mlx4_icm_chunk *chu ...@@ -57,12 +57,12 @@ static void mlx4_free_icm_pages(struct mlx4_dev *dev, struct mlx4_icm_chunk *chu
int i; int i;
if (chunk->nsg > 0) if (chunk->nsg > 0)
pci_unmap_sg(dev->persist->pdev, chunk->mem, chunk->npages, pci_unmap_sg(dev->persist->pdev, chunk->sg, chunk->npages,
PCI_DMA_BIDIRECTIONAL); PCI_DMA_BIDIRECTIONAL);
for (i = 0; i < chunk->npages; ++i) for (i = 0; i < chunk->npages; ++i)
__free_pages(sg_page(&chunk->mem[i]), __free_pages(sg_page(&chunk->sg[i]),
get_order(chunk->mem[i].length)); get_order(chunk->sg[i].length));
} }
static void mlx4_free_icm_coherent(struct mlx4_dev *dev, struct mlx4_icm_chunk *chunk) static void mlx4_free_icm_coherent(struct mlx4_dev *dev, struct mlx4_icm_chunk *chunk)
...@@ -71,9 +71,9 @@ static void mlx4_free_icm_coherent(struct mlx4_dev *dev, struct mlx4_icm_chunk * ...@@ -71,9 +71,9 @@ static void mlx4_free_icm_coherent(struct mlx4_dev *dev, struct mlx4_icm_chunk *
for (i = 0; i < chunk->npages; ++i) for (i = 0; i < chunk->npages; ++i)
dma_free_coherent(&dev->persist->pdev->dev, dma_free_coherent(&dev->persist->pdev->dev,
chunk->mem[i].length, chunk->buf[i].size,
lowmem_page_address(sg_page(&chunk->mem[i])), chunk->buf[i].addr,
sg_dma_address(&chunk->mem[i])); chunk->buf[i].dma_addr);
} }
void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent) void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent)
...@@ -111,22 +111,21 @@ static int mlx4_alloc_icm_pages(struct scatterlist *mem, int order, ...@@ -111,22 +111,21 @@ static int mlx4_alloc_icm_pages(struct scatterlist *mem, int order,
return 0; return 0;
} }
static int mlx4_alloc_icm_coherent(struct device *dev, struct scatterlist *mem, static int mlx4_alloc_icm_coherent(struct device *dev, struct mlx4_icm_buf *buf,
int order, gfp_t gfp_mask) int order, gfp_t gfp_mask)
{ {
void *buf = dma_alloc_coherent(dev, PAGE_SIZE << order, buf->addr = dma_alloc_coherent(dev, PAGE_SIZE << order,
&sg_dma_address(mem), gfp_mask); &buf->dma_addr, gfp_mask);
if (!buf) if (!buf->addr)
return -ENOMEM; return -ENOMEM;
if (offset_in_page(buf)) { if (offset_in_page(buf->addr)) {
dma_free_coherent(dev, PAGE_SIZE << order, dma_free_coherent(dev, PAGE_SIZE << order, buf->addr,
buf, sg_dma_address(mem)); buf->dma_addr);
return -ENOMEM; return -ENOMEM;
} }
sg_set_buf(mem, buf, PAGE_SIZE << order); buf->size = PAGE_SIZE << order;
sg_dma_len(mem) = PAGE_SIZE << order;
return 0; return 0;
} }
...@@ -159,21 +158,21 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, ...@@ -159,21 +158,21 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
while (npages > 0) { while (npages > 0) {
if (!chunk) { if (!chunk) {
chunk = kmalloc_node(sizeof(*chunk), chunk = kzalloc_node(sizeof(*chunk),
gfp_mask & ~(__GFP_HIGHMEM | gfp_mask & ~(__GFP_HIGHMEM |
__GFP_NOWARN), __GFP_NOWARN),
dev->numa_node); dev->numa_node);
if (!chunk) { if (!chunk) {
chunk = kmalloc(sizeof(*chunk), chunk = kzalloc(sizeof(*chunk),
gfp_mask & ~(__GFP_HIGHMEM | gfp_mask & ~(__GFP_HIGHMEM |
__GFP_NOWARN)); __GFP_NOWARN));
if (!chunk) if (!chunk)
goto fail; goto fail;
} }
chunk->coherent = coherent;
sg_init_table(chunk->mem, MLX4_ICM_CHUNK_LEN); if (!coherent)
chunk->npages = 0; sg_init_table(chunk->sg, MLX4_ICM_CHUNK_LEN);
chunk->nsg = 0;
list_add_tail(&chunk->list, &icm->chunk_list); list_add_tail(&chunk->list, &icm->chunk_list);
} }
...@@ -186,10 +185,10 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, ...@@ -186,10 +185,10 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
if (coherent) if (coherent)
ret = mlx4_alloc_icm_coherent(&dev->persist->pdev->dev, ret = mlx4_alloc_icm_coherent(&dev->persist->pdev->dev,
&chunk->mem[chunk->npages], &chunk->buf[chunk->npages],
cur_order, mask); cur_order, mask);
else else
ret = mlx4_alloc_icm_pages(&chunk->mem[chunk->npages], ret = mlx4_alloc_icm_pages(&chunk->sg[chunk->npages],
cur_order, mask, cur_order, mask,
dev->numa_node); dev->numa_node);
...@@ -205,7 +204,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, ...@@ -205,7 +204,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
if (coherent) if (coherent)
++chunk->nsg; ++chunk->nsg;
else if (chunk->npages == MLX4_ICM_CHUNK_LEN) { else if (chunk->npages == MLX4_ICM_CHUNK_LEN) {
chunk->nsg = pci_map_sg(dev->persist->pdev, chunk->mem, chunk->nsg = pci_map_sg(dev->persist->pdev, chunk->sg,
chunk->npages, chunk->npages,
PCI_DMA_BIDIRECTIONAL); PCI_DMA_BIDIRECTIONAL);
...@@ -220,7 +219,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, ...@@ -220,7 +219,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
} }
if (!coherent && chunk) { if (!coherent && chunk) {
chunk->nsg = pci_map_sg(dev->persist->pdev, chunk->mem, chunk->nsg = pci_map_sg(dev->persist->pdev, chunk->sg,
chunk->npages, chunk->npages,
PCI_DMA_BIDIRECTIONAL); PCI_DMA_BIDIRECTIONAL);
...@@ -320,7 +319,7 @@ void *mlx4_table_find(struct mlx4_icm_table *table, u32 obj, ...@@ -320,7 +319,7 @@ void *mlx4_table_find(struct mlx4_icm_table *table, u32 obj,
u64 idx; u64 idx;
struct mlx4_icm_chunk *chunk; struct mlx4_icm_chunk *chunk;
struct mlx4_icm *icm; struct mlx4_icm *icm;
struct page *page = NULL; void *addr = NULL;
if (!table->lowmem) if (!table->lowmem)
return NULL; return NULL;
...@@ -336,28 +335,49 @@ void *mlx4_table_find(struct mlx4_icm_table *table, u32 obj, ...@@ -336,28 +335,49 @@ void *mlx4_table_find(struct mlx4_icm_table *table, u32 obj,
list_for_each_entry(chunk, &icm->chunk_list, list) { list_for_each_entry(chunk, &icm->chunk_list, list) {
for (i = 0; i < chunk->npages; ++i) { for (i = 0; i < chunk->npages; ++i) {
dma_addr_t dma_addr;
size_t len;
if (table->coherent) {
len = chunk->buf[i].size;
dma_addr = chunk->buf[i].dma_addr;
addr = chunk->buf[i].addr;
} else {
struct page *page;
len = sg_dma_len(&chunk->sg[i]);
dma_addr = sg_dma_address(&chunk->sg[i]);
/* XXX: we should never do this for highmem
* allocation. This function either needs
* to be split, or the kernel virtual address
* return needs to be made optional.
*/
page = sg_page(&chunk->sg[i]);
addr = lowmem_page_address(page);
}
if (dma_handle && dma_offset >= 0) { if (dma_handle && dma_offset >= 0) {
if (sg_dma_len(&chunk->mem[i]) > dma_offset) if (len > dma_offset)
*dma_handle = sg_dma_address(&chunk->mem[i]) + *dma_handle = dma_addr + dma_offset;
dma_offset; dma_offset -= len;
dma_offset -= sg_dma_len(&chunk->mem[i]);
} }
/* /*
* DMA mapping can merge pages but not split them, * DMA mapping can merge pages but not split them,
* so if we found the page, dma_handle has already * so if we found the page, dma_handle has already
* been assigned to. * been assigned to.
*/ */
if (chunk->mem[i].length > offset) { if (len > offset)
page = sg_page(&chunk->mem[i]);
goto out; goto out;
} offset -= len;
offset -= chunk->mem[i].length;
} }
} }
addr = NULL;
out: out:
mutex_unlock(&table->mutex); mutex_unlock(&table->mutex);
return page ? lowmem_page_address(page) + offset : NULL; return addr ? addr + offset : NULL;
} }
int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table, int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
......
...@@ -47,11 +47,21 @@ enum { ...@@ -47,11 +47,21 @@ enum {
MLX4_ICM_PAGE_SIZE = 1 << MLX4_ICM_PAGE_SHIFT, MLX4_ICM_PAGE_SIZE = 1 << MLX4_ICM_PAGE_SHIFT,
}; };
struct mlx4_icm_buf {
void *addr;
size_t size;
dma_addr_t dma_addr;
};
struct mlx4_icm_chunk { struct mlx4_icm_chunk {
struct list_head list; struct list_head list;
int npages; int npages;
int nsg; int nsg;
struct scatterlist mem[MLX4_ICM_CHUNK_LEN]; bool coherent;
union {
struct scatterlist sg[MLX4_ICM_CHUNK_LEN];
struct mlx4_icm_buf buf[MLX4_ICM_CHUNK_LEN];
};
}; };
struct mlx4_icm { struct mlx4_icm {
...@@ -114,12 +124,18 @@ static inline void mlx4_icm_next(struct mlx4_icm_iter *iter) ...@@ -114,12 +124,18 @@ static inline void mlx4_icm_next(struct mlx4_icm_iter *iter)
static inline dma_addr_t mlx4_icm_addr(struct mlx4_icm_iter *iter) static inline dma_addr_t mlx4_icm_addr(struct mlx4_icm_iter *iter)
{ {
return sg_dma_address(&iter->chunk->mem[iter->page_idx]); if (iter->chunk->coherent)
return iter->chunk->buf[iter->page_idx].dma_addr;
else
return sg_dma_address(&iter->chunk->sg[iter->page_idx]);
} }
static inline unsigned long mlx4_icm_size(struct mlx4_icm_iter *iter) static inline unsigned long mlx4_icm_size(struct mlx4_icm_iter *iter)
{ {
return sg_dma_len(&iter->chunk->mem[iter->page_idx]); if (iter->chunk->coherent)
return iter->chunk->buf[iter->page_idx].size;
else
return sg_dma_len(&iter->chunk->sg[iter->page_idx]);
} }
int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm *icm); int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm *icm);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册