提交 5de406c0 编写于 作者: O Ofir Bitton 提交者: Oded Gabbay

habanalabs: sync stream collective support

Implement sync stream collective for GAUDI. Need to allocate additional
resources for that and add ctx_fini() to clean up those resources.
Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
上级 0940caba
...@@ -142,7 +142,7 @@ static void hl_fence_init(struct hl_fence *fence) ...@@ -142,7 +142,7 @@ static void hl_fence_init(struct hl_fence *fence)
init_completion(&fence->completion); init_completion(&fence->completion);
} }
static void cs_get(struct hl_cs *cs) void cs_get(struct hl_cs *cs)
{ {
kref_get(&cs->refcount); kref_get(&cs->refcount);
} }
...@@ -917,6 +917,9 @@ static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev, ...@@ -917,6 +917,9 @@ static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev,
job->job_cb_size = job->user_cb_size; job->job_cb_size = job->user_cb_size;
hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT); hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
/* increment refcount as for external queues we get completion */
cs_get(cs);
cs->jobs_in_queue_cnt[job->hw_queue_id]++; cs->jobs_in_queue_cnt[job->hw_queue_id]++;
list_add_tail(&job->cs_node, &cs->job_list); list_add_tail(&job->cs_node, &cs->job_list);
...@@ -1070,11 +1073,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, ...@@ -1070,11 +1073,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
cs, q_idx, collective_engine_id); cs, q_idx, collective_engine_id);
if (rc) if (rc)
goto put_cs; goto free_cs_object;
/* increment refcount as for external queues we get completion */
cs_get(cs);
rc = hl_hw_queue_schedule_cs(cs); rc = hl_hw_queue_schedule_cs(cs);
if (rc) { if (rc) {
......
...@@ -40,6 +40,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx) ...@@ -40,6 +40,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
if ((hdev->in_debug) && (hdev->compute_ctx == ctx)) if ((hdev->in_debug) && (hdev->compute_ctx == ctx))
hl_device_set_debug_mode(hdev, false); hl_device_set_debug_mode(hdev, false);
hdev->asic_funcs->ctx_fini(ctx);
hl_cb_va_pool_fini(ctx); hl_cb_va_pool_fini(ctx);
hl_vm_ctx_fini(ctx); hl_vm_ctx_fini(ctx);
hl_asid_free(hdev, ctx->asid); hl_asid_free(hdev, ctx->asid);
......
...@@ -65,8 +65,8 @@ ...@@ -65,8 +65,8 @@
* HL_RSVD_SOBS 'sync stream' reserved sync objects per QMAN stream * HL_RSVD_SOBS 'sync stream' reserved sync objects per QMAN stream
* HL_RSVD_MONS 'sync stream' reserved monitors per QMAN stream * HL_RSVD_MONS 'sync stream' reserved monitors per QMAN stream
*/ */
#define HL_RSVD_SOBS 4 #define HL_RSVD_SOBS 2
#define HL_RSVD_MONS 2 #define HL_RSVD_MONS 1
/* /*
* HL_COLLECTIVE_RSVD_MSTR_MONS 'collective' reserved monitors per QMAN stream * HL_COLLECTIVE_RSVD_MSTR_MONS 'collective' reserved monitors per QMAN stream
...@@ -785,6 +785,7 @@ enum div_select_defs { ...@@ -785,6 +785,7 @@ enum div_select_defs {
* @wreg: Write a register. Needed for simulator support. * @wreg: Write a register. Needed for simulator support.
* @halt_coresight: stop the ETF and ETR traces. * @halt_coresight: stop the ETF and ETR traces.
* @ctx_init: context dependent initialization. * @ctx_init: context dependent initialization.
* @ctx_fini: context dependent cleanup.
* @get_clk_rate: Retrieve the ASIC current and maximum clock rate in MHz * @get_clk_rate: Retrieve the ASIC current and maximum clock rate in MHz
* @get_queue_id_for_cq: Get the H/W queue id related to the given CQ index. * @get_queue_id_for_cq: Get the H/W queue id related to the given CQ index.
* @read_device_fw_version: read the device's firmware versions that are * @read_device_fw_version: read the device's firmware versions that are
...@@ -891,6 +892,7 @@ struct hl_asic_funcs { ...@@ -891,6 +892,7 @@ struct hl_asic_funcs {
void (*wreg)(struct hl_device *hdev, u32 reg, u32 val); void (*wreg)(struct hl_device *hdev, u32 reg, u32 val);
void (*halt_coresight)(struct hl_device *hdev); void (*halt_coresight)(struct hl_device *hdev);
int (*ctx_init)(struct hl_ctx *ctx); int (*ctx_init)(struct hl_ctx *ctx);
void (*ctx_fini)(struct hl_ctx *ctx);
int (*get_clk_rate)(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk); int (*get_clk_rate)(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
u32 (*get_queue_id_for_cq)(struct hl_device *hdev, u32 cq_idx); u32 (*get_queue_id_for_cq)(struct hl_device *hdev, u32 cq_idx);
void (*read_device_fw_version)(struct hl_device *hdev, void (*read_device_fw_version)(struct hl_device *hdev,
...@@ -1992,6 +1994,7 @@ void hl_sob_reset_error(struct kref *ref); ...@@ -1992,6 +1994,7 @@ void hl_sob_reset_error(struct kref *ref);
int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask); int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask);
void hl_fence_put(struct hl_fence *fence); void hl_fence_put(struct hl_fence *fence);
void hl_fence_get(struct hl_fence *fence); void hl_fence_get(struct hl_fence *fence);
void cs_get(struct hl_cs *cs);
void goya_set_asic_funcs(struct hl_device *hdev); void goya_set_asic_funcs(struct hl_device *hdev);
void gaudi_set_asic_funcs(struct hl_device *hdev); void gaudi_set_asic_funcs(struct hl_device *hdev);
......
...@@ -23,6 +23,10 @@ ...@@ -23,6 +23,10 @@
NUMBER_OF_CPU_HW_QUEUES + \ NUMBER_OF_CPU_HW_QUEUES + \
NUMBER_OF_INT_HW_QUEUES) NUMBER_OF_INT_HW_QUEUES)
/* 10 NIC QMANs, DMA5 QMAN, TPC7 QMAN */
#define NUMBER_OF_COLLECTIVE_QUEUES 12
#define NUMBER_OF_SOBS_IN_GRP 11
/* /*
* Number of MSI interrupts IDS: * Number of MSI interrupts IDS:
* Each completion queue has 1 ID * Each completion queue has 1 ID
...@@ -149,10 +153,14 @@ ...@@ -149,10 +153,14 @@
/* Virtual address space */ /* Virtual address space */
#define VA_HOST_SPACE_START 0x1000000000000ull /* 256TB */ #define VA_HOST_SPACE_START 0x1000000000000ull /* 256TB */
#define VA_HOST_SPACE_END 0x3FF8000000000ull /* 1PB - 1TB */ #define VA_HOST_SPACE_END 0x3FF7FFFE00000ull /* 1PB - 1TB */
#define VA_HOST_SPACE_SIZE (VA_HOST_SPACE_END - \ #define VA_HOST_SPACE_SIZE (VA_HOST_SPACE_END - \
VA_HOST_SPACE_START) /* 767TB */ VA_HOST_SPACE_START) /* 767TB */
/*
 * Internal CB virtual address range: the 2MB window that ends at
 * 0x3FF8000000000 (2^50 - 2^39). END - START == 0x200000, which is
 * expected to match HOST_SPACE_INTERNAL_CB_SZ.
 */
#define VA_HOST_SPACE_INTERNAL_CB_START 0x3FF7FFFE00000ull /* 1PB - 512GB - 2MB */
#define VA_HOST_SPACE_INTERNAL_CB_END 0x3FF8000000000ull /* 1PB - 512GB */
#define HOST_SPACE_INTERNAL_CB_SZ SZ_2M
#define HW_CAP_PLL BIT(0) #define HW_CAP_PLL BIT(0)
#define HW_CAP_HBM BIT(1) #define HW_CAP_HBM BIT(1)
#define HW_CAP_MMU BIT(2) #define HW_CAP_MMU BIT(2)
...@@ -240,6 +248,34 @@ enum gaudi_nic_mask { ...@@ -240,6 +248,34 @@ enum gaudi_nic_mask {
GAUDI_NIC_MASK_ALL = 0x3FF GAUDI_NIC_MASK_ALL = 0x3FF
}; };
/**
 * struct gaudi_hw_sob_group - H/W SOB group info.
 * @hdev: habanalabs device structure.
 * @kref: refcount of this SOB group. The group is reset once the refcount
 *        drops to zero.
 * @base_sob_id: base SOB id of this SOB group.
 */
struct gaudi_hw_sob_group {
struct hl_device *hdev;
struct kref kref;
u32 base_sob_id;
};
/* Total number of SOB groups: HL_RSVD_SOBS multiplied by QMAN_STREAMS. */
#define NUM_SOB_GROUPS (HL_RSVD_SOBS * QMAN_STREAMS)
/**
 * struct gaudi_collective_properties - holds all SOB groups and queues info
 *                                      reserved for the collective.
 * @hw_sob_group: H/W SOB groups (NUM_SOB_GROUPS entries).
 * @next_sob_group_val: the next value to use for the currently used SOB group
 *                      (QMAN_STREAMS entries).
 * @curr_sob_group_idx: the index of the currently used SOB group
 *                      (QMAN_STREAMS entries).
 * @mstr_sob_mask: pre-defined masks for collective master monitors
 *                 (HL_COLLECTIVE_RSVD_MSTR_MONS entries).
 */
struct gaudi_collective_properties {
struct gaudi_hw_sob_group hw_sob_group[NUM_SOB_GROUPS];
u16 next_sob_group_val[QMAN_STREAMS];
u8 curr_sob_group_idx[QMAN_STREAMS];
u8 mstr_sob_mask[HL_COLLECTIVE_RSVD_MSTR_MONS];
};
/** /**
* struct gaudi_internal_qman_info - Internal QMAN information. * struct gaudi_internal_qman_info - Internal QMAN information.
* @pq_kernel_addr: Kernel address of the PQ memory area in the host. * @pq_kernel_addr: Kernel address of the PQ memory area in the host.
...@@ -285,6 +321,8 @@ struct gaudi_device { ...@@ -285,6 +321,8 @@ struct gaudi_device {
struct gaudi_internal_qman_info internal_qmans[GAUDI_QUEUE_ID_SIZE]; struct gaudi_internal_qman_info internal_qmans[GAUDI_QUEUE_ID_SIZE];
struct gaudi_collective_properties collective_props;
u64 hbm_bar_cur_addr; u64 hbm_bar_cur_addr;
u64 max_freq_value; u64 max_freq_value;
......
...@@ -5343,6 +5343,11 @@ int goya_collective_wait_create_jobs(struct hl_device *hdev, ...@@ -5343,6 +5343,11 @@ int goya_collective_wait_create_jobs(struct hl_device *hdev,
return -EINVAL; return -EINVAL;
} }
/*
 * goya_ctx_fini() - ASIC-specific context cleanup hook for Goya.
 * @ctx: context being torn down (unused).
 *
 * Intentionally empty. It is installed as goya_funcs.ctx_fini so that the
 * callback pointer is always valid: the common hl_ctx_fini() path calls
 * hdev->asic_funcs->ctx_fini(ctx) without checking it for NULL.
 */
static void goya_ctx_fini(struct hl_ctx *ctx)
{
}
static const struct hl_asic_funcs goya_funcs = { static const struct hl_asic_funcs goya_funcs = {
.early_init = goya_early_init, .early_init = goya_early_init,
.early_fini = goya_early_fini, .early_fini = goya_early_fini,
...@@ -5404,6 +5409,7 @@ static const struct hl_asic_funcs goya_funcs = { ...@@ -5404,6 +5409,7 @@ static const struct hl_asic_funcs goya_funcs = {
.wreg = hl_wreg, .wreg = hl_wreg,
.halt_coresight = goya_halt_coresight, .halt_coresight = goya_halt_coresight,
.ctx_init = goya_ctx_init, .ctx_init = goya_ctx_init,
.ctx_fini = goya_ctx_fini,
.get_clk_rate = goya_get_clk_rate, .get_clk_rate = goya_get_clk_rate,
.get_queue_id_for_cq = goya_get_queue_id_for_cq, .get_queue_id_for_cq = goya_get_queue_id_for_cq,
.read_device_fw_version = goya_read_device_fw_version, .read_device_fw_version = goya_read_device_fw_version,
......
...@@ -18,8 +18,18 @@ ...@@ -18,8 +18,18 @@
#define GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START 0x8000 /* 32KB */ #define GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START 0x8000 /* 32KB */
#define GAUDI_DRIVER_SRAM_RESERVED_SIZE_FROM_START 0x80 /* 128 bytes */ #define GAUDI_DRIVER_SRAM_RESERVED_SIZE_FROM_START 0x80 /* 128 bytes */
#define GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT 32 /*
#define GAUDI_FIRST_AVAILABLE_W_S_MONITOR 16 * 128 SOBs reserved for collective wait
* 16 SOBs reserved for sync stream
*/
#define GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT 144
/*
* 64 monitors reserved for collective wait
* 8 monitors reserved for sync stream
*/
#define GAUDI_FIRST_AVAILABLE_W_S_MONITOR 72
/* /*
* Goya queue Numbering * Goya queue Numbering
* *
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册