提交 2992c1dc 编写于 作者: O Ofir Bitton 提交者: Oded Gabbay

habanalabs: add support for multiple SOBs per monitor

Support advanced monitor functionality so that a single monitor can
track more than one SOB. In addition, extend all CB generation
functions with a buffer offset, so that packets generated by different
functions can be placed in the same CB.
Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
上级 3cf74b36
...@@ -38,6 +38,38 @@ void hl_sob_reset_error(struct kref *ref) ...@@ -38,6 +38,38 @@ void hl_sob_reset_error(struct kref *ref)
hw_sob->q_idx, hw_sob->sob_id); hw_sob->q_idx, hw_sob->sob_id);
} }
/**
 * hl_gen_sob_mask() - Build the SOB mask value for a monitor arm packet
 * @sob_base: sob base id
 * @sob_mask: sob user mask, bit N set selects the sob at offset N from
 *            @sob_base
 * @mask: output mask, written only when 0 is returned
 *
 * When @sob_mask is exactly 0x1, the result has every bit set except the one
 * at position (@sob_base & 0x7). For any other non-zero @sob_mask, the result
 * is the bitwise inverse of @sob_mask, provided its highest set bit is not
 * above HL_MAX_SOBS_PER_MONITOR - (@sob_base & 0x7) - 1.
 *
 * Return: 0 if given parameters are valid, -EINVAL if @sob_mask is zero or
 *         its highest set bit fails the range check
 */
int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask)
{
	int base_offset = sob_base & 0x7;
	int msb;

	if (!sob_mask)
		return -EINVAL;

	/* single sob: clear only the bit that matches the base offset */
	if (sob_mask == 0x1) {
		*mask = ~(1 << base_offset);
		return 0;
	}

	/* scan down from the top bit to find the msb of the user mask */
	msb = BITS_PER_BYTE - 1;
	while (msb >= 0 && !(sob_mask & BIT(msb)))
		msb--;

	/* the msb must still fit in the range that starts at base_offset */
	if (msb > (HL_MAX_SOBS_PER_MONITOR - base_offset - 1))
		return -EINVAL;

	*mask = ~sob_mask;

	return 0;
}
static void hl_fence_release(struct kref *kref) static void hl_fence_release(struct kref *kref)
{ {
struct hl_fence *fence = struct hl_fence *fence =
......
...@@ -77,20 +77,26 @@ ...@@ -77,20 +77,26 @@
#define HL_MAX_DCORES 4 #define HL_MAX_DCORES 4
#define HL_MAX_SOBS_PER_MONITOR 8
/** /**
* struct hl_gen_wait_properties - properties for generating a wait CB * struct hl_gen_wait_properties - properties for generating a wait CB
* @data: command buffer * @data: command buffer
* @q_idx: queue id is used to extract fence register address * @q_idx: queue id is used to extract fence register address
* @sob_id: SOB id to use in this wait CB * @size: offset in command buffer
* @sob_base: SOB base to use in this wait CB
* @sob_val: SOB value to wait for * @sob_val: SOB value to wait for
* @mon_id: monitor to use in this wait CB * @mon_id: monitor to use in this wait CB
* @sob_mask: each bit represents a SOB offset from sob_base to be used
*/ */
struct hl_gen_wait_properties { struct hl_gen_wait_properties {
void *data; void *data;
u32 q_idx; u32 q_idx;
u16 sob_id; u32 size;
u16 sob_base;
u16 sob_val; u16 sob_val;
u16 mon_id; u16 mon_id;
u8 sob_mask;
}; };
/** /**
...@@ -844,8 +850,9 @@ struct hl_asic_funcs { ...@@ -844,8 +850,9 @@ struct hl_asic_funcs {
int (*load_boot_fit_to_device)(struct hl_device *hdev); int (*load_boot_fit_to_device)(struct hl_device *hdev);
u32 (*get_signal_cb_size)(struct hl_device *hdev); u32 (*get_signal_cb_size)(struct hl_device *hdev);
u32 (*get_wait_cb_size)(struct hl_device *hdev); u32 (*get_wait_cb_size)(struct hl_device *hdev);
void (*gen_signal_cb)(struct hl_device *hdev, void *data, u16 sob_id); u32 (*gen_signal_cb)(struct hl_device *hdev, void *data, u16 sob_id,
void (*gen_wait_cb)(struct hl_device *hdev, u32 size);
u32 (*gen_wait_cb)(struct hl_device *hdev,
struct hl_gen_wait_properties *prop); struct hl_gen_wait_properties *prop);
void (*reset_sob)(struct hl_device *hdev, void *data); void (*reset_sob)(struct hl_device *hdev, void *data);
void (*set_dma_mask_from_fw)(struct hl_device *hdev); void (*set_dma_mask_from_fw)(struct hl_device *hdev);
...@@ -1927,6 +1934,7 @@ void hl_cs_rollback_all(struct hl_device *hdev); ...@@ -1927,6 +1934,7 @@ void hl_cs_rollback_all(struct hl_device *hdev);
struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
enum hl_queue_type queue_type, bool is_kernel_allocated_cb); enum hl_queue_type queue_type, bool is_kernel_allocated_cb);
void hl_sob_reset_error(struct kref *ref); void hl_sob_reset_error(struct kref *ref);
int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask);
void hl_fence_put(struct hl_fence *fence); void hl_fence_put(struct hl_fence *fence);
void hl_fence_get(struct hl_fence *fence); void hl_fence_get(struct hl_fence *fence);
......
...@@ -407,7 +407,7 @@ static void init_signal_cs(struct hl_device *hdev, ...@@ -407,7 +407,7 @@ static void init_signal_cs(struct hl_device *hdev,
cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx); cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx);
hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb, hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb,
cs_cmpl->hw_sob->sob_id); cs_cmpl->hw_sob->sob_id, 0);
kref_get(&hw_sob->kref); kref_get(&hw_sob->kref);
...@@ -454,10 +454,12 @@ static void init_wait_cs(struct hl_device *hdev, struct hl_cs *cs, ...@@ -454,10 +454,12 @@ static void init_wait_cs(struct hl_device *hdev, struct hl_cs *cs,
prop->base_mon_id, q_idx); prop->base_mon_id, q_idx);
wait_prop.data = (void *) job->patched_cb; wait_prop.data = (void *) job->patched_cb;
wait_prop.sob_id = cs_cmpl->hw_sob->sob_id; wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
wait_prop.sob_mask = 0x1;
wait_prop.sob_val = cs_cmpl->sob_val; wait_prop.sob_val = cs_cmpl->sob_val;
wait_prop.mon_id = prop->base_mon_id; wait_prop.mon_id = prop->base_mon_id;
wait_prop.q_idx = q_idx; wait_prop.q_idx = q_idx;
wait_prop.size = 0;
hdev->asic_funcs->gen_wait_cb(hdev, &wait_prop); hdev->asic_funcs->gen_wait_cb(hdev, &wait_prop);
kref_get(&cs_cmpl->hw_sob->kref); kref_get(&cs_cmpl->hw_sob->kref);
......
...@@ -6374,14 +6374,15 @@ static u32 gaudi_get_wait_cb_size(struct hl_device *hdev) ...@@ -6374,14 +6374,15 @@ static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
sizeof(struct packet_msg_prot) * 2; sizeof(struct packet_msg_prot) * 2;
} }
static void gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id) static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
u32 size)
{ {
struct hl_cb *cb = (struct hl_cb *) data; struct hl_cb *cb = (struct hl_cb *) data;
struct packet_msg_short *pkt; struct packet_msg_short *pkt;
u32 value, ctl; u32 value, ctl, pkt_size = sizeof(*pkt);
pkt = cb->kernel_address; pkt = cb->kernel_address + size;
memset(pkt, 0, sizeof(*pkt)); memset(pkt, 0, pkt_size);
/* Inc by 1, Mode ADD */ /* Inc by 1, Mode ADD */
value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1); value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
...@@ -6397,6 +6398,8 @@ static void gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id) ...@@ -6397,6 +6398,8 @@ static void gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id)
pkt->value = cpu_to_le32(value); pkt->value = cpu_to_le32(value);
pkt->ctl = cpu_to_le32(ctl); pkt->ctl = cpu_to_le32(ctl);
return size + pkt_size;
} }
static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value, static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
...@@ -6419,21 +6422,42 @@ static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value, ...@@ -6419,21 +6422,42 @@ static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
return pkt_size; return pkt_size;
} }
static u32 gaudi_add_arm_monitor_pkt(struct packet_msg_short *pkt, u16 sob_id, static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
u16 sob_val, u16 addr) struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
u16 sob_val, u16 mon_id)
{ {
u64 monitor_base;
u32 ctl, value, pkt_size = sizeof(*pkt); u32 ctl, value, pkt_size = sizeof(*pkt);
u8 mask = ~(1 << (sob_id & 0x7)); u16 msg_addr_offset;
u8 mask;
if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
dev_err(hdev->dev,
"sob_base %u (mask %#x) is not valid\n",
sob_base, sob_mask);
return 0;
}
/*
* monitor_base should be the content of the base0 address registers,
* so it will be added to the msg short offsets
*/
monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
msg_addr_offset =
(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
monitor_base;
memset(pkt, 0, pkt_size); memset(pkt, 0, pkt_size);
value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_id / 8); /* Monitor config packet: bind the monitor to a sync object */
value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val); value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK, value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
0); /* GREATER OR EQUAL*/ 0); /* GREATER OR EQUAL*/
value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask); value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr); ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */ ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */ ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
...@@ -6468,60 +6492,61 @@ static u32 gaudi_add_fence_pkt(struct packet_fence *pkt) ...@@ -6468,60 +6492,61 @@ static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
return pkt_size; return pkt_size;
} }
static void gaudi_gen_wait_cb(struct hl_device *hdev, static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
struct hl_gen_wait_properties *prop)
{ {
struct hl_cb *cb = (struct hl_cb *) prop->data; u32 offset;
void *buf = cb->kernel_address;
u64 monitor_base, fence_addr = 0;
u32 size = 0;
u16 msg_addr_offset;
switch (prop->q_idx) { switch (queue_id) {
case GAUDI_QUEUE_ID_DMA_0_0: case GAUDI_QUEUE_ID_DMA_0_0:
fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_0; offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
break; break;
case GAUDI_QUEUE_ID_DMA_0_1: case GAUDI_QUEUE_ID_DMA_0_1:
fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_1; offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
break; break;
case GAUDI_QUEUE_ID_DMA_0_2: case GAUDI_QUEUE_ID_DMA_0_2:
fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_2; offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
break; break;
case GAUDI_QUEUE_ID_DMA_0_3: case GAUDI_QUEUE_ID_DMA_0_3:
fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_3; offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
break; break;
case GAUDI_QUEUE_ID_DMA_1_0: case GAUDI_QUEUE_ID_DMA_1_0:
fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_0; offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
break; break;
case GAUDI_QUEUE_ID_DMA_1_1: case GAUDI_QUEUE_ID_DMA_1_1:
fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_1; offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
break; break;
case GAUDI_QUEUE_ID_DMA_1_2: case GAUDI_QUEUE_ID_DMA_1_2:
fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_2; offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
break; break;
case GAUDI_QUEUE_ID_DMA_1_3: case GAUDI_QUEUE_ID_DMA_1_3:
fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_3; offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
break; break;
case GAUDI_QUEUE_ID_DMA_5_0: case GAUDI_QUEUE_ID_DMA_5_0:
fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_0; offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
break; break;
case GAUDI_QUEUE_ID_DMA_5_1: case GAUDI_QUEUE_ID_DMA_5_1:
fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_1; offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
break; break;
case GAUDI_QUEUE_ID_DMA_5_2: case GAUDI_QUEUE_ID_DMA_5_2:
fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_2; offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
break; break;
case GAUDI_QUEUE_ID_DMA_5_3: case GAUDI_QUEUE_ID_DMA_5_3:
fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_3; offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
break; break;
default: default:
/* queue index should be valid here */ return -EINVAL;
dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
prop->q_idx);
return;
} }
fence_addr += CFG_BASE; *addr = CFG_BASE + offset;
return 0;
}
static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
{
u64 monitor_base;
u32 size = 0;
u16 msg_addr_offset;
/* /*
* monitor_base should be the content of the base0 address registers, * monitor_base should be the content of the base0 address registers,
...@@ -6530,15 +6555,17 @@ static void gaudi_gen_wait_cb(struct hl_device *hdev, ...@@ -6530,15 +6555,17 @@ static void gaudi_gen_wait_cb(struct hl_device *hdev,
monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0; monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
/* First monitor config packet: low address of the sync */ /* First monitor config packet: low address of the sync */
msg_addr_offset = (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + msg_addr_offset =
prop->mon_id * 4) - monitor_base; (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
monitor_base;
size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr, size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
msg_addr_offset); msg_addr_offset);
/* Second monitor config packet: high address of the sync */ /* Second monitor config packet: high address of the sync */
msg_addr_offset = (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + msg_addr_offset =
prop->mon_id * 4) - monitor_base; (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
monitor_base;
size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32), size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
msg_addr_offset); msg_addr_offset);
...@@ -6547,20 +6574,35 @@ static void gaudi_gen_wait_cb(struct hl_device *hdev, ...@@ -6547,20 +6574,35 @@ static void gaudi_gen_wait_cb(struct hl_device *hdev,
* Third monitor config packet: the payload, i.e. what to write when the * Third monitor config packet: the payload, i.e. what to write when the
* sync triggers * sync triggers
*/ */
msg_addr_offset = (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + msg_addr_offset =
prop->mon_id * 4) - monitor_base; (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
monitor_base;
size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset); size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
/* Fourth monitor config packet: bind the monitor to a sync object */ return size;
msg_addr_offset = }
(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + prop->mon_id * 4) -
monitor_base; u32 gaudi_gen_wait_cb(struct hl_device *hdev,
size += gaudi_add_arm_monitor_pkt(buf + size, prop->sob_id, struct hl_gen_wait_properties *prop)
prop->sob_val, msg_addr_offset); {
struct hl_cb *cb = (struct hl_cb *) prop->data;
void *buf = cb->kernel_address;
u64 fence_addr = 0;
u32 size = prop->size;
/* Fence packet */ if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
prop->q_idx);
return 0;
}
size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
prop->sob_mask, prop->sob_val, prop->mon_id);
size += gaudi_add_fence_pkt(buf + size); size += gaudi_add_fence_pkt(buf + size);
return size;
} }
static void gaudi_reset_sob(struct hl_device *hdev, void *data) static void gaudi_reset_sob(struct hl_device *hdev, void *data)
......
...@@ -5288,15 +5288,16 @@ static u32 goya_get_wait_cb_size(struct hl_device *hdev) ...@@ -5288,15 +5288,16 @@ static u32 goya_get_wait_cb_size(struct hl_device *hdev)
return 0; return 0;
} }
static void goya_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id) static u32 goya_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
u32 size)
{ {
return 0;
} }
static void goya_gen_wait_cb(struct hl_device *hdev, static u32 goya_gen_wait_cb(struct hl_device *hdev,
struct hl_gen_wait_properties *prop) struct hl_gen_wait_properties *prop)
{ {
return 0;
} }
static void goya_reset_sob(struct hl_device *hdev, void *data) static void goya_reset_sob(struct hl_device *hdev, void *data)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册