diff --git a/Documentation/ABI/testing/debugfs-driver-habanalabs b/Documentation/ABI/testing/debugfs-driver-habanalabs index 284e2dfa61cd282ccf6e7119abd75402ae3650ea..63c46d9d538fd4e51014d6a1a870bac6a81397df 100644 --- a/Documentation/ABI/testing/debugfs-driver-habanalabs +++ b/Documentation/ABI/testing/debugfs-driver-habanalabs @@ -226,6 +226,12 @@ Description: Gets the state dump occurring on a CS timeout or failure. Writing an integer X discards X state dumps, so that the next read would return X+1-st newest state dump. +What: /sys/kernel/debug/habanalabs/hl/timeout_locked +Date: Sep 2021 +KernelVersion: 5.16 +Contact: obitton@habana.ai +Description: Sets the command submission timeout value in seconds. Writing 0 sets an infinite timeout. + What: /sys/kernel/debug/habanalabs/hl/stop_on_err Date: Mar 2020 KernelVersion: 5.6 diff --git a/drivers/misc/habanalabs/Kconfig b/drivers/misc/habanalabs/Kconfig index 293d79811372ce8612efe7f560f5402eb842d4a2..861c81006c6d0261ea51510f2f875c890229b3a0 100644 --- a/drivers/misc/habanalabs/Kconfig +++ b/drivers/misc/habanalabs/Kconfig @@ -8,6 +8,8 @@ config HABANA_AI depends on PCI && HAS_IOMEM select GENERIC_ALLOCATOR select HWMON + select DMA_SHARED_BUFFER + select CRC32 help Enables PCIe card driver for Habana's AI Processors (AIP) that are designed to accelerate Deep Learning inference and training workloads. diff --git a/drivers/misc/habanalabs/common/Makefile b/drivers/misc/habanalabs/common/Makefile index 6ebe3c7001ffe36538c7b113d1d6afc2c748fee6..82c3824cad00e9a053bc7bb8febfee39e7eb3003 100644 --- a/drivers/misc/habanalabs/common/Makefile +++ b/drivers/misc/habanalabs/common/Makefile @@ -11,4 +11,4 @@ HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \ common/command_buffer.o common/hw_queue.o common/irq.o \ common/sysfs.o common/hwmon.o common/memory.o \ common/command_submission.o common/firmware_if.o \ - common/state_dump.o + common/state_dump.o common/hwmgr.o diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index 6dafff375f1c6e614010552e79f5b52292cfb1af..4c8000fd246cddfce14fb3c8cee153a9b076a11d 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -143,6 +143,7 @@ static void hl_fence_init(struct hl_fence *fence, u64 sequence) fence->cs_sequence = sequence; fence->error = 0; fence->timestamp = ktime_set(0, 0); + fence->mcs_handling_done = false; init_completion(&fence->completion); } @@ -431,11 +432,10 @@ static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs) /* Don't cancel TDR in case this CS was timedout because we might be * running from the TDR context */ - if (cs && (cs->timedout || - hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT)) + if (cs->timedout || hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT) return; - if (cs && cs->tdr_active) + if (cs->tdr_active) cancel_delayed_work_sync(&cs->work_tdr); spin_lock(&hdev->cs_mirror_lock); @@ -536,10 +536,21 @@ static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs) mcs_compl->timestamp = ktime_to_ns(fence->timestamp); complete_all(&mcs_compl->completion); + + /* + * Setting mcs_handling_done inside the lock ensures + * at least one fence has mcs_handling_done set to + * true before the wait for the multi-CS finishes. This + * ensures at least one CS will be seen as completed when + * the multi-CS fences are polled. + */ + fence->mcs_handling_done = true; } spin_unlock(&mcs_compl->lock); } + /* In case the CS completed without an initialized mcs completion */ + fence->mcs_handling_done = true; } static inline void cs_release_sob_reset_handler(struct hl_device *hdev,
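
For illustration, the three-way status handling that the next hunk introduces in hl_cs_poll_fences() can be modeled as a small standalone C program (hypothetical types and values, not driver code): only a CS whose fence has finished its multi-CS handling contributes to the completion bitmap, and only completed fences feed the earliest-timestamp calculation.

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

enum wait_status { WAIT_BUSY, WAIT_COMPLETED, WAIT_GONE };

struct model_fence {
        enum wait_status status;
        bool mcs_handling_done;         /* models hl_fence->mcs_handling_done */
        int64_t timestamp_ns;           /* valid only when COMPLETED */
};

static uint64_t poll_fences(const struct model_fence *f, int n, int64_t *first_ts)
{
        uint64_t bitmap = 0;

        *first_ts = INT64_MAX;
        for (int i = 0; i < n; i++) {
                switch (f[i].status) {
                case WAIT_BUSY:
                        break;          /* keep waiting on this CS */
                case WAIT_COMPLETED:
                        /* models the !mcs_handling_done early-out */
                        if (!f[i].mcs_handling_done)
                                break;
                        bitmap |= 1ULL << i;
                        if (f[i].timestamp_ns < *first_ts)
                                *first_ts = f[i].timestamp_ns;
                        break;
                case WAIT_GONE:
                        /* old sequence number: completed, but no timestamp */
                        bitmap |= 1ULL << i;
                        break;
                }
        }
        return bitmap;
}

int main(void)
{
        struct model_fence f[3] = {
                { WAIT_COMPLETED, true, 500 },
                { WAIT_BUSY, false, 0 },
                { WAIT_GONE, false, 0 },
        };
        int64_t ts;
        uint64_t bm = poll_fences(f, 3, &ts);

        /* expect bitmap=0x5, first_ts=500 */
        printf("bitmap=0x%llx first_ts=%lld\n",
               (unsigned long long)bm, (long long)ts);
        return 0;
}
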
@@ -2371,32 +2382,48 @@ static int hl_cs_poll_fences(struct multi_cs_data *mcs_data) break; } - mcs_data->stream_master_qid_map |= fence->stream_master_qid_map; - - if (status == CS_WAIT_STATUS_BUSY) - continue; - - mcs_data->completion_bitmap |= BIT(i); - - /* - * best effort to extract timestamp. few notes: - * - if even single fence is gone we cannot extract timestamp - * (as fence not exist anymore) - * - for all completed CSs we take the earliest timestamp. - * for this we have to validate that: - * 1. given timestamp was indeed set - * 2. the timestamp is earliest of all timestamps so far - */ + switch (status) { + case CS_WAIT_STATUS_BUSY: + /* CS did not finish, keep waiting on its QID */ + mcs_data->stream_master_qid_map |= + fence->stream_master_qid_map; + break; + case CS_WAIT_STATUS_COMPLETED: + /* + * Use mcs_handling_done to avoid returning a completion + * indication for this CS to the user before it has finished + * all of its mcs handling; otherwise the next time the + * user waits for mcs a race can occur. + */ + if (!fence->mcs_handling_done) + break; - if (status == CS_WAIT_STATUS_GONE) { + mcs_data->completion_bitmap |= BIT(i); + /* + * For all completed CSs we take the earliest timestamp. + * For this we have to validate that the timestamp is the + * earliest of all timestamps so far. + */ + if (mcs_data->update_ts && + (ktime_compare(fence->timestamp, first_cs_time) < 0)) + first_cs_time = fence->timestamp; + break; + case CS_WAIT_STATUS_GONE: mcs_data->update_ts = false; mcs_data->gone_cs = true; - } else if (mcs_data->update_ts && - (ktime_compare(fence->timestamp, - ktime_set(0, 0)) > 0) && - (ktime_compare(fence->timestamp, first_cs_time) < 0)) { - first_cs_time = fence->timestamp; + /* + * It is possible to get old sequence numbers from the user, + * which relate to already completed CSs whose fences are + * already gone. In this case, the CS is set as completed but + * there is no need to consider its QID for the mcs completion.
+ */ + mcs_data->completion_bitmap |= BIT(i); + break; + default: + dev_err(hdev->dev, "Invalid fence status\n"); + return -EINVAL; } + } hl_fences_put(mcs_data->fence_arr, arr_len); @@ -2740,13 +2767,14 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, u32 timeout_us, u64 user_address, - u32 target_value, u16 interrupt_offset, - enum hl_cs_wait_status *status) + u64 target_value, u16 interrupt_offset, + enum hl_cs_wait_status *status, + u64 *timestamp) { struct hl_user_pending_interrupt *pend; struct hl_user_interrupt *interrupt; unsigned long timeout, flags; - u32 completion_value; + u64 completion_value; long completion_rc; int rc = 0; @@ -2780,15 +2808,17 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, /* We check for completion value as interrupt could have been received * before we added the node to the wait list */ - if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 4)) { + if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) { dev_err(hdev->dev, "Failed to copy completion value from user\n"); rc = -EFAULT; goto remove_pending_user_interrupt; } - if (completion_value >= target_value) + if (completion_value >= target_value) { *status = CS_WAIT_STATUS_COMPLETED; - else + /* There was no interrupt, we assume the completion is now. */ + pend->fence.timestamp = ktime_get(); + } else *status = CS_WAIT_STATUS_BUSY; if (!timeout_us || (*status == CS_WAIT_STATUS_COMPLETED)) @@ -2812,7 +2842,7 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, reinit_completion(&pend->fence.completion); spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); - if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 4)) { + if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) { dev_err(hdev->dev, "Failed to copy completion value from user\n"); rc = -EFAULT; @@ -2839,6 +2869,8 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, list_del(&pend->wait_list_node); spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); + *timestamp = ktime_to_ns(pend->fence.timestamp); + kfree(pend); hl_ctx_put(ctx); @@ -2852,6 +2884,7 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data) struct asic_fixed_properties *prop; union hl_wait_cs_args *args = data; enum hl_cs_wait_status status; + u64 timestamp; int rc; prop = &hdev->asic_prop; @@ -2881,7 +2914,8 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data) rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, args->in.interrupt_timeout_us, args->in.addr, - args->in.target, interrupt_offset, &status); + args->in.target, interrupt_offset, &status, + ×tamp); if (rc) { if (rc != -EINTR) @@ -2893,6 +2927,11 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data) memset(args, 0, sizeof(*args)); + if (timestamp) { + args->out.timestamp_nsec = timestamp; + args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD; + } + switch (status) { case CS_WAIT_STATUS_COMPLETED: args->out.status = HL_WAIT_CS_STATUS_COMPLETED; diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c index 22978303ad63f192e6b75810b42d68deb2e3ec60..d0aaccd4df2cdd133f32e1e2a0818c9b596134f2 100644 --- a/drivers/misc/habanalabs/common/context.c +++ b/drivers/misc/habanalabs/common/context.c @@ -181,12 +181,6 @@ int hl_ctx_create(struct hl_device *hdev, struct 
hl_fpriv *hpriv) return rc; } -void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx) -{ - if (kref_put(&ctx->refcount, hl_ctx_do_release) == 1) - return; -} - int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx) { int rc = 0; @@ -392,7 +386,7 @@ void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr) idp = &mgr->ctx_handles; idr_for_each_entry(idp, ctx, id) - hl_ctx_free(hdev, ctx); + kref_put(&ctx->refcount, hl_ctx_do_release); idr_destroy(&mgr->ctx_handles); mutex_destroy(&mgr->ctx_lock); diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c index 985f1f3dbd20f4fe9062995b19a8e338260243b0..1f2a3dc6c4e2f91166844162b791f22acc9a4362 100644 --- a/drivers/misc/habanalabs/common/debugfs.c +++ b/drivers/misc/habanalabs/common/debugfs.c @@ -1167,6 +1167,45 @@ static ssize_t hl_state_dump_write(struct file *f, const char __user *buf, return count; } +static ssize_t hl_timeout_locked_read(struct file *f, char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + char tmp_buf[200]; + ssize_t rc; + + if (*ppos) + return 0; + + sprintf(tmp_buf, "%d\n", + jiffies_to_msecs(hdev->timeout_jiffies) / 1000); + rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf, + strlen(tmp_buf) + 1); + + return rc; +} + +static ssize_t hl_timeout_locked_write(struct file *f, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + u32 value; + ssize_t rc; + + rc = kstrtouint_from_user(buf, count, 10, &value); + if (rc) + return rc; + + if (value) + hdev->timeout_jiffies = msecs_to_jiffies(value * 1000); + else + hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT; + + return count; +} + static const struct file_operations hl_data32b_fops = { .owner = THIS_MODULE, .read = hl_data_read32, @@ -1240,6 +1279,12 @@ static const struct file_operations hl_state_dump_fops = { .write = hl_state_dump_write }; +static const struct file_operations hl_timeout_locked_fops = { + .owner = THIS_MODULE, + .read = hl_timeout_locked_read, + .write = hl_timeout_locked_write +}; + static const struct hl_info_list hl_debugfs_list[] = { {"command_buffers", command_buffers_show, NULL}, {"command_submission", command_submission_show, NULL}, @@ -1421,6 +1466,12 @@ void hl_debugfs_add_device(struct hl_device *hdev) dev_entry, &hl_state_dump_fops); + debugfs_create_file("timeout_locked", + 0644, + dev_entry->root, + dev_entry, + &hl_timeout_locked_fops); + for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) { debugfs_create_file(hl_debugfs_list[i].name, 0444, diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 97c7c86580e61aab88afe4290c2e3d63f0884585..2022e5d7b3ade9710cb107607e3e4b0a3c80f436 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -69,13 +69,6 @@ static void hpriv_release(struct kref *ref) mutex_destroy(&hpriv->restore_phase_mutex); - mutex_lock(&hdev->fpriv_list_lock); - list_del(&hpriv->dev_node); - hdev->compute_ctx = NULL; - mutex_unlock(&hdev->fpriv_list_lock); - - kfree(hpriv); - if ((!hdev->pldm) && (hdev->pdev) && (!hdev->asic_funcs->is_device_idle(hdev, idle_mask, @@ -87,9 +80,32 @@ static void hpriv_release(struct kref *ref) device_is_idle = false; } + /* We need to remove the user from the list to make sure the reset 
process won't + * try to kill the user process. If we got here, it means there are no + * more driver/device resources that the user process is occupying, so there + * is no need to kill it. + * + * However, we can't set the compute_ctx to NULL at this stage. This is to prevent + * a race between the release and opening the device again. We don't want to let + * a user open the device while a reset is about to happen. + */ + mutex_lock(&hdev->fpriv_list_lock); + list_del(&hpriv->dev_node); + mutex_unlock(&hdev->fpriv_list_lock); + if ((hdev->reset_if_device_not_idle && !device_is_idle) || hdev->reset_upon_device_release) hl_device_reset(hdev, HL_RESET_DEVICE_RELEASE); + + /* Now we can mark the compute_ctx as empty. Even if a reset is running in a different + * thread, we don't care because in_reset is marked, so if a user tries to open + * the device it will fail on that, even if compute_ctx is NULL. + */ + mutex_lock(&hdev->fpriv_list_lock); + hdev->compute_ctx = NULL; + mutex_unlock(&hdev->fpriv_list_lock); + + kfree(hpriv); } void hl_hpriv_get(struct hl_fpriv *hpriv) @@ -530,6 +546,19 @@ static void hl_device_heartbeat(struct work_struct *work) return; reschedule: + /* + * prev_reset_trigger tracks consecutive fatal h/w errors until the first + * heartbeat immediately after a reset. + * If control reached here, then at least one heartbeat work has been + * scheduled since the last reset/init cycle. + * So if the device is not already in a reset cycle, reset the flag + * prev_reset_trigger, as no reset occurred with HL_RESET_FW_FATAL_ERR + * status for at least one heartbeat. From this point the driver restarts + * tracking future consecutive fatal errors. + */ + if (!(atomic_read(&hdev->in_reset))) + hdev->prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT; + schedule_delayed_work(&hdev->work_heartbeat, usecs_to_jiffies(HL_HEARTBEAT_PER_USEC)); } @@ -909,6 +938,65 @@ static void device_disable_open_processes(struct hl_device *hdev) mutex_unlock(&hdev->fpriv_list_lock); } +static void handle_reset_trigger(struct hl_device *hdev, u32 flags) +{ + u32 cur_reset_trigger = HL_RESET_TRIGGER_DEFAULT; + + /* + * 'reset cause' is being updated here, because getting here + * means that it's the 1st time and the last time we're here + * ('in_reset' makes sure of it). This makes sure that + * 'reset_cause' will continue holding its 1st recorded reason! + */ + if (flags & HL_RESET_HEARTBEAT) { + hdev->curr_reset_cause = HL_RESET_CAUSE_HEARTBEAT; + cur_reset_trigger = HL_RESET_HEARTBEAT; + } else if (flags & HL_RESET_TDR) { + hdev->curr_reset_cause = HL_RESET_CAUSE_TDR; + cur_reset_trigger = HL_RESET_TDR; + } else if (flags & HL_RESET_FW_FATAL_ERR) { + hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN; + cur_reset_trigger = HL_RESET_FW_FATAL_ERR; + } else { + hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN; + } + + /* + * If the reset cause is the same twice in a row, reset_trigger_repeated + * is set, and if this reset is due to a fatal FW error the + * device is put in an unstable state. + */ + if (hdev->prev_reset_trigger != cur_reset_trigger) { + hdev->prev_reset_trigger = cur_reset_trigger; + hdev->reset_trigger_repeated = 0; + } else { + hdev->reset_trigger_repeated = 1; + } + + /* If the reset is due to heartbeat, the device CPU is not responsive, + * in which case there is no point in sending it the PCI disable message.
+ * + * If F/W is performing the reset, no need to send it a message to disable + * PCI access + */ + if ((flags & HL_RESET_HARD) && + !(flags & (HL_RESET_HEARTBEAT | HL_RESET_FW))) { + /* Disable PCI access from the device F/W so it won't send + * us additional interrupts. We disable MSI/MSI-X at + * the halt_engines function and we can't have the F/W + * sending us interrupts after that. We need to disable + * the access here because if the device is marked + * disabled, the message won't be sent. Also, in case + * of heartbeat, the device CPU is marked as disabled + * so this message won't be sent + */ + if (hl_fw_send_pci_access_msg(hdev, + CPUCP_PACKET_DISABLE_PCI_ACCESS)) + dev_warn(hdev->dev, + "Failed to disable PCI access by F/W\n"); + } +}
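
As a sanity check of the trigger tracking that handle_reset_trigger() implements above, here is a minimal standalone C model (the flag values mirror the HL_RESET_* defines from this patch; the rest is illustrative, not driver code):

#include <stdio.h>

#define RST_HEARTBEAT           (1 << 2)        /* HL_RESET_HEARTBEAT */
#define RST_TDR                 (1 << 3)        /* HL_RESET_TDR */
#define RST_FW_FATAL_ERR        (1 << 6)        /* HL_RESET_FW_FATAL_ERR */
#define RST_TRIGGER_DEFAULT     0xFF            /* HL_RESET_TRIGGER_DEFAULT */

struct dev_state {
        unsigned int prev_trigger;
        int trigger_repeated;
};

static void track_trigger(struct dev_state *d, unsigned int flags)
{
        unsigned int cur = RST_TRIGGER_DEFAULT;

        if (flags & RST_HEARTBEAT)
                cur = RST_HEARTBEAT;
        else if (flags & RST_TDR)
                cur = RST_TDR;
        else if (flags & RST_FW_FATAL_ERR)
                cur = RST_FW_FATAL_ERR;

        /* repeated only when the same trigger fires twice in a row */
        if (d->prev_trigger != cur) {
                d->prev_trigger = cur;
                d->trigger_repeated = 0;
        } else {
                d->trigger_repeated = 1;
        }
}

int main(void)
{
        struct dev_state d = { RST_TRIGGER_DEFAULT, 0 };

        track_trigger(&d, RST_FW_FATAL_ERR);
        track_trigger(&d, RST_FW_FATAL_ERR);
        /* a second consecutive fatal FW error: the driver would now
         * abort the hard reset and leave the device unusable
         */
        printf("repeated=%d\n", d.trigger_repeated);
        return 0;
}
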
+ /* * hl_device_reset - reset the device * @@ -954,7 +1042,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags) goto do_reset; } - if (!hard_reset && !hdev->allow_external_soft_reset) { + if (!hard_reset && !hdev->allow_inference_soft_reset) { hard_instead_soft = true; hard_reset = true; } @@ -978,47 +1066,21 @@ int hl_device_reset(struct hl_device *hdev, u32 flags) if (rc) return 0; - /* - * 'reset cause' is being updated here, because getting here - * means that it's the 1st time and the last time we're here - * ('in_reset' makes sure of it). This makes sure that - * 'reset_cause' will continue holding its 1st recorded reason! - */ - if (flags & HL_RESET_HEARTBEAT) - hdev->curr_reset_cause = HL_RESET_CAUSE_HEARTBEAT; - else if (flags & HL_RESET_TDR) - hdev->curr_reset_cause = HL_RESET_CAUSE_TDR; - else - hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN; - - /* If reset is due to heartbeat, device CPU is no responsive in - * which case no point sending PCI disable message to it. - * - * If F/W is performing the reset, no need to send it a message to disable - * PCI access - */ - if (hard_reset && !(flags & (HL_RESET_HEARTBEAT | HL_RESET_FW))) { - /* Disable PCI access from device F/W so he won't send - * us additional interrupts. We disable MSI/MSI-X at - * the halt_engines function and we can't have the F/W - * sending us interrupts after that. We need to disable - * the access here because if the device is marked - * disable, the message won't be send. Also, in case - * of heartbeat, the device CPU is marked as disable - * so this message won't be sent - */ - if (hl_fw_send_pci_access_msg(hdev, - CPUCP_PACKET_DISABLE_PCI_ACCESS)) - dev_warn(hdev->dev, - "Failed to disable PCI access by F/W\n"); - } + handle_reset_trigger(hdev, flags); /* This also blocks future CS/VM/JOB completion operations */ hdev->disabled = true; take_release_locks(hdev); - dev_err(hdev->dev, "Going to RESET device!\n"); + if (hard_reset) + dev_info(hdev->dev, "Going to reset device\n"); + else if (flags & HL_RESET_DEVICE_RELEASE) + dev_info(hdev->dev, + "Going to reset device after it was released by user\n"); + else + dev_info(hdev->dev, + "Going to reset compute engines of inference device\n"); } again: @@ -1108,6 +1170,17 @@ int hl_device_reset(struct hl_device *hdev, u32 flags) hdev->device_cpu_disabled = false; hdev->hard_reset_pending = false; + if (hdev->reset_trigger_repeated && + (hdev->prev_reset_trigger == HL_RESET_FW_FATAL_ERR)) { + /* If there were two back-to-back resets from FW, + * ensure the driver puts the device in an unusable state + */ + dev_crit(hdev->dev, + "Consecutive FW fatal errors received, stopping hard reset\n"); + rc = -EIO; + goto out_err; + } + if (hdev->kernel_ctx) { dev_crit(hdev->dev, "kernel ctx was alive during hard reset, something is terribly wrong\n"); diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 8d2568c63f19e8984eecb7c8705417d2db86a19a..4e68fb9d2a6bd59cad14dc6e525acae39fdc19a8 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -2162,18 +2162,17 @@ static void hl_fw_linux_update_state(struct hl_device *hdev, } /** - * hl_fw_dynamic_report_reset_cause - send a COMMS message with the cause - * of the newly triggered hard reset + * hl_fw_dynamic_send_msg - send a COMMS message with attached data * * @hdev: pointer to the habanalabs device structure * @fw_loader: managing structure for loading device's FW - * @reset_cause: enumerated cause for the recent hard reset + * @msg_type: message type + * @data: data to be sent * * @return 0 on success, otherwise non-zero error code */ -static int hl_fw_dynamic_report_reset_cause(struct hl_device *hdev, - struct fw_load_mgr *fw_loader, - enum comms_reset_cause reset_cause) +static int hl_fw_dynamic_send_msg(struct hl_device *hdev, + struct fw_load_mgr *fw_loader, u8 msg_type, void *data) { struct lkd_msg_comms msg; int rc; @@ -2181,11 +2180,20 @@ static int hl_fw_dynamic_report_reset_cause(struct hl_device *hdev, memset(&msg, 0, sizeof(msg)); /* create message to be sent */ - msg.header.type = HL_COMMS_RESET_CAUSE_TYPE; + msg.header.type = msg_type; msg.header.size = cpu_to_le16(sizeof(struct comms_msg_header)); msg.header.magic = cpu_to_le32(HL_COMMS_MSG_MAGIC); - msg.reset_cause = reset_cause; + switch (msg_type) { + case HL_COMMS_RESET_CAUSE_TYPE: + msg.reset_cause = *(__u8 *) data; + break; + default: + dev_err(hdev->dev, + "Send COMMS message - invalid message type %u\n", + msg_type); + return -EINVAL; + } rc = hl_fw_dynamic_request_descriptor(hdev, fw_loader, sizeof(struct lkd_msg_comms)); @@ -2252,8 +2260,8 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev, goto protocol_err; if (hdev->curr_reset_cause) { - rc = hl_fw_dynamic_report_reset_cause(hdev, fw_loader, - hdev->curr_reset_cause); + rc = hl_fw_dynamic_send_msg(hdev, fw_loader, + HL_COMMS_RESET_CAUSE_TYPE, &hdev->curr_reset_cause); if (rc) goto protocol_err; diff --git 
a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index bebebcb163ee84ece5a441e22cc460f3f510d03c..a2002cbf794b57a554918c1c3c0dd326a29353eb 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -26,6 +26,7 @@ #include #include #include +#include <linux/dma-buf.h> #define HL_NAME "habanalabs" @@ -68,6 +69,9 @@ #define HL_STATE_DUMP_HIST_LEN 5 +/* Default value for device reset trigger, an invalid value */ +#define HL_RESET_TRIGGER_DEFAULT 0xFF + #define OBJ_NAMES_HASH_TABLE_BITS 7 /* 1 << 7 buckets */ #define SYNC_TO_ENGINE_HASH_TABLE_BITS 7 /* 1 << 7 buckets */ @@ -132,13 +136,18 @@ enum hl_mmu_page_table_location { * - HL_RESET_FW * F/W will perform the reset. No need to ask it to reset the device. This is relevant * only when running with secured f/w + * + * - HL_RESET_FW_FATAL_ERR + * Set if the reset is due to a fatal error from FW */ + #define HL_RESET_HARD (1 << 0) #define HL_RESET_FROM_RESET_THREAD (1 << 1) #define HL_RESET_HEARTBEAT (1 << 2) #define HL_RESET_TDR (1 << 3) #define HL_RESET_DEVICE_RELEASE (1 << 4) #define HL_RESET_FW (1 << 5) +#define HL_RESET_FW_FATAL_ERR (1 << 6) #define HL_MAX_SOBS_PER_MONITOR 8 @@ -447,6 +456,9 @@ struct hl_hints_range { * for hints validity check. * device_dma_offset_for_host_access: the offset to add to host DMA addresses * to enable the device to access them. + * @max_freq_value: current max clk frequency. + * @clk_pll_index: clock PLL index that specifies which PLL determines the clock + * we display to the user. * @mmu_pgt_size: MMU page tables total size. * @mmu_pte_size: PTE size in MMU page tables. * @mmu_hop_table_size: MMU hop table size. @@ -543,6 +555,8 @@ struct asic_fixed_properties { u64 cb_va_end_addr; u64 dram_hints_align_mask; u64 device_dma_offset_for_host_access; + u64 max_freq_value; + u32 clk_pll_index; u32 mmu_pgt_size; u32 mmu_pte_size; u32 mmu_hop_table_size; @@ -601,6 +615,9 @@ struct asic_fixed_properties { * masters QIDs that multi cs is waiting on * @error: mark this fence with error * @timestamp: timestamp upon completion + * @mcs_handling_done: indicates that the corresponding command submission has + * finished its mcs handling; this does not mean it was part + * of the mcs */ struct hl_fence { struct completion completion; u32 stream_master_qid_map; int error; ktime_t timestamp; + u8 mcs_handling_done; }; /** @@ -1352,6 +1370,23 @@ struct hl_cs_counters_atomic { atomic64_t validation_drop_cnt; }; +/** + * struct hl_dmabuf_priv - a dma-buf private object. + * @dmabuf: pointer to dma-buf object. + * @ctx: pointer to the dma-buf owner's context. + * @phys_pg_pack: pointer to physical page pack if the dma-buf was exported from a + * memory allocation handle. + * @device_address: physical address of the device's memory. Relevant only + * if phys_pg_pack is NULL (dma-buf was exported from address). + * The total size can be taken from the dmabuf object. + */ +struct hl_dmabuf_priv { + struct dma_buf *dmabuf; + struct hl_ctx *ctx; + struct hl_vm_phys_pg_pack *phys_pg_pack; + uint64_t device_address; +};
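
The hl_dmabuf_priv object above backs the dma-buf export path added to memory.c later in this patch. To show how that path is meant to be driven, here is a hypothetical userspace sketch; the HL_MEM_OP_EXPORT_DMABUF_FD op and the export_dmabuf_fd fields appear in this patch, while the uapi header path, the HL_IOCTL_MEMORY macro and the device node name are assumptions:

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <misc/habanalabs.h>            /* assumed uapi header location */

int main(void)
{
        union hl_mem_args args = { 0 };
        int fd = open("/dev/hl0", O_RDWR);      /* assumed device node */

        if (fd < 0)
                return 1;

        args.in.op = HL_MEM_OP_EXPORT_DMABUF_FD;
        args.in.flags = O_RDWR | O_CLOEXEC;     /* dma-buf FD flags */
        args.in.export_dmabuf_fd.handle = 0x1;  /* hypothetical allocation handle */
        args.in.export_dmabuf_fd.mem_size = 0;  /* unused when exporting a handle */

        if (ioctl(fd, HL_IOCTL_MEMORY, &args) == 0)
                printf("dma-buf fd: %d\n", args.out.fd);

        close(fd);
        return 0;
}
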
+ /** * struct hl_ctx - user/kernel context. * @mem_hash: holds mapping from virtual address to virtual memory area @@ -1662,6 +1697,7 @@ struct hl_vm_hw_block_list_node { * @npages: num physical pages in the pack. * @total_size: total size of all the pages in this list. * @mapping_cnt: number of shared mappings. + * @exporting_cnt: number of dma-buf exports. * @asid: the context related to this list. * @page_size: size of each page in the pack. * @flags: HL_MEM_* flags related to this list. @@ -1676,6 +1712,7 @@ struct hl_vm_phys_pg_pack { u64 npages; u64 total_size; atomic_t mapping_cnt; + u32 exporting_cnt; u32 asid; u32 page_size; u32 flags; @@ -2396,6 +2433,7 @@ struct multi_cs_data { * the error will be ignored by the driver during * device initialization. Mainly used to debug and * workaround firmware bugs + * @dram_pci_bar_start: start bus address of PCIe bar towards DRAM. * @last_successful_open_jif: timestamp (jiffies) of the last successful * device open. * @last_open_session_duration_jif: duration (jiffies) of the last device open @@ -2440,8 +2478,12 @@ struct multi_cs_data { * @collective_mon_idx: helper index for collective initialization * @supports_coresight: is CoreSight supported. * @supports_soft_reset: is soft reset supported. - * @allow_external_soft_reset: true if soft reset initiated by user or TDR is - * allowed. + * @allow_inference_soft_reset: true if the ASIC supports soft reset that is + * initiated by user or TDR. This is only true + * in inference ASICs, as there is no real-world + * use-case of doing soft-reset in training (due + * to the fact that training runs on multiple + * devices) * @supports_cb_mapping: is mapping a CB to the device's MMU supported. * @needs_reset: true if reset_on_lockup is false and device should be reset * due to lockup. @@ -2452,6 +2494,10 @@ struct multi_cs_data { * @supports_staged_submission: true if staged submissions are supported * @curr_reset_cause: saves an enumerated reset cause when a hard reset is * triggered, and cleared after it is shared with preboot. + * @prev_reset_trigger: saves the previous trigger which caused a reset, overridden + * with a new value on the next reset + * @reset_trigger_repeated: set if device reset is triggered more than once with + * the same cause. * @skip_reset_on_timeout: Skip device reset if CS has timed out, wait for it to * complete instead. 
* @device_cpu_is_halted: Flag to indicate whether the device CPU was already @@ -2537,6 +2583,7 @@ struct hl_device { u64 max_power; u64 clock_gating_mask; u64 boot_error_status_mask; + u64 dram_pci_bar_start; u64 last_successful_open_jif; u64 last_open_session_duration_jif; u64 open_counter; @@ -2572,13 +2619,15 @@ struct hl_device { u8 collective_mon_idx; u8 supports_coresight; u8 supports_soft_reset; - u8 allow_external_soft_reset; + u8 allow_inference_soft_reset; u8 supports_cb_mapping; u8 needs_reset; u8 process_kill_trial_cnt; u8 device_fini_pending; u8 supports_staged_submission; u8 curr_reset_cause; + u8 prev_reset_trigger; + u8 reset_trigger_repeated; u8 skip_reset_on_timeout; u8 device_cpu_is_halted; u8 supports_wait_for_multi_cs; @@ -2956,6 +3005,15 @@ int hl_set_voltage(struct hl_device *hdev, int sensor_index, u32 attr, long value); int hl_set_current(struct hl_device *hdev, int sensor_index, u32 attr, long value); +int hl_set_power(struct hl_device *hdev, + int sensor_index, u32 attr, long value); +int hl_get_power(struct hl_device *hdev, + int sensor_index, u32 attr, long *value); +int hl_get_clk_rate(struct hl_device *hdev, + u32 *cur_clk, u32 *max_clk); +void hl_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq); +void hl_add_device_attr(struct hl_device *hdev, + struct attribute_group *dev_attr_grp); void hw_sob_get(struct hl_hw_sob *hw_sob); void hw_sob_put(struct hl_hw_sob *hw_sob); void hl_encaps_handle_do_release(struct kref *ref); diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c index a75e4fceb9d85156fbf42ac04e97c5f2e550f775..949d1b5c5c41cc86c01c7b0ca623fa4e67ec3e89 100644 --- a/drivers/misc/habanalabs/common/habanalabs_drv.c +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -225,6 +225,17 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp) if (!hpriv) return -ENOMEM; + /* Prevent other routines from reading partial hpriv data by + * initializing hpriv fields before inserting it to the list + */ + hpriv->hdev = hdev; + filp->private_data = hpriv; + hpriv->filp = filp; + hpriv->is_control = true; + nonseekable_open(inode, filp); + + hpriv->taskpid = find_get_pid(current->pid); + mutex_lock(&hdev->fpriv_list_lock); if (!hl_device_operational(hdev, NULL)) { @@ -238,19 +249,15 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp) list_add(&hpriv->dev_node, &hdev->fpriv_list); mutex_unlock(&hdev->fpriv_list_lock); - hpriv->hdev = hdev; - filp->private_data = hpriv; - hpriv->filp = filp; - hpriv->is_control = true; - nonseekable_open(inode, filp); - - hpriv->taskpid = find_get_pid(current->pid); - return 0; out_err: mutex_unlock(&hdev->fpriv_list_lock); + filp->private_data = NULL; + put_pid(hpriv->taskpid); + kfree(hpriv); + return rc; } @@ -339,6 +346,7 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev, set_driver_behavior_per_device(hdev); hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN; + hdev->prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT; if (timeout_locked) hdev->timeout_jiffies = msecs_to_jiffies(timeout_locked * 1000); diff --git a/drivers/misc/habanalabs/gaudi/gaudi_hwmgr.c b/drivers/misc/habanalabs/common/hwmgr.c similarity index 61% rename from drivers/misc/habanalabs/gaudi/gaudi_hwmgr.c rename to drivers/misc/habanalabs/common/hwmgr.c index 9b60eadd4c355ee8ccd55ce1a37169dada71e915..5451019f143fac59213cedc9dba2b453ba20eadb 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi_hwmgr.c +++ b/drivers/misc/habanalabs/common/hwmgr.c @@ -1,29 
+1,26 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright 2016-2018 HabanaLabs, Ltd. + * Copyright 2019-2021 HabanaLabs, Ltd. * All Rights Reserved. */ -#include "gaudiP.h" -#include "../include/gaudi/gaudi_fw_if.h" +#include "habanalabs.h" -void gaudi_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq) +void hl_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq) { - struct gaudi_device *gaudi = hdev->asic_specific; - - if (freq == PLL_LAST) - hl_set_frequency(hdev, HL_GAUDI_MME_PLL, gaudi->max_freq_value); + hl_set_frequency(hdev, hdev->asic_prop.clk_pll_index, + hdev->asic_prop.max_freq_value); } -int gaudi_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk) +int hl_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk) { long value; if (!hl_device_operational(hdev, NULL)) return -ENODEV; - value = hl_get_frequency(hdev, HL_GAUDI_MME_PLL, false); + value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false); if (value < 0) { dev_err(hdev->dev, "Failed to retrieve device max clock %ld\n", @@ -33,7 +30,7 @@ int gaudi_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk) *max_clk = (value / 1000 / 1000); - value = hl_get_frequency(hdev, HL_GAUDI_MME_PLL, true); + value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true); if (value < 0) { dev_err(hdev->dev, @@ -51,15 +48,14 @@ static ssize_t clk_max_freq_mhz_show(struct device *dev, struct device_attribute *attr, char *buf) { struct hl_device *hdev = dev_get_drvdata(dev); - struct gaudi_device *gaudi = hdev->asic_specific; long value; if (!hl_device_operational(hdev, NULL)) return -ENODEV; - value = hl_get_frequency(hdev, HL_GAUDI_MME_PLL, false); + value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false); - gaudi->max_freq_value = value; + hdev->asic_prop.max_freq_value = value; return sprintf(buf, "%lu\n", (value / 1000 / 1000)); } @@ -68,7 +64,6 @@ static ssize_t clk_max_freq_mhz_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct hl_device *hdev = dev_get_drvdata(dev); - struct gaudi_device *gaudi = hdev->asic_specific; int rc; u64 value; @@ -83,9 +78,10 @@ static ssize_t clk_max_freq_mhz_store(struct device *dev, goto fail; } - gaudi->max_freq_value = value * 1000 * 1000; + hdev->asic_prop.max_freq_value = value * 1000 * 1000; - hl_set_frequency(hdev, HL_GAUDI_MME_PLL, gaudi->max_freq_value); + hl_set_frequency(hdev, hdev->asic_prop.clk_pll_index, + hdev->asic_prop.max_freq_value); fail: return count; @@ -100,7 +96,7 @@ static ssize_t clk_cur_freq_mhz_show(struct device *dev, if (!hl_device_operational(hdev, NULL)) return -ENODEV; - value = hl_get_frequency(hdev, HL_GAUDI_MME_PLL, true); + value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true); return sprintf(buf, "%lu\n", (value / 1000 / 1000)); } @@ -108,14 +104,14 @@ static ssize_t clk_cur_freq_mhz_show(struct device *dev, static DEVICE_ATTR_RW(clk_max_freq_mhz); static DEVICE_ATTR_RO(clk_cur_freq_mhz); -static struct attribute *gaudi_dev_attrs[] = { +static struct attribute *hl_dev_attrs[] = { &dev_attr_clk_max_freq_mhz.attr, &dev_attr_clk_cur_freq_mhz.attr, NULL, }; -void gaudi_add_device_attr(struct hl_device *hdev, +void hl_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_attr_grp) { - dev_attr_grp->attrs = gaudi_dev_attrs; + dev_attr_grp->attrs = hl_dev_attrs; } diff --git a/drivers/misc/habanalabs/common/hwmon.c b/drivers/misc/habanalabs/common/hwmon.c index 
6b421d76b3111dfac9edc4e44d16db5cf6a7c3ab..e33f65be8a00f3fe80a38a1f468d37f3cced19ea 100644 --- a/drivers/misc/habanalabs/common/hwmon.c +++ b/drivers/misc/habanalabs/common/hwmon.c @@ -113,6 +113,9 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type, { struct hl_device *hdev = dev_get_drvdata(dev); int rc; + u32 cpucp_attr; + bool use_cpucp_enum = (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 & + CPU_BOOT_DEV_STS0_MAP_HWMON_EN) ? true : false; if (!hl_device_operational(hdev, NULL)) return -ENODEV; @@ -121,65 +124,134 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type, case hwmon_temp: switch (attr) { case hwmon_temp_input: + cpucp_attr = cpucp_temp_input; + break; case hwmon_temp_max: + cpucp_attr = cpucp_temp_max; + break; case hwmon_temp_crit: + cpucp_attr = cpucp_temp_crit; + break; case hwmon_temp_max_hyst: + cpucp_attr = cpucp_temp_max_hyst; + break; case hwmon_temp_crit_hyst: + cpucp_attr = cpucp_temp_crit_hyst; + break; case hwmon_temp_offset: + cpucp_attr = cpucp_temp_offset; + break; case hwmon_temp_highest: + cpucp_attr = cpucp_temp_highest; break; default: return -EINVAL; } - rc = hl_get_temperature(hdev, channel, attr, val); + if (use_cpucp_enum) + rc = hl_get_temperature(hdev, channel, cpucp_attr, val); + else + rc = hl_get_temperature(hdev, channel, attr, val); break; case hwmon_in: switch (attr) { case hwmon_in_input: + cpucp_attr = cpucp_in_input; + break; case hwmon_in_min: + cpucp_attr = cpucp_in_min; + break; case hwmon_in_max: + cpucp_attr = cpucp_in_max; + break; case hwmon_in_highest: + cpucp_attr = cpucp_in_highest; break; default: return -EINVAL; } - rc = hl_get_voltage(hdev, channel, attr, val); + if (use_cpucp_enum) + rc = hl_get_voltage(hdev, channel, cpucp_attr, val); + else + rc = hl_get_voltage(hdev, channel, attr, val); break; case hwmon_curr: switch (attr) { case hwmon_curr_input: + cpucp_attr = cpucp_curr_input; + break; case hwmon_curr_min: + cpucp_attr = cpucp_curr_min; + break; case hwmon_curr_max: + cpucp_attr = cpucp_curr_max; + break; case hwmon_curr_highest: + cpucp_attr = cpucp_curr_highest; break; default: return -EINVAL; } - rc = hl_get_current(hdev, channel, attr, val); + if (use_cpucp_enum) + rc = hl_get_current(hdev, channel, cpucp_attr, val); + else + rc = hl_get_current(hdev, channel, attr, val); break; case hwmon_fan: switch (attr) { case hwmon_fan_input: + cpucp_attr = cpucp_fan_input; + break; case hwmon_fan_min: + cpucp_attr = cpucp_fan_min; + break; case hwmon_fan_max: + cpucp_attr = cpucp_fan_max; break; default: return -EINVAL; } - rc = hl_get_fan_speed(hdev, channel, attr, val); + + if (use_cpucp_enum) + rc = hl_get_fan_speed(hdev, channel, cpucp_attr, val); + else + rc = hl_get_fan_speed(hdev, channel, attr, val); break; case hwmon_pwm: switch (attr) { case hwmon_pwm_input: + cpucp_attr = cpucp_pwm_input; + break; case hwmon_pwm_enable: + cpucp_attr = cpucp_pwm_enable; + break; + default: + return -EINVAL; + } + + if (use_cpucp_enum) + rc = hl_get_pwm_info(hdev, channel, cpucp_attr, val); + else + rc = hl_get_pwm_info(hdev, channel, attr, val); + break; + case hwmon_power: + switch (attr) { + case hwmon_power_input: + cpucp_attr = CPUCP_POWER_INPUT; + break; + case hwmon_power_input_highest: + cpucp_attr = CPUCP_POWER_INPUT_HIGHEST; break; default: return -EINVAL; } - rc = hl_get_pwm_info(hdev, channel, attr, val); + + if (use_cpucp_enum) + rc = hl_get_power(hdev, channel, cpucp_attr, val); + else + rc = hl_get_power(hdev, channel, attr, val); break; default: return -EINVAL; @@ -191,6 +263,9 @@ 
static int hl_write(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, long val) { struct hl_device *hdev = dev_get_drvdata(dev); + u32 cpucp_attr; + bool use_cpucp_enum = (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 & + CPU_BOOT_DEV_STS0_MAP_HWMON_EN) ? true : false; if (!hl_device_operational(hdev, NULL)) return -ENODEV; @@ -199,40 +274,78 @@ static int hl_write(struct device *dev, enum hwmon_sensor_types type, case hwmon_temp: switch (attr) { case hwmon_temp_offset: + cpucp_attr = cpucp_temp_offset; + break; case hwmon_temp_reset_history: + cpucp_attr = cpucp_temp_reset_history; break; default: return -EINVAL; } - hl_set_temperature(hdev, channel, attr, val); + + if (use_cpucp_enum) + hl_set_temperature(hdev, channel, cpucp_attr, val); + else + hl_set_temperature(hdev, channel, attr, val); break; case hwmon_pwm: switch (attr) { case hwmon_pwm_input: + cpucp_attr = cpucp_pwm_input; + break; case hwmon_pwm_enable: + cpucp_attr = cpucp_pwm_enable; break; default: return -EINVAL; } - hl_set_pwm_info(hdev, channel, attr, val); + + if (use_cpucp_enum) + hl_set_pwm_info(hdev, channel, cpucp_attr, val); + else + hl_set_pwm_info(hdev, channel, attr, val); break; case hwmon_in: switch (attr) { case hwmon_in_reset_history: + cpucp_attr = cpucp_in_reset_history; break; default: return -EINVAL; } - hl_set_voltage(hdev, channel, attr, val); + + if (use_cpucp_enum) + hl_set_voltage(hdev, channel, cpucp_attr, val); + else + hl_set_voltage(hdev, channel, attr, val); break; case hwmon_curr: switch (attr) { case hwmon_curr_reset_history: + cpucp_attr = cpucp_curr_reset_history; break; default: return -EINVAL; } - hl_set_current(hdev, channel, attr, val); + + if (use_cpucp_enum) + hl_set_current(hdev, channel, cpucp_attr, val); + else + hl_set_current(hdev, channel, attr, val); + break; + case hwmon_power: + switch (attr) { + case hwmon_power_reset_history: + cpucp_attr = CPUCP_POWER_RESET_INPUT_HISTORY; + break; + default: + return -EINVAL; + } + + if (use_cpucp_enum) + hl_set_power(hdev, channel, cpucp_attr, val); + else + hl_set_power(hdev, channel, attr, val); break; default: return -EINVAL; @@ -296,6 +409,15 @@ static umode_t hl_is_visible(const void *data, enum hwmon_sensor_types type, return 0644; } break; + case hwmon_power: + switch (attr) { + case hwmon_power_input: + case hwmon_power_input_highest: + return 0444; + case hwmon_power_reset_history: + return 0200; + } + break; default: break; } @@ -551,6 +673,60 @@ int hl_set_current(struct hl_device *hdev, return rc; } +int hl_set_power(struct hl_device *hdev, + int sensor_index, u32 attr, long value) +{ + struct cpucp_packet pkt; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(CPUCP_PACKET_POWER_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); + pkt.sensor_index = __cpu_to_le16(sensor_index); + pkt.type = __cpu_to_le16(attr); + pkt.value = __cpu_to_le64(value); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + 0, NULL); + + if (rc) + dev_err(hdev->dev, + "Failed to set power of sensor %d, error %d\n", + sensor_index, rc); + + return rc; +} + +int hl_get_power(struct hl_device *hdev, + int sensor_index, u32 attr, long *value) +{ + struct cpucp_packet pkt; + u64 result; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(CPUCP_PACKET_POWER_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); + pkt.sensor_index = __cpu_to_le16(sensor_index); + pkt.type = __cpu_to_le16(attr); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + 0, &result); + + *value = (long) 
result; + + if (rc) { + dev_err(hdev->dev, + "Failed to get power of sensor %d, error %d\n", + sensor_index, rc); + *value = 0; + } + + return rc; +} + int hl_hwmon_init(struct hl_device *hdev) { struct device *dev = hdev->pdev ? &hdev->pdev->dev : hdev->dev; diff --git a/drivers/misc/habanalabs/common/irq.c b/drivers/misc/habanalabs/common/irq.c index 39b14a93339314feb8ee4b0b2903027515dfc4e8..96d82b6826745ad9b13908df334ea512c7fd477d 100644 --- a/drivers/misc/habanalabs/common/irq.c +++ b/drivers/misc/habanalabs/common/irq.c @@ -141,10 +141,13 @@ static void handle_user_cq(struct hl_device *hdev, struct hl_user_interrupt *user_cq) { struct hl_user_pending_interrupt *pend; + ktime_t now = ktime_get(); spin_lock(&user_cq->wait_list_lock); - list_for_each_entry(pend, &user_cq->wait_list_head, wait_list_node) + list_for_each_entry(pend, &user_cq->wait_list_head, wait_list_node) { + pend->fence.timestamp = now; complete_all(&pend->fence.completion); + } spin_unlock(&user_cq->wait_list_lock); } diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c index 33986933aa9ea658cc6010cbd7db681c8b409cee..504973330e2e5935987bcbe9c0e8fdf5358e28ce 100644 --- a/drivers/misc/habanalabs/common/memory.c +++ b/drivers/misc/habanalabs/common/memory.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright 2016-2019 HabanaLabs, Ltd. + * Copyright 2016-2021 HabanaLabs, Ltd. * All Rights Reserved. */ @@ -11,6 +11,7 @@ #include #include +#include <linux/pci-p2pdma.h> #define HL_MMU_DEBUG 0 @@ -347,6 +348,12 @@ static int free_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args) return -EINVAL; } + if (phys_pg_pack->exporting_cnt) { + dev_dbg(hdev->dev, "handle %u is exported, cannot free\n", handle); + spin_unlock(&vm->idr_lock); + return -EINVAL; + } + /* * must remove from idr before the freeing of the physical * pages as the refcount of the pool is also the trigger of the @@ -1487,13 +1494,487 @@ int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma) return 0; } +static int set_dma_sg(struct scatterlist *sg, u64 bar_address, u64 chunk_size, + struct device *dev, enum dma_data_direction dir) +{ + dma_addr_t addr; + int rc; + + addr = dma_map_resource(dev, bar_address, chunk_size, dir, + DMA_ATTR_SKIP_CPU_SYNC); + rc = dma_mapping_error(dev, addr); + if (rc) + return rc; + + sg_set_page(sg, NULL, chunk_size, 0); + sg_dma_address(sg) = addr; + sg_dma_len(sg) = chunk_size; + + return 0; +} + +static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64 *pages, u64 npages, + u64 page_size, struct device *dev, + enum dma_data_direction dir) +{ + u64 chunk_size, bar_address, dma_max_seg_size; + struct asic_fixed_properties *prop; + int rc, i, j, nents, cur_page; + struct scatterlist *sg; + struct sg_table *sgt; + + prop = &hdev->asic_prop; + + dma_max_seg_size = dma_get_max_seg_size(dev); + + /* We would like to align the max segment size to PAGE_SIZE, so the + * SGL will contain aligned addresses that can be easily mapped to + * an MMU + */ + dma_max_seg_size = ALIGN_DOWN(dma_max_seg_size, PAGE_SIZE); + if (dma_max_seg_size < PAGE_SIZE) { + dev_err_ratelimited(hdev->dev, + "dma_max_seg_size %llu can't be smaller than PAGE_SIZE\n", + dma_max_seg_size); + return ERR_PTR(-EINVAL); + } + + sgt = kzalloc(sizeof(*sgt), GFP_KERNEL); + if (!sgt) + return ERR_PTR(-ENOMEM); + + /* If the size of each page is larger than the dma max segment size, + * then we can't combine pages and the number of entries in the SGL + * will just be the + * <number of pages> * <chunks per page> + */ + if 
(page_size > dma_max_seg_size) + nents = npages * DIV_ROUND_UP_ULL(page_size, dma_max_seg_size); + else + /* Get number of non-contiguous chunks */ + for (i = 1, nents = 1, chunk_size = page_size ; i < npages ; i++) { + if (pages[i - 1] + page_size != pages[i] || + chunk_size + page_size > dma_max_seg_size) { + nents++; + chunk_size = page_size; + continue; + } + + chunk_size += page_size; + } + + rc = sg_alloc_table(sgt, nents, GFP_KERNEL | __GFP_ZERO); + if (rc) + goto error_free; + + cur_page = 0; + + if (page_size > dma_max_seg_size) { + u64 size_left, cur_device_address = 0; + + size_left = page_size; + + /* Need to split each page into the number of chunks of + * dma_max_seg_size + */ + for_each_sgtable_dma_sg(sgt, sg, i) { + if (size_left == page_size) + cur_device_address = + pages[cur_page] - prop->dram_base_address; + else + cur_device_address += dma_max_seg_size; + + chunk_size = min(size_left, dma_max_seg_size); + + bar_address = hdev->dram_pci_bar_start + cur_device_address; + + rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir); + if (rc) + goto error_unmap; + + if (size_left > dma_max_seg_size) { + size_left -= dma_max_seg_size; + } else { + cur_page++; + size_left = page_size; + } + } + } else { + /* Merge pages and put them into the scatterlist */ + for_each_sgtable_dma_sg(sgt, sg, i) { + chunk_size = page_size; + for (j = cur_page + 1 ; j < npages ; j++) { + if (pages[j - 1] + page_size != pages[j] || + chunk_size + page_size > dma_max_seg_size) + break; + + chunk_size += page_size; + } + + bar_address = hdev->dram_pci_bar_start + + (pages[cur_page] - prop->dram_base_address); + + rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir); + if (rc) + goto error_unmap; + + cur_page = j; + } + } + + /* Because we are not going to include a CPU list, we want to have some + * chance that other users will detect this by setting the orig_nents + * to 0 and using only nents (the length of the DMA list) when going over + * the sgl + */ + sgt->orig_nents = 0; + + return sgt; + +error_unmap: + for_each_sgtable_dma_sg(sgt, sg, i) { + if (!sg_dma_len(sg)) + continue; + + dma_unmap_resource(dev, sg_dma_address(sg), + sg_dma_len(sg), dir, + DMA_ATTR_SKIP_CPU_SYNC); + } + + sg_free_table(sgt); + +error_free: + kfree(sgt); + return ERR_PTR(rc); +}
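
The SGL entry count that alloc_sgt_from_device_pages() derives above can be reproduced with a standalone C model (illustrative only, not driver code): a page is split into chunks when it exceeds the DMA max segment size, and contiguous pages are merged while the running chunk stays within it.

#include <stdio.h>
#include <stdint.h>

static uint64_t div_round_up(uint64_t n, uint64_t d)
{
        return (n + d - 1) / d;
}

static uint64_t count_nents(const uint64_t *pages, uint64_t npages,
                            uint64_t page_size, uint64_t max_seg)
{
        uint64_t nents, chunk_size, i;

        if (page_size > max_seg)
                /* every page is split into max_seg sized chunks */
                return npages * div_round_up(page_size, max_seg);

        /* merge contiguous pages; start a new chunk on a gap or
         * when max_seg would be exceeded
         */
        for (i = 1, nents = 1, chunk_size = page_size; i < npages; i++) {
                if (pages[i - 1] + page_size != pages[i] ||
                    chunk_size + page_size > max_seg) {
                        nents++;
                        chunk_size = page_size;
                        continue;
                }
                chunk_size += page_size;
        }
        return nents;
}

int main(void)
{
        /* two contiguous 4 KiB pages, a gap, then one more page:
         * expect 2 entries with a 64 KiB max segment size
         */
        uint64_t pages[] = { 0x10000, 0x11000, 0x40000 };

        printf("nents=%llu\n",
               (unsigned long long)count_nents(pages, 3, 0x1000, 0x10000));
        return 0;
}
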
+ +static int hl_dmabuf_attach(struct dma_buf *dmabuf, + struct dma_buf_attachment *attachment) +{ + struct hl_dmabuf_priv *hl_dmabuf; + struct hl_device *hdev; + int rc; + + hl_dmabuf = dmabuf->priv; + hdev = hl_dmabuf->ctx->hdev; + + rc = pci_p2pdma_distance_many(hdev->pdev, &attachment->dev, 1, true); + + if (rc < 0) + attachment->peer2peer = false; + return 0; +} + +static struct sg_table *hl_map_dmabuf(struct dma_buf_attachment *attachment, + enum dma_data_direction dir) +{ + struct dma_buf *dma_buf = attachment->dmabuf; + struct hl_vm_phys_pg_pack *phys_pg_pack; + struct hl_dmabuf_priv *hl_dmabuf; + struct hl_device *hdev; + struct sg_table *sgt; + + hl_dmabuf = dma_buf->priv; + hdev = hl_dmabuf->ctx->hdev; + phys_pg_pack = hl_dmabuf->phys_pg_pack; + + if (!attachment->peer2peer) { + dev_dbg(hdev->dev, "Failed to map dmabuf because p2p is disabled\n"); + return ERR_PTR(-EPERM); + } + + if (phys_pg_pack) + sgt = alloc_sgt_from_device_pages(hdev, + phys_pg_pack->pages, + phys_pg_pack->npages, + phys_pg_pack->page_size, + attachment->dev, + dir); + else + sgt = alloc_sgt_from_device_pages(hdev, + &hl_dmabuf->device_address, + 1, + hl_dmabuf->dmabuf->size, + attachment->dev, + dir); + + if (IS_ERR(sgt)) + dev_err(hdev->dev, "failed (%ld) to initialize sgt for dmabuf\n", PTR_ERR(sgt)); + + return sgt; +} + +static void hl_unmap_dmabuf(struct dma_buf_attachment *attachment, + struct sg_table *sgt, + enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + + /* The memory behind the dma-buf has *always* resided on the device itself, i.e. it lives + * only in the 'device' domain (after all, it maps a PCI bar address which points to the + * device memory). + * + * Therefore, it was never in the 'CPU' domain and hence, there is no need to perform + * a sync of the memory to the CPU's cache, as it never resided inside that cache. + */ + for_each_sgtable_dma_sg(sgt, sg, i) + dma_unmap_resource(attachment->dev, sg_dma_address(sg), + sg_dma_len(sg), dir, + DMA_ATTR_SKIP_CPU_SYNC); + + /* Need to restore orig_nents because sg_free_table uses that field */ + sgt->orig_nents = sgt->nents; + sg_free_table(sgt); + kfree(sgt); +} + +static void hl_release_dmabuf(struct dma_buf *dmabuf) +{ + struct hl_dmabuf_priv *hl_dmabuf = dmabuf->priv; + struct hl_ctx *ctx = hl_dmabuf->ctx; + struct hl_device *hdev = ctx->hdev; + struct hl_vm *vm = &hdev->vm; + + if (hl_dmabuf->phys_pg_pack) { + spin_lock(&vm->idr_lock); + hl_dmabuf->phys_pg_pack->exporting_cnt--; + spin_unlock(&vm->idr_lock); + } + + hl_ctx_put(hl_dmabuf->ctx); + + kfree(hl_dmabuf); +} + +static const struct dma_buf_ops habanalabs_dmabuf_ops = { + .attach = hl_dmabuf_attach, + .map_dma_buf = hl_map_dmabuf, + .unmap_dma_buf = hl_unmap_dmabuf, + .release = hl_release_dmabuf, +}; + +static int export_dmabuf_common(struct hl_ctx *ctx, + struct hl_dmabuf_priv *hl_dmabuf, + u64 total_size, int flags, int *dmabuf_fd) +{ + DEFINE_DMA_BUF_EXPORT_INFO(exp_info); + struct hl_device *hdev = ctx->hdev; + int rc, fd; + + exp_info.ops = &habanalabs_dmabuf_ops; + exp_info.size = total_size; + exp_info.flags = flags; + exp_info.priv = hl_dmabuf; + + hl_dmabuf->dmabuf = dma_buf_export(&exp_info); + if (IS_ERR(hl_dmabuf->dmabuf)) { + dev_err(hdev->dev, "failed to export dma-buf\n"); + return PTR_ERR(hl_dmabuf->dmabuf); + } + + fd = dma_buf_fd(hl_dmabuf->dmabuf, flags); + if (fd < 0) { + dev_err(hdev->dev, "failed to get a file descriptor for a dma-buf\n"); + rc = fd; + goto err_dma_buf_put; + } + + hl_dmabuf->ctx = ctx; + hl_ctx_get(hdev, hl_dmabuf->ctx); + + *dmabuf_fd = fd; + + return 0; + +err_dma_buf_put: + dma_buf_put(hl_dmabuf->dmabuf); + return rc; +} + +/** + * export_dmabuf_from_addr() - export a dma-buf object for the given memory + * address and size. + * @ctx: pointer to the context structure. + * @device_addr: device memory physical address. + * @size: size of device memory. + * @flags: DMA-BUF file/FD flags. + * @dmabuf_fd: pointer to result FD that represents the dma-buf object. + * + * Create and export a dma-buf object for an existing memory allocation inside + * the device memory, and return a FD which is associated with the dma-buf + * object. + * + * Return: 0 on success, non-zero for failure. 
+ */ +static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 device_addr, + u64 size, int flags, int *dmabuf_fd) +{ + struct hl_dmabuf_priv *hl_dmabuf; + struct hl_device *hdev = ctx->hdev; + struct asic_fixed_properties *prop; + u64 bar_address; + int rc; + + prop = &hdev->asic_prop; + + if (!IS_ALIGNED(device_addr, PAGE_SIZE)) { + dev_dbg(hdev->dev, + "exported device memory address 0x%llx should be aligned to 0x%lx\n", + device_addr, PAGE_SIZE); + return -EINVAL; + } + + if (size < PAGE_SIZE) { + dev_dbg(hdev->dev, + "exported device memory size %llu should be equal to or greater than %lu\n", + size, PAGE_SIZE); + return -EINVAL; + } + + if (device_addr < prop->dram_user_base_address || + device_addr + size > prop->dram_end_address || + device_addr + size < device_addr) { + dev_dbg(hdev->dev, + "DRAM memory range 0x%llx (+0x%llx) is outside of DRAM boundaries\n", + device_addr, size); + return -EINVAL; + } + + bar_address = hdev->dram_pci_bar_start + + (device_addr - prop->dram_base_address); + + if (bar_address + size > + hdev->dram_pci_bar_start + prop->dram_pci_bar_size || + bar_address + size < bar_address) { + dev_dbg(hdev->dev, + "DRAM memory range 0x%llx (+0x%llx) is outside of PCI BAR boundaries\n", + device_addr, size); + return -EINVAL; + } + + hl_dmabuf = kzalloc(sizeof(*hl_dmabuf), GFP_KERNEL); + if (!hl_dmabuf) + return -ENOMEM; + + hl_dmabuf->device_address = device_addr; + + rc = export_dmabuf_common(ctx, hl_dmabuf, size, flags, dmabuf_fd); + if (rc) + goto err_free_dmabuf_wrapper; + + return 0; + +err_free_dmabuf_wrapper: + kfree(hl_dmabuf); + return rc; +} + +/** + * export_dmabuf_from_handle() - export a dma-buf object for the given memory + * handle. + * @ctx: pointer to the context structure. + * @handle: device memory allocation handle. + * @flags: DMA-BUF file/FD flags. + * @dmabuf_fd: pointer to result FD that represents the dma-buf object. + * + * Create and export a dma-buf object for an existing memory allocation inside + * the device memory, and return a FD which is associated with the dma-buf + * object. + * + * Return: 0 on success, non-zero for failure. 
+ */ +static int export_dmabuf_from_handle(struct hl_ctx *ctx, u64 handle, int flags, + int *dmabuf_fd) +{ + struct hl_vm_phys_pg_pack *phys_pg_pack; + struct hl_dmabuf_priv *hl_dmabuf; + struct hl_device *hdev = ctx->hdev; + struct asic_fixed_properties *prop; + struct hl_vm *vm = &hdev->vm; + u64 bar_address; + int rc, i; + + prop = &hdev->asic_prop; + + if (upper_32_bits(handle)) { + dev_dbg(hdev->dev, "no match for handle 0x%llx\n", handle); + return -EINVAL; + } + + spin_lock(&vm->idr_lock); + + phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, (u32) handle); + if (!phys_pg_pack) { + spin_unlock(&vm->idr_lock); + dev_dbg(hdev->dev, "no match for handle 0x%x\n", (u32) handle); + return -EINVAL; + } + + /* increment now to avoid freeing device memory while exporting */ + phys_pg_pack->exporting_cnt++; + + spin_unlock(&vm->idr_lock); + + if (phys_pg_pack->vm_type != VM_TYPE_PHYS_PACK) { + dev_dbg(hdev->dev, "handle 0x%llx does not represent DRAM memory\n", handle); + rc = -EINVAL; + goto err_dec_exporting_cnt; + } + + for (i = 0 ; i < phys_pg_pack->npages ; i++) { + + bar_address = hdev->dram_pci_bar_start + + (phys_pg_pack->pages[i] - + prop->dram_base_address); + + if (bar_address + phys_pg_pack->page_size > + hdev->dram_pci_bar_start + prop->dram_pci_bar_size || + bar_address + phys_pg_pack->page_size < bar_address) { + + dev_dbg(hdev->dev, + "DRAM memory range 0x%llx (+0x%x) is outside of PCI BAR boundaries\n", + phys_pg_pack->pages[i], + phys_pg_pack->page_size); + + rc = -EINVAL; + goto err_dec_exporting_cnt; + } + } + + hl_dmabuf = kzalloc(sizeof(*hl_dmabuf), GFP_KERNEL); + if (!hl_dmabuf) { + rc = -ENOMEM; + goto err_dec_exporting_cnt; + } + + hl_dmabuf->phys_pg_pack = phys_pg_pack; + + rc = export_dmabuf_common(ctx, hl_dmabuf, phys_pg_pack->total_size, + flags, dmabuf_fd); + if (rc) + goto err_free_dmabuf_wrapper; + + return 0; + +err_free_dmabuf_wrapper: + kfree(hl_dmabuf); + +err_dec_exporting_cnt: + spin_lock(&vm->idr_lock); + phys_pg_pack->exporting_cnt--; + spin_unlock(&vm->idr_lock); + + return rc; +} + static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args) { struct hl_device *hdev = hpriv->hdev; struct hl_ctx *ctx = hpriv->ctx; u64 block_handle, device_addr = 0; u32 handle = 0, block_size; - int rc; + int rc, dmabuf_fd = -EBADF; switch (args->in.op) { case HL_MEM_OP_ALLOC: @@ -1542,6 +2023,16 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args) args->out.block_size = block_size; break; + case HL_MEM_OP_EXPORT_DMABUF_FD: + rc = export_dmabuf_from_addr(ctx, + args->in.export_dmabuf_fd.handle, + args->in.export_dmabuf_fd.mem_size, + args->in.flags, + &dmabuf_fd); + memset(args, 0, sizeof(*args)); + args->out.fd = dmabuf_fd; + break; + default: dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n"); rc = -ENOTTY; @@ -1560,7 +2051,7 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data) struct hl_ctx *ctx = hpriv->ctx; u64 block_handle, device_addr = 0; u32 handle = 0, block_size; - int rc; + int rc, dmabuf_fd = -EBADF; if (!hl_device_operational(hdev, &status)) { dev_warn_ratelimited(hdev->dev, @@ -1651,6 +2142,22 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data) args->out.block_size = block_size; break; + case HL_MEM_OP_EXPORT_DMABUF_FD: + if (hdev->asic_prop.dram_supports_virtual_memory) + rc = export_dmabuf_from_handle(ctx, + args->in.export_dmabuf_fd.handle, + args->in.flags, + &dmabuf_fd); + else + rc = export_dmabuf_from_addr(ctx, + args->in.export_dmabuf_fd.handle, + args->in.export_dmabuf_fd.mem_size, + 
args->in.flags, + &dmabuf_fd); + memset(args, 0, sizeof(*args)); + args->out.fd = dmabuf_fd; + break; + default: dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n"); rc = -ENOTTY; diff --git a/drivers/misc/habanalabs/common/mmu/mmu.c b/drivers/misc/habanalabs/common/mmu/mmu.c index 792d25b79ea68b5c82b2365f43854f02a613bf8f..aa96917f62e57bc78c3462dcbf0466173009cbc9 100644 --- a/drivers/misc/habanalabs/common/mmu/mmu.c +++ b/drivers/misc/habanalabs/common/mmu/mmu.c @@ -501,23 +501,25 @@ static void hl_mmu_pa_page_with_offset(struct hl_ctx *ctx, u64 virt_addr, if ((hops->range_type == HL_VA_RANGE_TYPE_DRAM) && !is_power_of_2(prop->dram_page_size)) { - unsigned long dram_page_size = prop->dram_page_size; - u64 page_offset_mask; - u64 phys_addr_mask; - u32 bit; + u64 dram_page_size, dram_base, abs_phys_addr, abs_virt_addr, + page_id, page_start; + u32 page_off; /* - * find last set bit in page_size to cover all bits of page - * offset. note that 1 has to be added to bit index. - * note that the internal ulong variable is used to avoid - * alignment issue. + * Bit arithmetic cannot be used for non-power-of-2 page + * sizes. In addition, since bit arithmetic is not used, + * we cannot ignore the DRAM base. All of that must be considered. */ - bit = find_last_bit(&dram_page_size, - sizeof(dram_page_size) * BITS_PER_BYTE) + 1; - page_offset_mask = (BIT_ULL(bit) - 1); - phys_addr_mask = ~page_offset_mask; - *phys_addr = (tmp_phys_addr & phys_addr_mask) | - (virt_addr & page_offset_mask); + + dram_page_size = prop->dram_page_size; + dram_base = prop->dram_base_address; + abs_phys_addr = tmp_phys_addr - dram_base; + abs_virt_addr = virt_addr - dram_base; + page_id = DIV_ROUND_DOWN_ULL(abs_phys_addr, dram_page_size); + page_start = page_id * dram_page_size; + div_u64_rem(abs_virt_addr, dram_page_size, &page_off); + + *phys_addr = page_start + page_off + dram_base; } else { /* * find the correct hop shift field in hl_mmu_properties diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c index 34f9f2779962a15a0c62142bc47719a1cb7e3c62..42c1769ad25d9fc971bb754a8c5b62b4f46e5499 100644 --- a/drivers/misc/habanalabs/common/sysfs.c +++ b/drivers/misc/habanalabs/common/sysfs.c @@ -206,12 +206,12 @@ static ssize_t soft_reset_store(struct device *dev, goto out; } - if (!hdev->allow_external_soft_reset) { - dev_err(hdev->dev, "Device does not support soft-reset\n"); + if (!hdev->allow_inference_soft_reset) { + dev_err(hdev->dev, "Device does not support inference soft-reset\n"); goto out; } - dev_warn(hdev->dev, "Soft-Reset requested through sysfs\n"); + dev_warn(hdev->dev, "Inference Soft-Reset requested through sysfs\n"); hl_device_reset(hdev, 0); diff --git a/drivers/misc/habanalabs/gaudi/Makefile b/drivers/misc/habanalabs/gaudi/Makefile index c9f4703cff249df0d836f6e2e492f22a555ca043..10577c33a81685cf0062e72ba014e44bc89804d7 100644 --- a/drivers/misc/habanalabs/gaudi/Makefile +++ b/drivers/misc/habanalabs/gaudi/Makefile @@ -1,3 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only -HL_GAUDI_FILES := gaudi/gaudi.o gaudi/gaudi_hwmgr.o gaudi/gaudi_security.o \ +HL_GAUDI_FILES := gaudi/gaudi.o gaudi/gaudi_security.o \ gaudi/gaudi_coresight.o diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 14da87b38e83522f7fceec0ef9b1eaf52f0848f0..825737dfe3815f6c38b2e6571fc922a68ae0c011 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -661,6 +661,9 @@ static int gaudi_set_fixed_properties(struct hl_device
*hdev) prop->server_type = HL_SERVER_TYPE_UNKNOWN; + prop->clk_pll_index = HL_GAUDI_MME_PLL; + prop->max_freq_value = GAUDI_MAX_CLK_FREQ; + return 0; } @@ -795,6 +798,7 @@ static int gaudi_early_init(struct hl_device *hdev) } prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID); + hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID); /* If FW security is enabled at this point it means no access to ELBI */ if (hdev->asic_prop.fw_security_enabled) { @@ -1837,8 +1841,6 @@ static int gaudi_sw_init(struct hl_device *hdev) gaudi->cpucp_info_get = gaudi_cpucp_info_get; - gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ; - hdev->asic_specific = gaudi; /* Create DMA pool for small allocations */ @@ -2616,7 +2618,7 @@ static void gaudi_init_e2e(struct hl_device *hdev) static void gaudi_init_hbm_cred(struct hl_device *hdev) { - uint32_t hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd; + u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd; if (hdev->asic_prop.fw_security_enabled) return; @@ -7932,6 +7934,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, { struct gaudi_device *gaudi = hdev->asic_specific; u32 ctl = le32_to_cpu(eq_entry->hdr.ctl); + u32 fw_fatal_err_flag = 0; u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) >> EQ_CTL_EVENT_TYPE_SHIFT); bool reset_required; @@ -7972,6 +7975,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR: gaudi_print_irq_info(hdev, event_type, true); gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); + fw_fatal_err_flag = HL_RESET_FW_FATAL_ERR; goto reset_device; case GAUDI_EVENT_GIC500: @@ -7979,6 +7983,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, case GAUDI_EVENT_L2_RAM_ECC: case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17: gaudi_print_irq_info(hdev, event_type, false); + fw_fatal_err_flag = HL_RESET_FW_FATAL_ERR; goto reset_device; case GAUDI_EVENT_HBM0_SPI_0: @@ -7989,6 +7994,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, gaudi_hbm_read_interrupts(hdev, gaudi_hbm_event_to_dev(event_type), &eq_entry->hbm_ecc_data); + fw_fatal_err_flag = HL_RESET_FW_FATAL_ERR; goto reset_device; case GAUDI_EVENT_HBM0_SPI_1: @@ -8171,9 +8177,9 @@ static void gaudi_handle_eqe(struct hl_device *hdev, reset_device: if (hdev->asic_prop.fw_security_enabled) - hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_FW); + hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_FW | fw_fatal_err_flag); else if (hdev->hard_reset_on_fw_events) - hl_device_reset(hdev, HL_RESET_HARD); + hl_device_reset(hdev, HL_RESET_HARD | fw_fatal_err_flag); else hl_fw_unmask_irq(hdev, event_type); } @@ -9439,9 +9445,9 @@ static const struct hl_asic_funcs gaudi_funcs = { .debugfs_read64 = gaudi_debugfs_read64, .debugfs_write64 = gaudi_debugfs_write64, .debugfs_read_dma = gaudi_debugfs_read_dma, - .add_device_attr = gaudi_add_device_attr, + .add_device_attr = hl_add_device_attr, .handle_eqe = gaudi_handle_eqe, - .set_pll_profile = gaudi_set_pll_profile, + .set_pll_profile = hl_set_pll_profile, .get_events_stat = gaudi_get_events_stat, .read_pte = gaudi_read_pte, .write_pte = gaudi_write_pte, @@ -9465,7 +9471,7 @@ static const struct hl_asic_funcs gaudi_funcs = { .halt_coresight = gaudi_halt_coresight, .ctx_init = gaudi_ctx_init, .ctx_fini = gaudi_ctx_fini, - .get_clk_rate = gaudi_get_clk_rate, + .get_clk_rate = hl_get_clk_rate, .get_queue_id_for_cq = gaudi_get_queue_id_for_cq, .load_firmware_to_device = gaudi_load_firmware_to_device, .load_boot_fit_to_device = gaudi_load_boot_fit_to_device, diff --git 
a/drivers/misc/habanalabs/gaudi/gaudiP.h b/drivers/misc/habanalabs/gaudi/gaudiP.h index bbbf1c343e753f74c8a20f7212c159a41f89c2b6..f325e36a71e615d29229f3ac58f8b17c5df4afd3 100644 --- a/drivers/misc/habanalabs/gaudi/gaudiP.h +++ b/drivers/misc/habanalabs/gaudi/gaudiP.h @@ -319,7 +319,6 @@ struct gaudi_internal_qman_info { * the actual number of internal queues because they are not in * consecutive order. * @hbm_bar_cur_addr: current address of HBM PCI bar. - * @max_freq_value: current max clk frequency. * @events: array that holds all event id's * @events_stat: array that holds histogram of all received events. * @events_stat_aggregate: same as events_stat but doesn't get cleared on reset @@ -345,7 +344,6 @@ struct gaudi_device { struct gaudi_collective_properties collective_props; u64 hbm_bar_cur_addr; - u64 max_freq_value; u32 events[GAUDI_EVENT_SIZE]; u32 events_stat[GAUDI_EVENT_SIZE]; @@ -359,10 +357,8 @@ void gaudi_init_security(struct hl_device *hdev); void gaudi_ack_protection_bits_errors(struct hl_device *hdev); void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_attr_grp); -void gaudi_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq); int gaudi_debug_coresight(struct hl_device *hdev, void *data); void gaudi_halt_coresight(struct hl_device *hdev); -int gaudi_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk); void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid); #endif /* GAUDIP_H_ */ diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 031c1849da14d438f66a8edda857910d4d2395f5..5536e8c27bd54cf91bb9819bb6d20e350528cebd 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -471,6 +471,8 @@ int goya_set_fixed_properties(struct hl_device *hdev) prop->server_type = HL_SERVER_TYPE_UNKNOWN; + prop->clk_pll_index = HL_GOYA_MME_PLL; + return 0; } @@ -622,6 +624,7 @@ static int goya_early_init(struct hl_device *hdev) } prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID); + hdev->dram_pci_bar_start = pci_resource_start(pdev, DDR_BAR_ID); /* If FW security is enabled at this point it means no access to ELBI */ if (hdev->asic_prop.fw_security_enabled) { @@ -959,7 +962,7 @@ static int goya_sw_init(struct hl_device *hdev) spin_lock_init(&goya->hw_queues_lock); hdev->supports_coresight = true; hdev->supports_soft_reset = true; - hdev->allow_external_soft_reset = true; + hdev->allow_inference_soft_reset = true; hdev->supports_wait_for_multi_cs = false; hdev->asic_funcs->set_pci_memory_regions(hdev); @@ -4829,6 +4832,12 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) case GOYA_ASYNC_EVENT_ID_PLL0 ... 
GOYA_ASYNC_EVENT_ID_PLL6: case GOYA_ASYNC_EVENT_ID_AXI_ECC: case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC: + goya_print_irq_info(hdev, event_type, false); + if (hdev->hard_reset_on_fw_events) + hl_device_reset(hdev, (HL_RESET_HARD | + HL_RESET_FW_FATAL_ERR)); + break; + case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET: goya_print_irq_info(hdev, event_type, false); if (hdev->hard_reset_on_fw_events) @@ -5649,7 +5658,7 @@ static const struct hl_asic_funcs goya_funcs = { .halt_coresight = goya_halt_coresight, .ctx_init = goya_ctx_init, .ctx_fini = goya_ctx_fini, - .get_clk_rate = goya_get_clk_rate, + .get_clk_rate = hl_get_clk_rate, .get_queue_id_for_cq = goya_get_queue_id_for_cq, .load_firmware_to_device = goya_load_firmware_to_device, .load_boot_fit_to_device = goya_load_boot_fit_to_device, diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h index 0b05da614729d93c738b6e81237c6293e47d2d08..97add7b04f8216b89a25a3de7a6783038c74d990 100644 --- a/drivers/misc/habanalabs/goya/goyaP.h +++ b/drivers/misc/habanalabs/goya/goyaP.h @@ -235,7 +235,6 @@ void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr); void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev); -int goya_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk); u32 goya_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx); u64 goya_get_device_time(struct hl_device *hdev); diff --git a/drivers/misc/habanalabs/goya/goya_hwmgr.c b/drivers/misc/habanalabs/goya/goya_hwmgr.c index 7d007125727ffef8f1b153c77c1f1778247322ac..59b2624ff81ad9da744c19b270f459c3b41e8cbb 100644 --- a/drivers/misc/habanalabs/goya/goya_hwmgr.c +++ b/drivers/misc/habanalabs/goya/goya_hwmgr.c @@ -32,37 +32,6 @@ void goya_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq) } } -int goya_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk) -{ - long value; - - if (!hl_device_operational(hdev, NULL)) - return -ENODEV; - - value = hl_get_frequency(hdev, HL_GOYA_MME_PLL, false); - - if (value < 0) { - dev_err(hdev->dev, "Failed to retrieve device max clock %ld\n", - value); - return value; - } - - *max_clk = (value / 1000 / 1000); - - value = hl_get_frequency(hdev, HL_GOYA_MME_PLL, true); - - if (value < 0) { - dev_err(hdev->dev, - "Failed to retrieve device current clock %ld\n", - value); - return value; - } - - *cur_clk = (value / 1000 / 1000); - - return 0; -} - static ssize_t mme_clk_show(struct device *dev, struct device_attribute *attr, char *buf) { diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h index 9ff6a448f0d443c876cec06920b46b9c6b4701ad..ae13231fda943db35b79480df08473c14a245f31 100644 --- a/drivers/misc/habanalabs/include/common/cpucp_if.h +++ b/drivers/misc/habanalabs/include/common/cpucp_if.h @@ -542,11 +542,14 @@ enum cpucp_packet_rc { */ enum cpucp_temp_type { cpucp_temp_input, + cpucp_temp_min = 4, + cpucp_temp_min_hyst, cpucp_temp_max = 6, cpucp_temp_max_hyst, cpucp_temp_crit, cpucp_temp_crit_hyst, cpucp_temp_offset = 19, + cpucp_temp_lowest = 21, cpucp_temp_highest = 22, cpucp_temp_reset_history = 23 }; @@ -555,6 +558,7 @@ enum cpucp_in_attributes { cpucp_in_input, cpucp_in_min, cpucp_in_max, + cpucp_in_lowest = 6, cpucp_in_highest = 7, cpucp_in_reset_history }; @@ -563,6 +567,7 @@ enum cpucp_curr_attributes { cpucp_curr_input, cpucp_curr_min, cpucp_curr_max, + cpucp_curr_lowest = 6, cpucp_curr_highest = 7, cpucp_curr_reset_history }; @@ -598,6 +603,16 @@ enum cpucp_pll_type_attributes 
{ cpucp_pll_pci, }; +/* + * cpucp_power_type aligns with hwmon_power_attributes + * defined in Linux kernel hwmon.h file + */ +enum cpucp_power_type { + CPUCP_POWER_INPUT = 8, + CPUCP_POWER_INPUT_HIGHEST = 9, + CPUCP_POWER_RESET_INPUT_HISTORY = 11 +}; + /* * MSI type enumeration table for all ASICs and future SW versions. * For future ASIC-LKD compatibility, we can only add new enumerations. @@ -731,6 +746,9 @@ struct cpucp_security_info { * @pll_map: Bit map of supported PLLs for current ASIC version. * @mme_binning_mask: MME binning mask, * (0 = functional, 1 = binned) + * @dram_binning_mask: DRAM binning mask, 1 bit per DRAM instance + * (0 = functional, 1 = binned) + * @memory_repair_flag: eFuse flag indicating memory repair */ struct cpucp_info { struct cpucp_sensor sensors[CPUCP_MAX_SENSORS]; @@ -749,7 +767,9 @@ struct cpucp_info { __le64 reserved3; __le64 reserved4; __u8 reserved5; - __u8 pad[7]; + __u8 dram_binning_mask; + __u8 memory_repair_flag; + __u8 pad[5]; struct cpucp_security_info sec_info; __le32 reserved6; __u8 pll_map[PLL_MAP_LEN]; diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h index 3099653234e4a3aa0487ced5100ac80975f8dc20..2626df6ef3ef76b4dfc40f1cb06a1d8066bc31fe 100644 --- a/drivers/misc/habanalabs/include/common/hl_boot_if.h +++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h @@ -15,6 +15,28 @@ #define VERSION_MAX_LEN 128 +enum cpu_boot_err { + CPU_BOOT_ERR_DRAM_INIT_FAIL = 0, + CPU_BOOT_ERR_FIT_CORRUPTED = 1, + CPU_BOOT_ERR_TS_INIT_FAIL = 2, + CPU_BOOT_ERR_DRAM_SKIPPED = 3, + CPU_BOOT_ERR_BMC_WAIT_SKIPPED = 4, + CPU_BOOT_ERR_NIC_DATA_NOT_RDY = 5, + CPU_BOOT_ERR_NIC_FW_FAIL = 6, + CPU_BOOT_ERR_SECURITY_NOT_RDY = 7, + CPU_BOOT_ERR_SECURITY_FAIL = 8, + CPU_BOOT_ERR_EFUSE_FAIL = 9, + CPU_BOOT_ERR_PRI_IMG_VER_FAIL = 10, + CPU_BOOT_ERR_SEC_IMG_VER_FAIL = 11, + CPU_BOOT_ERR_PLL_FAIL = 12, + CPU_BOOT_ERR_DEVICE_UNUSABLE_FAIL = 13, + CPU_BOOT_ERR_BOOT_FW_CRIT_ERR = 18, + CPU_BOOT_ERR_BINNING_FAIL = 19, + CPU_BOOT_ERR_ENABLED = 31, + CPU_BOOT_ERR_SCND_EN = 63, + CPU_BOOT_ERR_LAST = 64 /* we have 2 registers of 32 bits */ +}; + /* * CPU error bits in BOOT_ERROR registers * @@ -78,25 +100,13 @@ * CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL Device is unusable and customer support * should be contacted. * - * CPU_BOOT_ERR0_ARC0_HALT_ACK_NOT_RCVD HALT ACK from ARC0 is not received - * within specified retries after issuing - * HALT request. ARC0 appears to be in bad - * reset. - * - * CPU_BOOT_ERR0_ARC1_HALT_ACK_NOT_RCVD HALT ACK from ARC1 is not received - * within specified retries after issuing - * HALT request. ARC1 appears to be in bad - * reset. + * CPU_BOOT_ERR0_BOOT_FW_CRIT_ERR Critical error was detected during + * the execution of ppboot or preboot. + * For example: stack overflow. * - * CPU_BOOT_ERR0_ARC0_RUN_ACK_NOT_RCVD RUN ACK from ARC0 is not received - * within specified timeout after issuing - * RUN request. ARC0 appears to be in bad - * reset. - * - * CPU_BOOT_ERR0_ARC1_RUN_ACK_NOT_RCVD RUN ACK from ARC1 is not received - * within specified timeout after issuing - * RUN request. ARC1 appears to be in bad - * reset. + * CPU_BOOT_ERR0_BINNING_FAIL Binning settings failed, meaning + * malfunctioning components might still be + * in use. * * CPU_BOOT_ERR0_ENABLED Error registers enabled. * This is a main indication that the @@ -104,26 +114,57 @@ * registers. Meaning the error bits are * not garbage, but actual error statuses.
*/ -#define CPU_BOOT_ERR0_DRAM_INIT_FAIL (1 << 0) -#define CPU_BOOT_ERR0_FIT_CORRUPTED (1 << 1) -#define CPU_BOOT_ERR0_TS_INIT_FAIL (1 << 2) -#define CPU_BOOT_ERR0_DRAM_SKIPPED (1 << 3) -#define CPU_BOOT_ERR0_BMC_WAIT_SKIPPED (1 << 4) -#define CPU_BOOT_ERR0_NIC_DATA_NOT_RDY (1 << 5) -#define CPU_BOOT_ERR0_NIC_FW_FAIL (1 << 6) -#define CPU_BOOT_ERR0_SECURITY_NOT_RDY (1 << 7) -#define CPU_BOOT_ERR0_SECURITY_FAIL (1 << 8) -#define CPU_BOOT_ERR0_EFUSE_FAIL (1 << 9) -#define CPU_BOOT_ERR0_PRI_IMG_VER_FAIL (1 << 10) -#define CPU_BOOT_ERR0_SEC_IMG_VER_FAIL (1 << 11) -#define CPU_BOOT_ERR0_PLL_FAIL (1 << 12) -#define CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL (1 << 13) -#define CPU_BOOT_ERR0_ARC0_HALT_ACK_NOT_RCVD (1 << 14) -#define CPU_BOOT_ERR0_ARC1_HALT_ACK_NOT_RCVD (1 << 15) -#define CPU_BOOT_ERR0_ARC0_RUN_ACK_NOT_RCVD (1 << 16) -#define CPU_BOOT_ERR0_ARC1_RUN_ACK_NOT_RCVD (1 << 17) -#define CPU_BOOT_ERR0_ENABLED (1 << 31) -#define CPU_BOOT_ERR1_ENABLED (1 << 31) +#define CPU_BOOT_ERR0_DRAM_INIT_FAIL (1 << CPU_BOOT_ERR_DRAM_INIT_FAIL) +#define CPU_BOOT_ERR0_FIT_CORRUPTED (1 << CPU_BOOT_ERR_FIT_CORRUPTED) +#define CPU_BOOT_ERR0_TS_INIT_FAIL (1 << CPU_BOOT_ERR_TS_INIT_FAIL) +#define CPU_BOOT_ERR0_DRAM_SKIPPED (1 << CPU_BOOT_ERR_DRAM_SKIPPED) +#define CPU_BOOT_ERR0_BMC_WAIT_SKIPPED (1 << CPU_BOOT_ERR_BMC_WAIT_SKIPPED) +#define CPU_BOOT_ERR0_NIC_DATA_NOT_RDY (1 << CPU_BOOT_ERR_NIC_DATA_NOT_RDY) +#define CPU_BOOT_ERR0_NIC_FW_FAIL (1 << CPU_BOOT_ERR_NIC_FW_FAIL) +#define CPU_BOOT_ERR0_SECURITY_NOT_RDY (1 << CPU_BOOT_ERR_SECURITY_NOT_RDY) +#define CPU_BOOT_ERR0_SECURITY_FAIL (1 << CPU_BOOT_ERR_SECURITY_FAIL) +#define CPU_BOOT_ERR0_EFUSE_FAIL (1 << CPU_BOOT_ERR_EFUSE_FAIL) +#define CPU_BOOT_ERR0_PRI_IMG_VER_FAIL (1 << CPU_BOOT_ERR_PRI_IMG_VER_FAIL) +#define CPU_BOOT_ERR0_SEC_IMG_VER_FAIL (1 << CPU_BOOT_ERR_SEC_IMG_VER_FAIL) +#define CPU_BOOT_ERR0_PLL_FAIL (1 << CPU_BOOT_ERR_PLL_FAIL) +#define CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL (1 << CPU_BOOT_ERR_DEVICE_UNUSABLE_FAIL) +#define CPU_BOOT_ERR0_BOOT_FW_CRIT_ERR (1 << CPU_BOOT_ERR_BOOT_FW_CRIT_ERR) +#define CPU_BOOT_ERR0_BINNING_FAIL (1 << CPU_BOOT_ERR_BINNING_FAIL) +#define CPU_BOOT_ERR0_ENABLED (1 << CPU_BOOT_ERR_ENABLED) +#define CPU_BOOT_ERR1_ENABLED (1 << CPU_BOOT_ERR_ENABLED) + +enum cpu_boot_dev_sts { + CPU_BOOT_DEV_STS_SECURITY_EN = 0, + CPU_BOOT_DEV_STS_DEBUG_EN = 1, + CPU_BOOT_DEV_STS_WATCHDOG_EN = 2, + CPU_BOOT_DEV_STS_DRAM_INIT_EN = 3, + CPU_BOOT_DEV_STS_BMC_WAIT_EN = 4, + CPU_BOOT_DEV_STS_E2E_CRED_EN = 5, + CPU_BOOT_DEV_STS_HBM_CRED_EN = 6, + CPU_BOOT_DEV_STS_RL_EN = 7, + CPU_BOOT_DEV_STS_SRAM_SCR_EN = 8, + CPU_BOOT_DEV_STS_DRAM_SCR_EN = 9, + CPU_BOOT_DEV_STS_FW_HARD_RST_EN = 10, + CPU_BOOT_DEV_STS_PLL_INFO_EN = 11, + CPU_BOOT_DEV_STS_SP_SRAM_EN = 12, + CPU_BOOT_DEV_STS_CLK_GATE_EN = 13, + CPU_BOOT_DEV_STS_HBM_ECC_EN = 14, + CPU_BOOT_DEV_STS_PKT_PI_ACK_EN = 15, + CPU_BOOT_DEV_STS_FW_LD_COM_EN = 16, + CPU_BOOT_DEV_STS_FW_IATU_CONF_EN = 17, + CPU_BOOT_DEV_STS_FW_NIC_MAC_EN = 18, + CPU_BOOT_DEV_STS_DYN_PLL_EN = 19, + CPU_BOOT_DEV_STS_GIC_PRIVILEGED_EN = 20, + CPU_BOOT_DEV_STS_EQ_INDEX_EN = 21, + CPU_BOOT_DEV_STS_MULTI_IRQ_POLL_EN = 22, + CPU_BOOT_DEV_STS_FW_NIC_STAT_XPCS91_EN = 23, + CPU_BOOT_DEV_STS_FW_NIC_STAT_EXT_EN = 24, + CPU_BOOT_DEV_STS_IS_IDLE_CHECK_EN = 25, + CPU_BOOT_DEV_STS_MAP_HWMON_EN = 26, + CPU_BOOT_DEV_STS_ENABLED = 31, + CPU_BOOT_DEV_STS_SCND_EN = 63, + CPU_BOOT_DEV_STS_LAST = 64 /* we have 2 registers of 32 bits */ +}; /* * BOOT DEVICE STATUS bits in BOOT_DEVICE_STS registers @@ -233,7 +274,7 @@ * was not served before. 
* Initialized in: linux * - * CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN Use multiple scratchpad interfaces to + * CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN Use multiple scratchpad interfaces to * prevent IRQs overriding each other. * Initialized in: linux * @@ -252,6 +293,11 @@ * where a bit is set if the engine is not idle. * Initialized in: linux * + * CPU_BOOT_DEV_STS0_MAP_HWMON_EN + * If set, means f/w supports proprietary + * HWMON enum mapping to cpucp enums. + * Initialized in: linux + * * CPU_BOOT_DEV_STS0_ENABLED Device status register enabled. * This is a main indication that the * running FW populates the device status @@ -261,34 +307,35 @@ * Initialized in: preboot * */ -#define CPU_BOOT_DEV_STS0_SECURITY_EN (1 << 0) -#define CPU_BOOT_DEV_STS0_DEBUG_EN (1 << 1) -#define CPU_BOOT_DEV_STS0_WATCHDOG_EN (1 << 2) -#define CPU_BOOT_DEV_STS0_DRAM_INIT_EN (1 << 3) -#define CPU_BOOT_DEV_STS0_BMC_WAIT_EN (1 << 4) -#define CPU_BOOT_DEV_STS0_E2E_CRED_EN (1 << 5) -#define CPU_BOOT_DEV_STS0_HBM_CRED_EN (1 << 6) -#define CPU_BOOT_DEV_STS0_RL_EN (1 << 7) -#define CPU_BOOT_DEV_STS0_SRAM_SCR_EN (1 << 8) -#define CPU_BOOT_DEV_STS0_DRAM_SCR_EN (1 << 9) -#define CPU_BOOT_DEV_STS0_FW_HARD_RST_EN (1 << 10) -#define CPU_BOOT_DEV_STS0_PLL_INFO_EN (1 << 11) -#define CPU_BOOT_DEV_STS0_SP_SRAM_EN (1 << 12) -#define CPU_BOOT_DEV_STS0_CLK_GATE_EN (1 << 13) -#define CPU_BOOT_DEV_STS0_HBM_ECC_EN (1 << 14) -#define CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN (1 << 15) -#define CPU_BOOT_DEV_STS0_FW_LD_COM_EN (1 << 16) -#define CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN (1 << 17) -#define CPU_BOOT_DEV_STS0_FW_NIC_MAC_EN (1 << 18) -#define CPU_BOOT_DEV_STS0_DYN_PLL_EN (1 << 19) -#define CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN (1 << 20) -#define CPU_BOOT_DEV_STS0_EQ_INDEX_EN (1 << 21) -#define CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN (1 << 22) -#define CPU_BOOT_DEV_STS0_FW_NIC_STAT_XPCS91_EN (1 << 23) -#define CPU_BOOT_DEV_STS0_FW_NIC_STAT_EXT_EN (1 << 24) -#define CPU_BOOT_DEV_STS0_IS_IDLE_CHECK_EN (1 << 25) -#define CPU_BOOT_DEV_STS0_ENABLED (1 << 31) -#define CPU_BOOT_DEV_STS1_ENABLED (1 << 31) +#define CPU_BOOT_DEV_STS0_SECURITY_EN (1 << CPU_BOOT_DEV_STS_SECURITY_EN) +#define CPU_BOOT_DEV_STS0_DEBUG_EN (1 << CPU_BOOT_DEV_STS_DEBUG_EN) +#define CPU_BOOT_DEV_STS0_WATCHDOG_EN (1 << CPU_BOOT_DEV_STS_WATCHDOG_EN) +#define CPU_BOOT_DEV_STS0_DRAM_INIT_EN (1 << CPU_BOOT_DEV_STS_DRAM_INIT_EN) +#define CPU_BOOT_DEV_STS0_BMC_WAIT_EN (1 << CPU_BOOT_DEV_STS_BMC_WAIT_EN) +#define CPU_BOOT_DEV_STS0_E2E_CRED_EN (1 << CPU_BOOT_DEV_STS_E2E_CRED_EN) +#define CPU_BOOT_DEV_STS0_HBM_CRED_EN (1 << CPU_BOOT_DEV_STS_HBM_CRED_EN) +#define CPU_BOOT_DEV_STS0_RL_EN (1 << CPU_BOOT_DEV_STS_RL_EN) +#define CPU_BOOT_DEV_STS0_SRAM_SCR_EN (1 << CPU_BOOT_DEV_STS_SRAM_SCR_EN) +#define CPU_BOOT_DEV_STS0_DRAM_SCR_EN (1 << CPU_BOOT_DEV_STS_DRAM_SCR_EN) +#define CPU_BOOT_DEV_STS0_FW_HARD_RST_EN (1 << CPU_BOOT_DEV_STS_FW_HARD_RST_EN) +#define CPU_BOOT_DEV_STS0_PLL_INFO_EN (1 << CPU_BOOT_DEV_STS_PLL_INFO_EN) +#define CPU_BOOT_DEV_STS0_SP_SRAM_EN (1 << CPU_BOOT_DEV_STS_SP_SRAM_EN) +#define CPU_BOOT_DEV_STS0_CLK_GATE_EN (1 << CPU_BOOT_DEV_STS_CLK_GATE_EN) +#define CPU_BOOT_DEV_STS0_HBM_ECC_EN (1 << CPU_BOOT_DEV_STS_HBM_ECC_EN) +#define CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN (1 << CPU_BOOT_DEV_STS_PKT_PI_ACK_EN) +#define CPU_BOOT_DEV_STS0_FW_LD_COM_EN (1 << CPU_BOOT_DEV_STS_FW_LD_COM_EN) +#define CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN (1 << CPU_BOOT_DEV_STS_FW_IATU_CONF_EN) +#define CPU_BOOT_DEV_STS0_FW_NIC_MAC_EN (1 << CPU_BOOT_DEV_STS_FW_NIC_MAC_EN) +#define CPU_BOOT_DEV_STS0_DYN_PLL_EN (1 << 
CPU_BOOT_DEV_STS_DYN_PLL_EN) +#define CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN (1 << CPU_BOOT_DEV_STS_GIC_PRIVILEGED_EN) +#define CPU_BOOT_DEV_STS0_EQ_INDEX_EN (1 << CPU_BOOT_DEV_STS_EQ_INDEX_EN) +#define CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN (1 << CPU_BOOT_DEV_STS_MULTI_IRQ_POLL_EN) +#define CPU_BOOT_DEV_STS0_FW_NIC_STAT_XPCS91_EN (1 << CPU_BOOT_DEV_STS_FW_NIC_STAT_XPCS91_EN) +#define CPU_BOOT_DEV_STS0_FW_NIC_STAT_EXT_EN (1 << CPU_BOOT_DEV_STS_FW_NIC_STAT_EXT_EN) +#define CPU_BOOT_DEV_STS0_IS_IDLE_CHECK_EN (1 << CPU_BOOT_DEV_STS_IS_IDLE_CHECK_EN) +#define CPU_BOOT_DEV_STS0_MAP_HWMON_EN (1 << CPU_BOOT_DEV_STS_MAP_HWMON_EN) +#define CPU_BOOT_DEV_STS0_ENABLED (1 << CPU_BOOT_DEV_STS_ENABLED) +#define CPU_BOOT_DEV_STS1_ENABLED (1 << CPU_BOOT_DEV_STS_ENABLED) enum cpu_boot_status { CPU_BOOT_STATUS_NA = 0, /* Default value after reset of chip */ @@ -405,6 +452,8 @@ struct cpu_dyn_regs { enum comms_msg_type { HL_COMMS_DESC_TYPE = 0, HL_COMMS_RESET_CAUSE_TYPE = 1, + HL_COMMS_FW_CFG_SKIP_TYPE = 2, + HL_COMMS_BINNING_CONF_TYPE = 3, }; /* TODO: remove this struct after the code is updated to use message */ @@ -464,6 +513,9 @@ struct lkd_fw_comms_msg { struct { __u8 reset_cause; }; + struct { + __u8 fw_cfg_skip; /* 1 - skip, 0 - don't skip */ + }; }; }; @@ -507,8 +559,6 @@ struct lkd_fw_comms_msg { * COMMS_SKIP_BMC Perform actions required for BMC-less servers. * Do not wait for BMC response. * - * COMMS_LOW_PLL_OPP Initialize PLLs for low OPP. - * * COMMS_PREP_DESC_ELBI Same as COMMS_PREP_DESC only that the memory * space is allocated in a ELBI access only * address range. @@ -524,7 +574,6 @@ enum comms_cmd { COMMS_RST_DEV = 6, COMMS_GOTO_WFE = 7, COMMS_SKIP_BMC = 8, - COMMS_LOW_PLL_OPP = 9, COMMS_PREP_DESC_ELBI = 10, COMMS_INVLD_LAST }; diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_fw_if.h b/drivers/misc/habanalabs/include/gaudi/gaudi_fw_if.h index 34ca4fe50d91ea4270d6babe7f3bb8df620e1d1f..2dba02757d378189a2676346379aeb29a53947d8 100644 --- a/drivers/misc/habanalabs/include/gaudi/gaudi_fw_if.h +++ b/drivers/misc/habanalabs/include/gaudi/gaudi_fw_if.h @@ -8,8 +8,6 @@ #ifndef GAUDI_FW_IF_H #define GAUDI_FW_IF_H -#include <linux/types.h> - #define GAUDI_EVENT_QUEUE_MSI_IDX 8 #define GAUDI_NIC_PORT1_MSI_IDX 10 #define GAUDI_NIC_PORT3_MSI_IDX 12 @@ -78,13 +76,13 @@ struct gaudi_nic_status { __u32 high_ber_cnt; }; -struct gaudi_flops_2_data { +struct gaudi_cold_rst_data { union { struct { - __u32 spsram_init_done : 1; - __u32 reserved : 31; + u32 spsram_init_done : 1; + u32 reserved : 31; }; - __u32 data; + __le32 data; }; }; diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_reg_map.h b/drivers/misc/habanalabs/include/gaudi/gaudi_reg_map.h index b9bd5a7f71eb6202bf500b08405b5ef2c15e5344..92f25c2ae083e94f78995d423f4069d98f5d7b78 100644 --- a/drivers/misc/habanalabs/include/gaudi/gaudi_reg_map.h +++ b/drivers/misc/habanalabs/include/gaudi/gaudi_reg_map.h @@ -33,6 +33,7 @@ #define mmRDWR_TEST mmPSOC_GLOBAL_CONF_SCRATCHPAD_30 #define mmBTL_ID mmPSOC_GLOBAL_CONF_SCRATCHPAD_31 #define mmPREBOOT_PCIE_EN mmPSOC_GLOBAL_CONF_COLD_RST_FLOPS_1 +#define mmCOLD_RST_DATA mmPSOC_GLOBAL_CONF_COLD_RST_FLOPS_2 #define mmUPD_PENDING_STS mmPSOC_GLOBAL_CONF_COLD_RST_FLOPS_3 #endif /* GAUDI_REG_MAP_H_ */ diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index d13bb8c1b45058b81bc2ba52f9c07a64f8a2ce3d..00b30959049951c0f0d8d05b6884e293bc085c72 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -272,6 +272,16 @@ enum hl_gaudi_pll_index { HL_GAUDI_PLL_MAX }; +/** + * enum
hl_device_status - Device status information. + * @HL_DEVICE_STATUS_OPERATIONAL: Device is operational. + * @HL_DEVICE_STATUS_IN_RESET: Device is currently undergoing reset. + * @HL_DEVICE_STATUS_MALFUNCTION: Device is unusable. + * @HL_DEVICE_STATUS_NEEDS_RESET: Device needs reset because auto reset was disabled. + * @HL_DEVICE_STATUS_IN_DEVICE_CREATION: Device is operational but its creation is still in + * progress. + * @HL_DEVICE_STATUS_LAST: Last status. + */ enum hl_device_status { HL_DEVICE_STATUS_OPERATIONAL, HL_DEVICE_STATUS_IN_RESET, @@ -556,33 +566,30 @@ enum gaudi_dcores { HL_GAUDI_ES_DCORE }; +/** + * struct hl_info_args - Main structure to retrieve device related information. + * @return_pointer: User space address of the relevant structure related to HL_INFO_* operation + * mentioned in @op. + * @return_size: Size of the structure used in @return_pointer, just like "size" in "snprintf", it + * limits how many bytes the kernel can write. For hw_events array, the size should be + * hl_info_hw_ip_info.num_of_events * sizeof(__u32). + * @op: Defines which type of information is to be retrieved. Refer to HL_INFO_* for details. + * @dcore_id: DCORE id for which the information is relevant (for Gaudi refer to enum gaudi_dcores). + * @ctx_id: Context ID of the user. Currently not in use. + * @period_ms: Period value, in milliseconds, for utilization rate in range 100ms - 1000ms, in + * 100 ms resolution. Currently not in use. + * @pll_index: Index as defined in hl_<asic>_pll_index enumeration. + * @pad: Padding to 64 bit. + */ struct hl_info_args { - /* Location of relevant struct in userspace */ __u64 return_pointer; - /* - * The size of the return value. Just like "size" in "snprintf", - * it limits how many bytes the kernel can write - * - * For hw_events array, the size should be - * hl_info_hw_ip_info.num_of_events * sizeof(__u32) - */ __u32 return_size; - - /* HL_INFO_* */ __u32 op; union { - /* Dcore id for which the information is relevant. - * For Gaudi refer to 'enum gaudi_dcores' - */ __u32 dcore_id; - /* Context ID - Currently not in use */ __u32 ctx_id; - /* Period value for utilization rate (100ms - 1000ms, in 100ms - * resolution. - */ __u32 period_ms; - /* PLL frequency retrieval */ __u32 pll_index; }; @@ -890,11 +897,7 @@ struct hl_wait_cs_in { */ __u64 addr; /* Target value for completion comparison */ - __u32 target; - /* Absolute timeout to wait for interrupt - * in microseconds - */ - __u32 interrupt_timeout_us; + __u64 target; }; }; @@ -910,7 +913,12 @@ struct hl_wait_cs_in { /* Multi CS API info- valid entries in multi-CS array */ __u8 seq_arr_len; - __u8 pad[7]; + __u8 pad[3]; + + /* Absolute timeout to wait for an interrupt in microseconds. + * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT is set + */ + __u32 interrupt_timeout_us; }; #define HL_WAIT_CS_STATUS_COMPLETED 0 @@ -952,6 +960,10 @@ union hl_wait_cs_args { #define HL_MEM_OP_UNMAP 3 /* Opcode to map a hw block */ #define HL_MEM_OP_MAP_BLOCK 4 +/* Opcode to create DMA-BUF object for an existing device memory allocation + * and to export an FD of that DMA-BUF back to the caller + */ +#define HL_MEM_OP_EXPORT_DMABUF_FD 5 /* Memory flags */ #define HL_MEM_CONTIGUOUS 0x1 @@ -1023,11 +1035,26 @@ struct hl_mem_in { /* Virtual address returned from HL_MEM_OP_MAP */ __u64 device_virt_addr; } unmap; + + /* HL_MEM_OP_EXPORT_DMABUF_FD */ + struct { + /* Handle returned from HL_MEM_OP_ALLOC.
In Gaudi, + * where we don't have MMU for the device memory, the + * driver expects a physical address (instead of + * a handle) in the device memory space. + */ + __u64 handle; + /* Size of memory allocation. Relevant only for GAUDI */ + __u64 mem_size; + } export_dmabuf_fd; }; /* HL_MEM_OP_* */ __u32 op; - /* HL_MEM_* flags */ + /* HL_MEM_* flags. + * For the HL_MEM_OP_EXPORT_DMABUF_FD opcode, this field holds the + * DMA-BUF file/FD flags. + */ __u32 flags; /* Context ID - Currently not in use */ __u32 ctx_id; @@ -1064,6 +1091,13 @@ struct hl_mem_out { __u32 pad; }; + + /* Returned in HL_MEM_OP_EXPORT_DMABUF_FD. Represents the + * DMA-BUF object that was created to describe a memory + * allocation on the device's memory space. The FD should be + * passed to the importer driver + */ + __s32 fd; }; };
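
For clarity, here is a minimal standalone model of the non-power-of-2 DRAM translation that the mmu.c hunk above introduces in hl_mmu_pa_page_with_offset(): the page base is derived from the physical address by integer division and the in-page offset is re-applied from the virtual address, both relative to the DRAM base, since no offset bitmask exists for such page sizes. Plain unsigned 64-bit arithmetic stands in for DIV_ROUND_DOWN_ULL() and div_u64_rem(); translate() and the sample numbers are illustrative, not driver code.

/* Standalone model (not part of the driver) of the new DRAM translation. */
#include <stdint.h>
#include <stdio.h>

static uint64_t translate(uint64_t tmp_phys_addr, uint64_t virt_addr,
			  uint64_t dram_base, uint64_t dram_page_size)
{
	/* Work in DRAM-relative addresses; the base cannot be ignored
	 * because it is not necessarily aligned to the page size.
	 */
	uint64_t abs_phys_addr = tmp_phys_addr - dram_base;
	uint64_t abs_virt_addr = virt_addr - dram_base;
	uint64_t page_id = abs_phys_addr / dram_page_size;	/* DIV_ROUND_DOWN_ULL() */
	uint64_t page_start = page_id * dram_page_size;
	uint64_t page_off = abs_virt_addr % dram_page_size;	/* div_u64_rem() */

	return page_start + page_off + dram_base;
}

int main(void)
{
	uint64_t base = 0x800000000ULL;
	uint64_t page_size = 48ULL << 20;	/* 48 MB, not a power of 2 */

	/* An address inside the second page keeps its in-page offset. */
	printf("0x%llx\n", (unsigned long long)
	       translate(base + page_size, base + page_size + 0x1234,
			 base, page_size));
	return 0;
}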
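
The new cpu_boot_err enumeration spans two 32-bit BOOT_ERROR registers, which is why CPU_BOOT_ERR_LAST is 64 and CPU_BOOT_ERR_SCND_EN (bit 63) lives in the second register. A minimal sketch of how a consumer could map an enum value onto the right register and bit; boot_err_is_set() and binning_failed() are hypothetical names, not part of the patch.

#include <stdint.h>

/* Hypothetical helper: test one cpu_boot_err bit against the pair of
 * 32-bit BOOT_ERROR registers (bits 0-31 in err0, bits 32-63 in err1).
 */
static inline int boot_err_is_set(uint32_t err0, uint32_t err1,
				  unsigned int bit)
{
	uint32_t reg = (bit < 32) ? err0 : err1;

	return (reg >> (bit & 31)) & 1;
}

/* Only trust the error bits once the f/w has marked them valid. */
static int binning_failed(uint32_t err0, uint32_t err1)
{
	if (!boot_err_is_set(err0, err1, 31 /* CPU_BOOT_ERR_ENABLED */))
		return 0;

	return boot_err_is_set(err0, err1, 19 /* CPU_BOOT_ERR_BINNING_FAIL */);
}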
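
Finally, a hypothetical userspace sketch of the dma-buf export flow defined by the uapi additions above. It assumes the updated header is installed as <misc/habanalabs.h>, that dev_fd is an open habanalabs device node (e.g. /dev/hl0), and that HL_IOCTL_MEMORY is the driver's existing memory ioctl; error handling is kept minimal.

#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <misc/habanalabs.h>	/* assumed install path of the uapi header */

/* Hypothetical helper: export a dma-buf FD for device memory.
 * On Gaudi (no MMU for device memory) 'handle' is a physical DRAM
 * address and 'mem_size' is required; on ASICs with virtual device
 * memory it is the handle returned by HL_MEM_OP_ALLOC.
 */
static int export_device_mem(int dev_fd, __u64 handle, __u64 mem_size)
{
	union hl_mem_args args;

	memset(&args, 0, sizeof(args));
	args.in.op = HL_MEM_OP_EXPORT_DMABUF_FD;
	args.in.flags = O_RDWR | O_CLOEXEC;	/* DMA-BUF file/FD flags */
	args.in.export_dmabuf_fd.handle = handle;
	args.in.export_dmabuf_fd.mem_size = mem_size;

	if (ioctl(dev_fd, HL_IOCTL_MEMORY, &args))
		return -1;

	return args.out.fd;	/* pass this FD to the importer driver */
}

The returned FD is then handed to an importing driver, as the comment on the new fd field in hl_mem_out describes.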