Commit be24dd48 authored by Greg Kroah-Hartman

Merge tag 'misc-habanalabs-next-2021-10-18' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux into char-misc-next

Oded writes:

This tag contains habanalabs driver changes for v5.16:

- Add a new uAPI (under the memory ioctl) to request the driver to
  export a DMA-BUF object that represents a memory region on the
  device's DRAM. This is needed to enable peer-to-peer over PCIe
  between a Habana device and an RDMA adapter (e.g. an mlx5 or EFA
  RDMA adapter). A userspace sketch follows this list.

- Add a debugfs node to dynamically configure the CS timeout. Until now,
  it was only configurable through a kernel module parameter (sketch below).

- Fetch more comprehensive power information from the firmware and expose
  it through hwmon (see the sketch after this list).

- Always take a timestamp when waiting for a user interrupt, as the user
  needs that information to optimize graph runtime compilation.

- Modify the user interrupt wait to treat the user value as a 64-bit
  fence instead of a 32-bit one (see the wait sketch after this list).

- Bypass reset in case of a repeated h/w error event after device reset,
  to prevent an endless loop of device resets.

- Fix several bugs in the multi CS completion code.

- Fix a race condition in fd close/open.

- Update to the latest firmware headers.

- Select CRC32 in Kconfig.

- Small fixes and cosmetics.
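
For orientation, a few userspace sketches follow; none of them are part of
this tag, they only illustrate the changes above. The first is a minimal
sketch of the new dma-buf export op. HL_MEM_OP_EXPORT_DMABUF_FD and the
hl_mem_args fields follow the uAPI in the diff below; HL_IOCTL_MEMORY is the
driver's existing memory ioctl, and the allocation handle and the
O_RDWR/O_CLOEXEC flag choice are illustrative assumptions.

#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <misc/habanalabs.h>	/* uAPI header installed from this kernel */

/* Export a DRAM allocation handle (or a DRAM address on ASICs without
 * virtual DRAM) as a dma-buf FD that can be handed to an RDMA driver for
 * PCIe peer-to-peer. Returns the dma-buf FD or -1 on error.
 */
static int export_dram_as_dmabuf(int hl_fd, __u64 handle_or_addr, __u64 size)
{
	union hl_mem_args args;

	memset(&args, 0, sizeof(args));
	args.in.op = HL_MEM_OP_EXPORT_DMABUF_FD;
	args.in.export_dmabuf_fd.handle = handle_or_addr;
	args.in.export_dmabuf_fd.mem_size = size;
	args.in.flags = O_RDWR | O_CLOEXEC;	/* dma-buf file flags (assumed) */

	if (ioctl(hl_fd, HL_IOCTL_MEMORY, &args))
		return -1;

	return args.out.fd;
}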
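
Next, a sketch of driving the new timeout_locked debugfs node documented
further down. Per the node's write handler, the value is in seconds and
writing 0 disables the timeout; the hl0 instance name and the standard
debugfs mount point are assumptions.

#include <stdio.h>

/* Set the command submission timeout of device hl0 at runtime.
 * Requires debugfs mounted at /sys/kernel/debug and sufficient privileges.
 */
static int set_cs_timeout_seconds(unsigned int seconds)
{
	FILE *f = fopen("/sys/kernel/debug/habanalabs/hl0/timeout_locked", "w");

	if (!f)
		return -1;

	fprintf(f, "%u\n", seconds);
	return fclose(f);
}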
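
The extended power telemetry is exposed through the standard hwmon sysfs
attributes (power*_input, power*_input_highest, power*_reset_history), as
added in the hwmon hunk below. A sketch of reading the instantaneous power;
the hwmon0 instance is an assumption, in practice the right hwmonN is found
by matching its "name" attribute.

#include <stdio.h>

/* Read the device's instantaneous power in microwatts, or -1 on error. */
static long read_power_uw(void)
{
	long uw;
	FILE *f = fopen("/sys/class/hwmon/hwmon0/power1_input", "r");

	if (!f)
		return -1;

	if (fscanf(f, "%ld", &uw) != 1)
		uw = -1;

	fclose(f);
	return uw;
}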
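
Finally, a sketch of the updated wait-for-interrupt path showing the 64-bit
target value and the new timestamp output. The in/out fields and status
flags come from the diff below; HL_WAIT_CS_FLAGS_INTERRUPT is the
pre-existing flag selecting interrupt-based wait, and the interrupt-ID
encoding inside the flags word is deliberately omitted here (assumption).

#include <string.h>
#include <sys/ioctl.h>
#include <misc/habanalabs.h>

/* Wait up to 1 second for the 64-bit value at fence_va (a device-writable
 * user address) to reach target. On completion, optionally return the
 * completion timestamp in nanoseconds. Returns 0 on completion, 1 on
 * busy/timeout, -1 on error.
 */
static int wait_on_user_fence(int hl_fd, __u64 fence_va, __u64 target,
			      __u64 *ts_nsec)
{
	union hl_wait_cs_args args;

	memset(&args, 0, sizeof(args));
	args.in.flags = HL_WAIT_CS_FLAGS_INTERRUPT;	/* interrupt ID bits omitted */
	args.in.interrupt_timeout_us = 1000000;
	args.in.addr = fence_va;
	args.in.target = target;	/* compared as a 64-bit value as of this tag */

	if (ioctl(hl_fd, HL_IOCTL_WAIT_CS, &args))
		return -1;

	if (ts_nsec && (args.out.flags & HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD))
		*ts_nsec = args.out.timestamp_nsec;

	return args.out.status == HL_WAIT_CS_STATUS_COMPLETED ? 0 : 1;
}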

* tag 'misc-habanalabs-next-2021-10-18' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux: (25 commits)
  habanalabs: refactor fence handling in hl_cs_poll_fences
  habanalabs: context cleanup cosmetics
  habanalabs: simplify wait for interrupt with timestamp flow
  habanalabs: initialize hpriv fields before adding new node
  habanalabs: Unify frequency set/get functionality
  habanalabs: select CRC32
  habanalabs: add support for dma-buf exporter
  habanalabs: define uAPI to export FD for DMA-BUF
  habanalabs: fix NULL pointer dereference
  habanalabs: fix race condition in multi CS completion
  habanalabs: use only u32
  habanalabs: update firmware files
  habanalabs: bypass reset for continuous h/w error event
  habanalabs: take timestamp on wait for interrupt
  habanalabs: prevent race between fd close/open
  habanalabs: refactor reset log message
  habanalabs: define soft-reset as inference op
  habanalabs: fix debugfs device memory MMU VA translation
  habanalabs: add support for a long interrupt target value
  habanalabs: remove redundant cs validity checks
  ...
......@@ -226,6 +226,12 @@ Description: Gets the state dump occurring on a CS timeout or failure.
Writing an integer X discards X state dumps, so that the
next read would return X+1-st newest state dump.
What: /sys/kernel/debug/habanalabs/hl<n>/timeout_locked
Date: Sep 2021
KernelVersion: 5.16
Contact: obitton@habana.ai
Description: Sets the command submission timeout value in seconds.
What: /sys/kernel/debug/habanalabs/hl<n>/stop_on_err
Date: Mar 2020
KernelVersion: 5.6
......
......@@ -8,6 +8,8 @@ config HABANA_AI
depends on PCI && HAS_IOMEM
select GENERIC_ALLOCATOR
select HWMON
select DMA_SHARED_BUFFER
select CRC32
help
Enables PCIe card driver for Habana's AI Processors (AIP) that are
designed to accelerate Deep Learning inference and training workloads.
......
......@@ -11,4 +11,4 @@ HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \
common/command_buffer.o common/hw_queue.o common/irq.o \
common/sysfs.o common/hwmon.o common/memory.o \
common/command_submission.o common/firmware_if.o \
common/state_dump.o
common/state_dump.o common/hwmgr.o
......@@ -143,6 +143,7 @@ static void hl_fence_init(struct hl_fence *fence, u64 sequence)
fence->cs_sequence = sequence;
fence->error = 0;
fence->timestamp = ktime_set(0, 0);
fence->mcs_handling_done = false;
init_completion(&fence->completion);
}
......@@ -431,11 +432,10 @@ static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)
/* Don't cancel TDR in case this CS was timedout because we might be
* running from the TDR context
*/
if (cs && (cs->timedout ||
hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT))
if (cs->timedout || hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT)
return;
if (cs && cs->tdr_active)
if (cs->tdr_active)
cancel_delayed_work_sync(&cs->work_tdr);
spin_lock(&hdev->cs_mirror_lock);
......@@ -536,10 +536,21 @@ static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs)
mcs_compl->timestamp =
ktime_to_ns(fence->timestamp);
complete_all(&mcs_compl->completion);
/*
* Setting mcs_handling_done inside the lock ensures
* at least one fence has mcs_handling_done set to
* true before the wait for mcs finishes. This ensures at
* least one CS will be set as completed when polling
* mcs fences.
*/
fence->mcs_handling_done = true;
}
spin_unlock(&mcs_compl->lock);
}
/* In case CS completed without mcs completion initialized */
fence->mcs_handling_done = true;
}
static inline void cs_release_sob_reset_handler(struct hl_device *hdev,
......@@ -2371,32 +2382,48 @@ static int hl_cs_poll_fences(struct multi_cs_data *mcs_data)
break;
}
mcs_data->stream_master_qid_map |= fence->stream_master_qid_map;
if (status == CS_WAIT_STATUS_BUSY)
continue;
mcs_data->completion_bitmap |= BIT(i);
/*
* best effort to extract timestamp. few notes:
* - if even single fence is gone we cannot extract timestamp
* (as fence not exist anymore)
* - for all completed CSs we take the earliest timestamp.
* for this we have to validate that:
* 1. given timestamp was indeed set
* 2. the timestamp is earliest of all timestamps so far
*/
switch (status) {
case CS_WAIT_STATUS_BUSY:
/* CS has not finished, keep waiting on its QID */
mcs_data->stream_master_qid_map |=
fence->stream_master_qid_map;
break;
case CS_WAIT_STATUS_COMPLETED:
/*
* Use mcs_handling_done to avoid returning mcs_data to the user
* with a CS indicated as completed before it has finished all of
* its mcs handling, which would create a race the next time the
* user waits for mcs.
*/
if (!fence->mcs_handling_done)
break;
if (status == CS_WAIT_STATUS_GONE) {
mcs_data->completion_bitmap |= BIT(i);
/*
* For all completed CSs we take the earliest timestamp.
* For this we have to validate that the timestamp is
* earliest of all timestamps so far.
*/
if (mcs_data->update_ts &&
(ktime_compare(fence->timestamp, first_cs_time) < 0))
first_cs_time = fence->timestamp;
break;
case CS_WAIT_STATUS_GONE:
mcs_data->update_ts = false;
mcs_data->gone_cs = true;
} else if (mcs_data->update_ts &&
(ktime_compare(fence->timestamp,
ktime_set(0, 0)) > 0) &&
(ktime_compare(fence->timestamp, first_cs_time) < 0)) {
first_cs_time = fence->timestamp;
/*
* It is possible to get old sequence numbers from the user
* that relate to already completed CSs whose fences are
* already gone. In this case, the CS is set as completed but
* there is no need to consider its QID for mcs completion.
*/
mcs_data->completion_bitmap |= BIT(i);
break;
default:
dev_err(hdev->dev, "Invalid fence status\n");
return -EINVAL;
}
}
hl_fences_put(mcs_data->fence_arr, arr_len);
......@@ -2740,13 +2767,14 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
u32 timeout_us, u64 user_address,
u32 target_value, u16 interrupt_offset,
enum hl_cs_wait_status *status)
u64 target_value, u16 interrupt_offset,
enum hl_cs_wait_status *status,
u64 *timestamp)
{
struct hl_user_pending_interrupt *pend;
struct hl_user_interrupt *interrupt;
unsigned long timeout, flags;
u32 completion_value;
u64 completion_value;
long completion_rc;
int rc = 0;
......@@ -2780,15 +2808,17 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
/* We check for completion value as interrupt could have been received
* before we added the node to the wait list
*/
if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 4)) {
if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) {
dev_err(hdev->dev, "Failed to copy completion value from user\n");
rc = -EFAULT;
goto remove_pending_user_interrupt;
}
if (completion_value >= target_value)
if (completion_value >= target_value) {
*status = CS_WAIT_STATUS_COMPLETED;
else
/* There was no interrupt, we assume the completion is now. */
pend->fence.timestamp = ktime_get();
} else
*status = CS_WAIT_STATUS_BUSY;
if (!timeout_us || (*status == CS_WAIT_STATUS_COMPLETED))
......@@ -2812,7 +2842,7 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
reinit_completion(&pend->fence.completion);
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 4)) {
if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) {
dev_err(hdev->dev, "Failed to copy completion value from user\n");
rc = -EFAULT;
......@@ -2839,6 +2869,8 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
list_del(&pend->wait_list_node);
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
*timestamp = ktime_to_ns(pend->fence.timestamp);
kfree(pend);
hl_ctx_put(ctx);
......@@ -2852,6 +2884,7 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
struct asic_fixed_properties *prop;
union hl_wait_cs_args *args = data;
enum hl_cs_wait_status status;
u64 timestamp;
int rc;
prop = &hdev->asic_prop;
......@@ -2881,7 +2914,8 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx,
args->in.interrupt_timeout_us, args->in.addr,
args->in.target, interrupt_offset, &status);
args->in.target, interrupt_offset, &status,
&timestamp);
if (rc) {
if (rc != -EINTR)
......@@ -2893,6 +2927,11 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
memset(args, 0, sizeof(*args));
if (timestamp) {
args->out.timestamp_nsec = timestamp;
args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
}
switch (status) {
case CS_WAIT_STATUS_COMPLETED:
args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
......
......@@ -181,12 +181,6 @@ int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv)
return rc;
}
void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx)
{
if (kref_put(&ctx->refcount, hl_ctx_do_release) == 1)
return;
}
int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
{
int rc = 0;
......@@ -392,7 +386,7 @@ void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr)
idp = &mgr->ctx_handles;
idr_for_each_entry(idp, ctx, id)
hl_ctx_free(hdev, ctx);
kref_put(&ctx->refcount, hl_ctx_do_release);
idr_destroy(&mgr->ctx_handles);
mutex_destroy(&mgr->ctx_lock);
......
......@@ -1167,6 +1167,45 @@ static ssize_t hl_state_dump_write(struct file *f, const char __user *buf,
return count;
}
static ssize_t hl_timeout_locked_read(struct file *f, char __user *buf,
size_t count, loff_t *ppos)
{
struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
struct hl_device *hdev = entry->hdev;
char tmp_buf[200];
ssize_t rc;
if (*ppos)
return 0;
sprintf(tmp_buf, "%d\n",
jiffies_to_msecs(hdev->timeout_jiffies) / 1000);
rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf,
strlen(tmp_buf) + 1);
return rc;
}
static ssize_t hl_timeout_locked_write(struct file *f, const char __user *buf,
size_t count, loff_t *ppos)
{
struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
struct hl_device *hdev = entry->hdev;
u32 value;
ssize_t rc;
rc = kstrtouint_from_user(buf, count, 10, &value);
if (rc)
return rc;
if (value)
hdev->timeout_jiffies = msecs_to_jiffies(value * 1000);
else
hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
return count;
}
static const struct file_operations hl_data32b_fops = {
.owner = THIS_MODULE,
.read = hl_data_read32,
......@@ -1240,6 +1279,12 @@ static const struct file_operations hl_state_dump_fops = {
.write = hl_state_dump_write
};
static const struct file_operations hl_timeout_locked_fops = {
.owner = THIS_MODULE,
.read = hl_timeout_locked_read,
.write = hl_timeout_locked_write
};
static const struct hl_info_list hl_debugfs_list[] = {
{"command_buffers", command_buffers_show, NULL},
{"command_submission", command_submission_show, NULL},
......@@ -1421,6 +1466,12 @@ void hl_debugfs_add_device(struct hl_device *hdev)
dev_entry,
&hl_state_dump_fops);
debugfs_create_file("timeout_locked",
0644,
dev_entry->root,
dev_entry,
&hl_timeout_locked_fops);
for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
debugfs_create_file(hl_debugfs_list[i].name,
0444,
......
......@@ -69,13 +69,6 @@ static void hpriv_release(struct kref *ref)
mutex_destroy(&hpriv->restore_phase_mutex);
mutex_lock(&hdev->fpriv_list_lock);
list_del(&hpriv->dev_node);
hdev->compute_ctx = NULL;
mutex_unlock(&hdev->fpriv_list_lock);
kfree(hpriv);
if ((!hdev->pldm) && (hdev->pdev) &&
(!hdev->asic_funcs->is_device_idle(hdev,
idle_mask,
......@@ -87,9 +80,32 @@ static void hpriv_release(struct kref *ref)
device_is_idle = false;
}
/* We need to remove the user from the list to make sure the reset process won't
* try to kill the user process. Because, if we got here, it means there are no
* more driver/device resources that the user process is occupying so there is
* no need to kill it
*
* However, we can't set the compute_ctx to NULL at this stage. This is to prevent
* a race between the release and opening the device again. We don't want to let
* a user open the device while a reset is about to happen.
*/
mutex_lock(&hdev->fpriv_list_lock);
list_del(&hpriv->dev_node);
mutex_unlock(&hdev->fpriv_list_lock);
if ((hdev->reset_if_device_not_idle && !device_is_idle)
|| hdev->reset_upon_device_release)
hl_device_reset(hdev, HL_RESET_DEVICE_RELEASE);
/* Now we can mark the compute_ctx as empty. Even if a reset is running in a different
* thread, we don't care because the in_reset is marked so if a user will try to open
* the device it will fail on that, even if compute_ctx is NULL.
*/
mutex_lock(&hdev->fpriv_list_lock);
hdev->compute_ctx = NULL;
mutex_unlock(&hdev->fpriv_list_lock);
kfree(hpriv);
}
void hl_hpriv_get(struct hl_fpriv *hpriv)
......@@ -530,6 +546,19 @@ static void hl_device_heartbeat(struct work_struct *work)
return;
reschedule:
/*
* prev_reset_trigger tracks consecutive fatal h/w errors until first
* heartbeat immediately post reset.
* If control reached here, then at least one heartbeat work has been
* scheduled since last reset/init cycle.
* So if the device is not already in reset cycle, reset the flag
* prev_reset_trigger as no reset occurred with HL_RESET_FW_FATAL_ERR
* status for at least one heartbeat. From this point driver restarts
* tracking future consecutive fatal errors.
*/
if (!(atomic_read(&hdev->in_reset)))
hdev->prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT;
schedule_delayed_work(&hdev->work_heartbeat,
usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
}
......@@ -909,6 +938,65 @@ static void device_disable_open_processes(struct hl_device *hdev)
mutex_unlock(&hdev->fpriv_list_lock);
}
static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
{
u32 cur_reset_trigger = HL_RESET_TRIGGER_DEFAULT;
/*
* 'reset cause' is being updated here, because getting here
* means that it's the 1st time and the last time we're here
* ('in_reset' makes sure of it). This makes sure that
* 'reset_cause' will continue holding its 1st recorded reason!
*/
if (flags & HL_RESET_HEARTBEAT) {
hdev->curr_reset_cause = HL_RESET_CAUSE_HEARTBEAT;
cur_reset_trigger = HL_RESET_HEARTBEAT;
} else if (flags & HL_RESET_TDR) {
hdev->curr_reset_cause = HL_RESET_CAUSE_TDR;
cur_reset_trigger = HL_RESET_TDR;
} else if (flags & HL_RESET_FW_FATAL_ERR) {
hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
cur_reset_trigger = HL_RESET_FW_FATAL_ERR;
} else {
hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
}
/*
* If reset cause is same twice, then reset_trigger_repeated
* is set and if this reset is due to a fatal FW error
* device is set to an unstable state.
*/
if (hdev->prev_reset_trigger != cur_reset_trigger) {
hdev->prev_reset_trigger = cur_reset_trigger;
hdev->reset_trigger_repeated = 0;
} else {
hdev->reset_trigger_repeated = 1;
}
/* If reset is due to heartbeat, device CPU is not responsive, in
* which case there is no point sending a PCI disable message to it.
*
* If F/W is performing the reset, no need to send it a message to disable
* PCI access
*/
if ((flags & HL_RESET_HARD) &&
!(flags & (HL_RESET_HEARTBEAT | HL_RESET_FW))) {
/* Disable PCI access from device F/W so it won't send
* us additional interrupts. We disable MSI/MSI-X at
* the halt_engines function and we can't have the F/W
* sending us interrupts after that. We need to disable
* the access here because if the device is marked
* disabled, the message won't be sent. Also, in case
* of heartbeat, the device CPU is marked as disabled
* so this message won't be sent
*/
if (hl_fw_send_pci_access_msg(hdev,
CPUCP_PACKET_DISABLE_PCI_ACCESS))
dev_warn(hdev->dev,
"Failed to disable PCI access by F/W\n");
}
}
/*
* hl_device_reset - reset the device
*
......@@ -954,7 +1042,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
goto do_reset;
}
if (!hard_reset && !hdev->allow_external_soft_reset) {
if (!hard_reset && !hdev->allow_inference_soft_reset) {
hard_instead_soft = true;
hard_reset = true;
}
......@@ -978,47 +1066,21 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
if (rc)
return 0;
/*
* 'reset cause' is being updated here, because getting here
* means that it's the 1st time and the last time we're here
* ('in_reset' makes sure of it). This makes sure that
* 'reset_cause' will continue holding its 1st recorded reason!
*/
if (flags & HL_RESET_HEARTBEAT)
hdev->curr_reset_cause = HL_RESET_CAUSE_HEARTBEAT;
else if (flags & HL_RESET_TDR)
hdev->curr_reset_cause = HL_RESET_CAUSE_TDR;
else
hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
/* If reset is due to heartbeat, device CPU is no responsive in
* which case no point sending PCI disable message to it.
*
* If F/W is performing the reset, no need to send it a message to disable
* PCI access
*/
if (hard_reset && !(flags & (HL_RESET_HEARTBEAT | HL_RESET_FW))) {
/* Disable PCI access from device F/W so he won't send
* us additional interrupts. We disable MSI/MSI-X at
* the halt_engines function and we can't have the F/W
* sending us interrupts after that. We need to disable
* the access here because if the device is marked
* disable, the message won't be send. Also, in case
* of heartbeat, the device CPU is marked as disable
* so this message won't be sent
*/
if (hl_fw_send_pci_access_msg(hdev,
CPUCP_PACKET_DISABLE_PCI_ACCESS))
dev_warn(hdev->dev,
"Failed to disable PCI access by F/W\n");
}
handle_reset_trigger(hdev, flags);
/* This also blocks future CS/VM/JOB completion operations */
hdev->disabled = true;
take_release_locks(hdev);
dev_err(hdev->dev, "Going to RESET device!\n");
if (hard_reset)
dev_info(hdev->dev, "Going to reset device\n");
else if (flags & HL_RESET_DEVICE_RELEASE)
dev_info(hdev->dev,
"Going to reset device after it was released by user\n");
else
dev_info(hdev->dev,
"Going to reset compute engines of inference device\n");
}
again:
......@@ -1108,6 +1170,17 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
hdev->device_cpu_disabled = false;
hdev->hard_reset_pending = false;
if (hdev->reset_trigger_repeated &&
(hdev->prev_reset_trigger == HL_RESET_FW_FATAL_ERR)) {
/* if there are 2 back-to-back resets from FW,
* ensure the driver puts the device in an unusable state
*/
dev_crit(hdev->dev,
"Consecutive FW fatal errors received, stopping hard reset\n");
rc = -EIO;
goto out_err;
}
if (hdev->kernel_ctx) {
dev_crit(hdev->dev,
"kernel ctx was alive during hard reset, something is terribly wrong\n");
......
......@@ -2162,18 +2162,17 @@ static void hl_fw_linux_update_state(struct hl_device *hdev,
}
/**
* hl_fw_dynamic_report_reset_cause - send a COMMS message with the cause
* of the newly triggered hard reset
* hl_fw_dynamic_send_msg - send a COMMS message with attached data
*
* @hdev: pointer to the habanalabs device structure
* @fw_loader: managing structure for loading device's FW
* @reset_cause: enumerated cause for the recent hard reset
* @msg_type: message type
* @data: data to be sent
*
* @return 0 on success, otherwise non-zero error code
*/
static int hl_fw_dynamic_report_reset_cause(struct hl_device *hdev,
struct fw_load_mgr *fw_loader,
enum comms_reset_cause reset_cause)
static int hl_fw_dynamic_send_msg(struct hl_device *hdev,
struct fw_load_mgr *fw_loader, u8 msg_type, void *data)
{
struct lkd_msg_comms msg;
int rc;
......@@ -2181,11 +2180,20 @@ static int hl_fw_dynamic_report_reset_cause(struct hl_device *hdev,
memset(&msg, 0, sizeof(msg));
/* create message to be sent */
msg.header.type = HL_COMMS_RESET_CAUSE_TYPE;
msg.header.type = msg_type;
msg.header.size = cpu_to_le16(sizeof(struct comms_msg_header));
msg.header.magic = cpu_to_le32(HL_COMMS_MSG_MAGIC);
msg.reset_cause = reset_cause;
switch (msg_type) {
case HL_COMMS_RESET_CAUSE_TYPE:
msg.reset_cause = *(__u8 *) data;
break;
default:
dev_err(hdev->dev,
"Send COMMS message - invalid message type %u\n",
msg_type);
return -EINVAL;
}
rc = hl_fw_dynamic_request_descriptor(hdev, fw_loader,
sizeof(struct lkd_msg_comms));
......@@ -2252,8 +2260,8 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
goto protocol_err;
if (hdev->curr_reset_cause) {
rc = hl_fw_dynamic_report_reset_cause(hdev, fw_loader,
hdev->curr_reset_cause);
rc = hl_fw_dynamic_send_msg(hdev, fw_loader,
HL_COMMS_RESET_CAUSE_TYPE, &hdev->curr_reset_cause);
if (rc)
goto protocol_err;
......
......@@ -26,6 +26,7 @@
#include <linux/sched/signal.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/coresight.h>
#include <linux/dma-buf.h>
#define HL_NAME "habanalabs"
......@@ -68,6 +69,9 @@
#define HL_STATE_DUMP_HIST_LEN 5
/* Default value for device reset trigger, an invalid value */
#define HL_RESET_TRIGGER_DEFAULT 0xFF
#define OBJ_NAMES_HASH_TABLE_BITS 7 /* 1 << 7 buckets */
#define SYNC_TO_ENGINE_HASH_TABLE_BITS 7 /* 1 << 7 buckets */
......@@ -132,13 +136,18 @@ enum hl_mmu_page_table_location {
* - HL_RESET_FW
* F/W will perform the reset. No need to ask it to reset the device. This is relevant
* only when running with secured f/w
*
* - HL_RESET_FW_FATAL_ERR
* Set if reset is due to a fatal error from FW
*/
#define HL_RESET_HARD (1 << 0)
#define HL_RESET_FROM_RESET_THREAD (1 << 1)
#define HL_RESET_HEARTBEAT (1 << 2)
#define HL_RESET_TDR (1 << 3)
#define HL_RESET_DEVICE_RELEASE (1 << 4)
#define HL_RESET_FW (1 << 5)
#define HL_RESET_FW_FATAL_ERR (1 << 6)
#define HL_MAX_SOBS_PER_MONITOR 8
......@@ -447,6 +456,9 @@ struct hl_hints_range {
* for hints validity check.
* device_dma_offset_for_host_access: the offset to add to host DMA addresses
* to enable the device to access them.
* @max_freq_value: current max clk frequency.
* @clk_pll_index: clock PLL index that specify which PLL determines the clock
* we display to the user
* @mmu_pgt_size: MMU page tables total size.
* @mmu_pte_size: PTE size in MMU page tables.
* @mmu_hop_table_size: MMU hop table size.
......@@ -543,6 +555,8 @@ struct asic_fixed_properties {
u64 cb_va_end_addr;
u64 dram_hints_align_mask;
u64 device_dma_offset_for_host_access;
u64 max_freq_value;
u32 clk_pll_index;
u32 mmu_pgt_size;
u32 mmu_pte_size;
u32 mmu_hop_table_size;
......@@ -601,6 +615,9 @@ struct asic_fixed_properties {
* masters QIDs that multi cs is waiting on
* @error: mark this fence with error
* @timestamp: timestamp upon completion
* @mcs_handling_done: indicates that the corresponding command submission has
* finished mcs handling; this does not mean it was part
* of the mcs
*/
struct hl_fence {
struct completion completion;
......@@ -609,6 +626,7 @@ struct hl_fence {
u32 stream_master_qid_map;
int error;
ktime_t timestamp;
u8 mcs_handling_done;
};
/**
......@@ -1352,6 +1370,23 @@ struct hl_cs_counters_atomic {
atomic64_t validation_drop_cnt;
};
/**
* struct hl_dmabuf_priv - a dma-buf private object.
* @dmabuf: pointer to dma-buf object.
* @ctx: pointer to the dma-buf owner's context.
* @phys_pg_pack: pointer to physical page pack if the dma-buf was exported for
* memory allocation handle.
* @device_address: physical address of the device's memory. Relevant only
* if phys_pg_pack is NULL (dma-buf was exported from address).
* The total size can be taken from the dmabuf object.
*/
struct hl_dmabuf_priv {
struct dma_buf *dmabuf;
struct hl_ctx *ctx;
struct hl_vm_phys_pg_pack *phys_pg_pack;
uint64_t device_address;
};
/**
* struct hl_ctx - user/kernel context.
* @mem_hash: holds mapping from virtual address to virtual memory area
......@@ -1662,6 +1697,7 @@ struct hl_vm_hw_block_list_node {
* @npages: num physical pages in the pack.
* @total_size: total size of all the pages in this list.
* @mapping_cnt: number of shared mappings.
* @exporting_cnt: number of dma-buf exporting.
* @asid: the context related to this list.
* @page_size: size of each page in the pack.
* @flags: HL_MEM_* flags related to this list.
......@@ -1676,6 +1712,7 @@ struct hl_vm_phys_pg_pack {
u64 npages;
u64 total_size;
atomic_t mapping_cnt;
u32 exporting_cnt;
u32 asid;
u32 page_size;
u32 flags;
......@@ -2396,6 +2433,7 @@ struct multi_cs_data {
* the error will be ignored by the driver during
* device initialization. Mainly used to debug and
* workaround firmware bugs
* @dram_pci_bar_start: start bus address of PCIe bar towards DRAM.
* @last_successful_open_jif: timestamp (jiffies) of the last successful
* device open.
* @last_open_session_duration_jif: duration (jiffies) of the last device open
......@@ -2440,8 +2478,12 @@ struct multi_cs_data {
* @collective_mon_idx: helper index for collective initialization
* @supports_coresight: is CoreSight supported.
* @supports_soft_reset: is soft reset supported.
* @allow_external_soft_reset: true if soft reset initiated by user or TDR is
* allowed.
* @allow_inference_soft_reset: true if the ASIC supports soft reset that is
* initiated by user or TDR. This is only true
* in inference ASICs, as there is no real-world
* use-case of doing soft-reset in training (due
* to the fact that training runs on multiple
* devices)
* @supports_cb_mapping: is mapping a CB to the device's MMU supported.
* @needs_reset: true if reset_on_lockup is false and device should be reset
* due to lockup.
......@@ -2452,6 +2494,10 @@ struct multi_cs_data {
* @supports_staged_submission: true if staged submissions are supported
* @curr_reset_cause: saves an enumerated reset cause when a hard reset is
* triggered, and cleared after it is shared with preboot.
* @prev_reset_trigger: saves the previous trigger which caused a reset, overridden
* with a new value on next reset
* @reset_trigger_repeated: set if device reset is triggered more than once with
* same cause.
* @skip_reset_on_timeout: Skip device reset if CS has timed out, wait for it to
* complete instead.
* @device_cpu_is_halted: Flag to indicate whether the device CPU was already
......@@ -2537,6 +2583,7 @@ struct hl_device {
u64 max_power;
u64 clock_gating_mask;
u64 boot_error_status_mask;
u64 dram_pci_bar_start;
u64 last_successful_open_jif;
u64 last_open_session_duration_jif;
u64 open_counter;
......@@ -2572,13 +2619,15 @@ struct hl_device {
u8 collective_mon_idx;
u8 supports_coresight;
u8 supports_soft_reset;
u8 allow_external_soft_reset;
u8 allow_inference_soft_reset;
u8 supports_cb_mapping;
u8 needs_reset;
u8 process_kill_trial_cnt;
u8 device_fini_pending;
u8 supports_staged_submission;
u8 curr_reset_cause;
u8 prev_reset_trigger;
u8 reset_trigger_repeated;
u8 skip_reset_on_timeout;
u8 device_cpu_is_halted;
u8 supports_wait_for_multi_cs;
......@@ -2956,6 +3005,15 @@ int hl_set_voltage(struct hl_device *hdev,
int sensor_index, u32 attr, long value);
int hl_set_current(struct hl_device *hdev,
int sensor_index, u32 attr, long value);
int hl_set_power(struct hl_device *hdev,
int sensor_index, u32 attr, long value);
int hl_get_power(struct hl_device *hdev,
int sensor_index, u32 attr, long *value);
int hl_get_clk_rate(struct hl_device *hdev,
u32 *cur_clk, u32 *max_clk);
void hl_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq);
void hl_add_device_attr(struct hl_device *hdev,
struct attribute_group *dev_attr_grp);
void hw_sob_get(struct hl_hw_sob *hw_sob);
void hw_sob_put(struct hl_hw_sob *hw_sob);
void hl_encaps_handle_do_release(struct kref *ref);
......
......@@ -225,6 +225,17 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp)
if (!hpriv)
return -ENOMEM;
/* Prevent other routines from reading partial hpriv data by
* initializing hpriv fields before inserting it to the list
*/
hpriv->hdev = hdev;
filp->private_data = hpriv;
hpriv->filp = filp;
hpriv->is_control = true;
nonseekable_open(inode, filp);
hpriv->taskpid = find_get_pid(current->pid);
mutex_lock(&hdev->fpriv_list_lock);
if (!hl_device_operational(hdev, NULL)) {
......@@ -238,19 +249,15 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp)
list_add(&hpriv->dev_node, &hdev->fpriv_list);
mutex_unlock(&hdev->fpriv_list_lock);
hpriv->hdev = hdev;
filp->private_data = hpriv;
hpriv->filp = filp;
hpriv->is_control = true;
nonseekable_open(inode, filp);
hpriv->taskpid = find_get_pid(current->pid);
return 0;
out_err:
mutex_unlock(&hdev->fpriv_list_lock);
filp->private_data = NULL;
put_pid(hpriv->taskpid);
kfree(hpriv);
return rc;
}
......@@ -339,6 +346,7 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
set_driver_behavior_per_device(hdev);
hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
hdev->prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT;
if (timeout_locked)
hdev->timeout_jiffies = msecs_to_jiffies(timeout_locked * 1000);
......
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright 2016-2018 HabanaLabs, Ltd.
* Copyright 2019-2021 HabanaLabs, Ltd.
* All Rights Reserved.
*/
#include "gaudiP.h"
#include "../include/gaudi/gaudi_fw_if.h"
#include "habanalabs.h"
void gaudi_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq)
void hl_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq)
{
struct gaudi_device *gaudi = hdev->asic_specific;
if (freq == PLL_LAST)
hl_set_frequency(hdev, HL_GAUDI_MME_PLL, gaudi->max_freq_value);
hl_set_frequency(hdev, hdev->asic_prop.clk_pll_index,
hdev->asic_prop.max_freq_value);
}
int gaudi_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
int hl_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
{
long value;
if (!hl_device_operational(hdev, NULL))
return -ENODEV;
value = hl_get_frequency(hdev, HL_GAUDI_MME_PLL, false);
value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false);
if (value < 0) {
dev_err(hdev->dev, "Failed to retrieve device max clock %ld\n",
......@@ -33,7 +30,7 @@ int gaudi_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
*max_clk = (value / 1000 / 1000);
value = hl_get_frequency(hdev, HL_GAUDI_MME_PLL, true);
value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true);
if (value < 0) {
dev_err(hdev->dev,
......@@ -51,15 +48,14 @@ static ssize_t clk_max_freq_mhz_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct hl_device *hdev = dev_get_drvdata(dev);
struct gaudi_device *gaudi = hdev->asic_specific;
long value;
if (!hl_device_operational(hdev, NULL))
return -ENODEV;
value = hl_get_frequency(hdev, HL_GAUDI_MME_PLL, false);
value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false);
gaudi->max_freq_value = value;
hdev->asic_prop.max_freq_value = value;
return sprintf(buf, "%lu\n", (value / 1000 / 1000));
}
......@@ -68,7 +64,6 @@ static ssize_t clk_max_freq_mhz_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct hl_device *hdev = dev_get_drvdata(dev);
struct gaudi_device *gaudi = hdev->asic_specific;
int rc;
u64 value;
......@@ -83,9 +78,10 @@ static ssize_t clk_max_freq_mhz_store(struct device *dev,
goto fail;
}
gaudi->max_freq_value = value * 1000 * 1000;
hdev->asic_prop.max_freq_value = value * 1000 * 1000;
hl_set_frequency(hdev, HL_GAUDI_MME_PLL, gaudi->max_freq_value);
hl_set_frequency(hdev, hdev->asic_prop.clk_pll_index,
hdev->asic_prop.max_freq_value);
fail:
return count;
......@@ -100,7 +96,7 @@ static ssize_t clk_cur_freq_mhz_show(struct device *dev,
if (!hl_device_operational(hdev, NULL))
return -ENODEV;
value = hl_get_frequency(hdev, HL_GAUDI_MME_PLL, true);
value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true);
return sprintf(buf, "%lu\n", (value / 1000 / 1000));
}
......@@ -108,14 +104,14 @@ static ssize_t clk_cur_freq_mhz_show(struct device *dev,
static DEVICE_ATTR_RW(clk_max_freq_mhz);
static DEVICE_ATTR_RO(clk_cur_freq_mhz);
static struct attribute *gaudi_dev_attrs[] = {
static struct attribute *hl_dev_attrs[] = {
&dev_attr_clk_max_freq_mhz.attr,
&dev_attr_clk_cur_freq_mhz.attr,
NULL,
};
void gaudi_add_device_attr(struct hl_device *hdev,
void hl_add_device_attr(struct hl_device *hdev,
struct attribute_group *dev_attr_grp)
{
dev_attr_grp->attrs = gaudi_dev_attrs;
dev_attr_grp->attrs = hl_dev_attrs;
}
......@@ -113,6 +113,9 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
{
struct hl_device *hdev = dev_get_drvdata(dev);
int rc;
u32 cpucp_attr;
bool use_cpucp_enum = (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
CPU_BOOT_DEV_STS0_MAP_HWMON_EN) ? true : false;
if (!hl_device_operational(hdev, NULL))
return -ENODEV;
......@@ -121,65 +124,134 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
case hwmon_temp:
switch (attr) {
case hwmon_temp_input:
cpucp_attr = cpucp_temp_input;
break;
case hwmon_temp_max:
cpucp_attr = cpucp_temp_max;
break;
case hwmon_temp_crit:
cpucp_attr = cpucp_temp_crit;
break;
case hwmon_temp_max_hyst:
cpucp_attr = cpucp_temp_max_hyst;
break;
case hwmon_temp_crit_hyst:
cpucp_attr = cpucp_temp_crit_hyst;
break;
case hwmon_temp_offset:
cpucp_attr = cpucp_temp_offset;
break;
case hwmon_temp_highest:
cpucp_attr = cpucp_temp_highest;
break;
default:
return -EINVAL;
}
rc = hl_get_temperature(hdev, channel, attr, val);
if (use_cpucp_enum)
rc = hl_get_temperature(hdev, channel, cpucp_attr, val);
else
rc = hl_get_temperature(hdev, channel, attr, val);
break;
case hwmon_in:
switch (attr) {
case hwmon_in_input:
cpucp_attr = cpucp_in_input;
break;
case hwmon_in_min:
cpucp_attr = cpucp_in_min;
break;
case hwmon_in_max:
cpucp_attr = cpucp_in_max;
break;
case hwmon_in_highest:
cpucp_attr = cpucp_in_highest;
break;
default:
return -EINVAL;
}
rc = hl_get_voltage(hdev, channel, attr, val);
if (use_cpucp_enum)
rc = hl_get_voltage(hdev, channel, cpucp_attr, val);
else
rc = hl_get_voltage(hdev, channel, attr, val);
break;
case hwmon_curr:
switch (attr) {
case hwmon_curr_input:
cpucp_attr = cpucp_curr_input;
break;
case hwmon_curr_min:
cpucp_attr = cpucp_curr_min;
break;
case hwmon_curr_max:
cpucp_attr = cpucp_curr_max;
break;
case hwmon_curr_highest:
cpucp_attr = cpucp_curr_highest;
break;
default:
return -EINVAL;
}
rc = hl_get_current(hdev, channel, attr, val);
if (use_cpucp_enum)
rc = hl_get_current(hdev, channel, cpucp_attr, val);
else
rc = hl_get_current(hdev, channel, attr, val);
break;
case hwmon_fan:
switch (attr) {
case hwmon_fan_input:
cpucp_attr = cpucp_fan_input;
break;
case hwmon_fan_min:
cpucp_attr = cpucp_fan_min;
break;
case hwmon_fan_max:
cpucp_attr = cpucp_fan_max;
break;
default:
return -EINVAL;
}
rc = hl_get_fan_speed(hdev, channel, attr, val);
if (use_cpucp_enum)
rc = hl_get_fan_speed(hdev, channel, cpucp_attr, val);
else
rc = hl_get_fan_speed(hdev, channel, attr, val);
break;
case hwmon_pwm:
switch (attr) {
case hwmon_pwm_input:
cpucp_attr = cpucp_pwm_input;
break;
case hwmon_pwm_enable:
cpucp_attr = cpucp_pwm_enable;
break;
default:
return -EINVAL;
}
if (use_cpucp_enum)
rc = hl_get_pwm_info(hdev, channel, cpucp_attr, val);
else
rc = hl_get_pwm_info(hdev, channel, attr, val);
break;
case hwmon_power:
switch (attr) {
case hwmon_power_input:
cpucp_attr = CPUCP_POWER_INPUT;
break;
case hwmon_power_input_highest:
cpucp_attr = CPUCP_POWER_INPUT_HIGHEST;
break;
default:
return -EINVAL;
}
rc = hl_get_pwm_info(hdev, channel, attr, val);
if (use_cpucp_enum)
rc = hl_get_power(hdev, channel, cpucp_attr, val);
else
rc = hl_get_power(hdev, channel, attr, val);
break;
default:
return -EINVAL;
......@@ -191,6 +263,9 @@ static int hl_write(struct device *dev, enum hwmon_sensor_types type,
u32 attr, int channel, long val)
{
struct hl_device *hdev = dev_get_drvdata(dev);
u32 cpucp_attr;
bool use_cpucp_enum = (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
CPU_BOOT_DEV_STS0_MAP_HWMON_EN) ? true : false;
if (!hl_device_operational(hdev, NULL))
return -ENODEV;
......@@ -199,40 +274,78 @@ static int hl_write(struct device *dev, enum hwmon_sensor_types type,
case hwmon_temp:
switch (attr) {
case hwmon_temp_offset:
cpucp_attr = cpucp_temp_offset;
break;
case hwmon_temp_reset_history:
cpucp_attr = cpucp_temp_reset_history;
break;
default:
return -EINVAL;
}
hl_set_temperature(hdev, channel, attr, val);
if (use_cpucp_enum)
hl_set_temperature(hdev, channel, cpucp_attr, val);
else
hl_set_temperature(hdev, channel, attr, val);
break;
case hwmon_pwm:
switch (attr) {
case hwmon_pwm_input:
cpucp_attr = cpucp_pwm_input;
break;
case hwmon_pwm_enable:
cpucp_attr = cpucp_pwm_enable;
break;
default:
return -EINVAL;
}
hl_set_pwm_info(hdev, channel, attr, val);
if (use_cpucp_enum)
hl_set_pwm_info(hdev, channel, cpucp_attr, val);
else
hl_set_pwm_info(hdev, channel, attr, val);
break;
case hwmon_in:
switch (attr) {
case hwmon_in_reset_history:
cpucp_attr = cpucp_in_reset_history;
break;
default:
return -EINVAL;
}
hl_set_voltage(hdev, channel, attr, val);
if (use_cpucp_enum)
hl_set_voltage(hdev, channel, cpucp_attr, val);
else
hl_set_voltage(hdev, channel, attr, val);
break;
case hwmon_curr:
switch (attr) {
case hwmon_curr_reset_history:
cpucp_attr = cpucp_curr_reset_history;
break;
default:
return -EINVAL;
}
hl_set_current(hdev, channel, attr, val);
if (use_cpucp_enum)
hl_set_current(hdev, channel, cpucp_attr, val);
else
hl_set_current(hdev, channel, attr, val);
break;
case hwmon_power:
switch (attr) {
case hwmon_power_reset_history:
cpucp_attr = CPUCP_POWER_RESET_INPUT_HISTORY;
break;
default:
return -EINVAL;
}
if (use_cpucp_enum)
hl_set_power(hdev, channel, cpucp_attr, val);
else
hl_set_power(hdev, channel, attr, val);
break;
default:
return -EINVAL;
......@@ -296,6 +409,15 @@ static umode_t hl_is_visible(const void *data, enum hwmon_sensor_types type,
return 0644;
}
break;
case hwmon_power:
switch (attr) {
case hwmon_power_input:
case hwmon_power_input_highest:
return 0444;
case hwmon_power_reset_history:
return 0200;
}
break;
default:
break;
}
......@@ -551,6 +673,60 @@ int hl_set_current(struct hl_device *hdev,
return rc;
}
int hl_set_power(struct hl_device *hdev,
int sensor_index, u32 attr, long value)
{
struct cpucp_packet pkt;
int rc;
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = cpu_to_le32(CPUCP_PACKET_POWER_GET <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
pkt.sensor_index = __cpu_to_le16(sensor_index);
pkt.type = __cpu_to_le16(attr);
pkt.value = __cpu_to_le64(value);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
0, NULL);
if (rc)
dev_err(hdev->dev,
"Failed to set power of sensor %d, error %d\n",
sensor_index, rc);
return rc;
}
int hl_get_power(struct hl_device *hdev,
int sensor_index, u32 attr, long *value)
{
struct cpucp_packet pkt;
u64 result;
int rc;
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = cpu_to_le32(CPUCP_PACKET_POWER_GET <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
pkt.sensor_index = __cpu_to_le16(sensor_index);
pkt.type = __cpu_to_le16(attr);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
0, &result);
*value = (long) result;
if (rc) {
dev_err(hdev->dev,
"Failed to get power of sensor %d, error %d\n",
sensor_index, rc);
*value = 0;
}
return rc;
}
int hl_hwmon_init(struct hl_device *hdev)
{
struct device *dev = hdev->pdev ? &hdev->pdev->dev : hdev->dev;
......
......@@ -141,10 +141,13 @@ static void handle_user_cq(struct hl_device *hdev,
struct hl_user_interrupt *user_cq)
{
struct hl_user_pending_interrupt *pend;
ktime_t now = ktime_get();
spin_lock(&user_cq->wait_list_lock);
list_for_each_entry(pend, &user_cq->wait_list_head, wait_list_node)
list_for_each_entry(pend, &user_cq->wait_list_head, wait_list_node) {
pend->fence.timestamp = now;
complete_all(&pend->fence.completion);
}
spin_unlock(&user_cq->wait_list_lock);
}
......
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright 2016-2019 HabanaLabs, Ltd.
* Copyright 2016-2021 HabanaLabs, Ltd.
* All Rights Reserved.
*/
......@@ -11,6 +11,7 @@
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/pci-p2pdma.h>
#define HL_MMU_DEBUG 0
......@@ -347,6 +348,12 @@ static int free_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args)
return -EINVAL;
}
if (phys_pg_pack->exporting_cnt) {
dev_dbg(hdev->dev, "handle %u is exported, cannot free\n", handle);
spin_unlock(&vm->idr_lock);
return -EINVAL;
}
/*
* must remove from idr before the freeing of the physical
* pages as the refcount of the pool is also the trigger of the
......@@ -1487,13 +1494,487 @@ int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
return 0;
}
static int set_dma_sg(struct scatterlist *sg, u64 bar_address, u64 chunk_size,
struct device *dev, enum dma_data_direction dir)
{
dma_addr_t addr;
int rc;
addr = dma_map_resource(dev, bar_address, chunk_size, dir,
DMA_ATTR_SKIP_CPU_SYNC);
rc = dma_mapping_error(dev, addr);
if (rc)
return rc;
sg_set_page(sg, NULL, chunk_size, 0);
sg_dma_address(sg) = addr;
sg_dma_len(sg) = chunk_size;
return 0;
}
static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64 *pages, u64 npages,
u64 page_size, struct device *dev,
enum dma_data_direction dir)
{
u64 chunk_size, bar_address, dma_max_seg_size;
struct asic_fixed_properties *prop;
int rc, i, j, nents, cur_page;
struct scatterlist *sg;
struct sg_table *sgt;
prop = &hdev->asic_prop;
dma_max_seg_size = dma_get_max_seg_size(dev);
/* We would like to align the max segment size to PAGE_SIZE, so the
* SGL will contain aligned addresses that can be easily mapped to
* an MMU
*/
dma_max_seg_size = ALIGN_DOWN(dma_max_seg_size, PAGE_SIZE);
if (dma_max_seg_size < PAGE_SIZE) {
dev_err_ratelimited(hdev->dev,
"dma_max_seg_size %llu can't be smaller than PAGE_SIZE\n",
dma_max_seg_size);
return ERR_PTR(-EINVAL);
}
sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
if (!sgt)
return ERR_PTR(-ENOMEM);
/* If the size of each page is larger than the dma max segment size,
* then we can't combine pages and the number of entries in the SGL
* will just be the
* <number of pages> * <chunks of max segment size in each page>
*/
if (page_size > dma_max_seg_size)
nents = npages * DIV_ROUND_UP_ULL(page_size, dma_max_seg_size);
else
/* Get number of non-contiguous chunks */
for (i = 1, nents = 1, chunk_size = page_size ; i < npages ; i++) {
if (pages[i - 1] + page_size != pages[i] ||
chunk_size + page_size > dma_max_seg_size) {
nents++;
chunk_size = page_size;
continue;
}
chunk_size += page_size;
}
rc = sg_alloc_table(sgt, nents, GFP_KERNEL | __GFP_ZERO);
if (rc)
goto error_free;
cur_page = 0;
if (page_size > dma_max_seg_size) {
u64 size_left, cur_device_address = 0;
size_left = page_size;
/* Need to split each page into the number of chunks of
* dma_max_seg_size
*/
for_each_sgtable_dma_sg(sgt, sg, i) {
if (size_left == page_size)
cur_device_address =
pages[cur_page] - prop->dram_base_address;
else
cur_device_address += dma_max_seg_size;
chunk_size = min(size_left, dma_max_seg_size);
bar_address = hdev->dram_pci_bar_start + cur_device_address;
rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir);
if (rc)
goto error_unmap;
if (size_left > dma_max_seg_size) {
size_left -= dma_max_seg_size;
} else {
cur_page++;
size_left = page_size;
}
}
} else {
/* Merge pages and put them into the scatterlist */
for_each_sgtable_dma_sg(sgt, sg, i) {
chunk_size = page_size;
for (j = cur_page + 1 ; j < npages ; j++) {
if (pages[j - 1] + page_size != pages[j] ||
chunk_size + page_size > dma_max_seg_size)
break;
chunk_size += page_size;
}
bar_address = hdev->dram_pci_bar_start +
(pages[cur_page] - prop->dram_base_address);
rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir);
if (rc)
goto error_unmap;
cur_page = j;
}
}
/* Because we are not going to include a CPU list we want to have some
* chance that other users will detect this by setting the orig_nents
* to 0 and using only nents (length of DMA list) when going over the
* sgl
*/
sgt->orig_nents = 0;
return sgt;
error_unmap:
for_each_sgtable_dma_sg(sgt, sg, i) {
if (!sg_dma_len(sg))
continue;
dma_unmap_resource(dev, sg_dma_address(sg),
sg_dma_len(sg), dir,
DMA_ATTR_SKIP_CPU_SYNC);
}
sg_free_table(sgt);
error_free:
kfree(sgt);
return ERR_PTR(rc);
}
static int hl_dmabuf_attach(struct dma_buf *dmabuf,
struct dma_buf_attachment *attachment)
{
struct hl_dmabuf_priv *hl_dmabuf;
struct hl_device *hdev;
int rc;
hl_dmabuf = dmabuf->priv;
hdev = hl_dmabuf->ctx->hdev;
rc = pci_p2pdma_distance_many(hdev->pdev, &attachment->dev, 1, true);
if (rc < 0)
attachment->peer2peer = false;
return 0;
}
static struct sg_table *hl_map_dmabuf(struct dma_buf_attachment *attachment,
enum dma_data_direction dir)
{
struct dma_buf *dma_buf = attachment->dmabuf;
struct hl_vm_phys_pg_pack *phys_pg_pack;
struct hl_dmabuf_priv *hl_dmabuf;
struct hl_device *hdev;
struct sg_table *sgt;
hl_dmabuf = dma_buf->priv;
hdev = hl_dmabuf->ctx->hdev;
phys_pg_pack = hl_dmabuf->phys_pg_pack;
if (!attachment->peer2peer) {
dev_dbg(hdev->dev, "Failed to map dmabuf because p2p is disabled\n");
return ERR_PTR(-EPERM);
}
if (phys_pg_pack)
sgt = alloc_sgt_from_device_pages(hdev,
phys_pg_pack->pages,
phys_pg_pack->npages,
phys_pg_pack->page_size,
attachment->dev,
dir);
else
sgt = alloc_sgt_from_device_pages(hdev,
&hl_dmabuf->device_address,
1,
hl_dmabuf->dmabuf->size,
attachment->dev,
dir);
if (IS_ERR(sgt))
dev_err(hdev->dev, "failed (%ld) to initialize sgt for dmabuf\n", PTR_ERR(sgt));
return sgt;
}
static void hl_unmap_dmabuf(struct dma_buf_attachment *attachment,
struct sg_table *sgt,
enum dma_data_direction dir)
{
struct scatterlist *sg;
int i;
/* The memory behind the dma-buf has *always* resided on the device itself, i.e. it lives
* only in the 'device' domain (after all, it maps a PCI bar address which points to the
* device memory).
*
* Therefore, it was never in the 'CPU' domain and hence, there is no need to perform
* a sync of the memory to the CPU's cache, as it never resided inside that cache.
*/
for_each_sgtable_dma_sg(sgt, sg, i)
dma_unmap_resource(attachment->dev, sg_dma_address(sg),
sg_dma_len(sg), dir,
DMA_ATTR_SKIP_CPU_SYNC);
/* Need to restore orig_nents because sg_free_table uses that field */
sgt->orig_nents = sgt->nents;
sg_free_table(sgt);
kfree(sgt);
}
static void hl_release_dmabuf(struct dma_buf *dmabuf)
{
struct hl_dmabuf_priv *hl_dmabuf = dmabuf->priv;
struct hl_ctx *ctx = hl_dmabuf->ctx;
struct hl_device *hdev = ctx->hdev;
struct hl_vm *vm = &hdev->vm;
if (hl_dmabuf->phys_pg_pack) {
spin_lock(&vm->idr_lock);
hl_dmabuf->phys_pg_pack->exporting_cnt--;
spin_unlock(&vm->idr_lock);
}
hl_ctx_put(hl_dmabuf->ctx);
kfree(hl_dmabuf);
}
static const struct dma_buf_ops habanalabs_dmabuf_ops = {
.attach = hl_dmabuf_attach,
.map_dma_buf = hl_map_dmabuf,
.unmap_dma_buf = hl_unmap_dmabuf,
.release = hl_release_dmabuf,
};
static int export_dmabuf_common(struct hl_ctx *ctx,
struct hl_dmabuf_priv *hl_dmabuf,
u64 total_size, int flags, int *dmabuf_fd)
{
DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
struct hl_device *hdev = ctx->hdev;
int rc, fd;
exp_info.ops = &habanalabs_dmabuf_ops;
exp_info.size = total_size;
exp_info.flags = flags;
exp_info.priv = hl_dmabuf;
hl_dmabuf->dmabuf = dma_buf_export(&exp_info);
if (IS_ERR(hl_dmabuf->dmabuf)) {
dev_err(hdev->dev, "failed to export dma-buf\n");
return PTR_ERR(hl_dmabuf->dmabuf);
}
fd = dma_buf_fd(hl_dmabuf->dmabuf, flags);
if (fd < 0) {
dev_err(hdev->dev, "failed to get a file descriptor for a dma-buf\n");
rc = fd;
goto err_dma_buf_put;
}
hl_dmabuf->ctx = ctx;
hl_ctx_get(hdev, hl_dmabuf->ctx);
*dmabuf_fd = fd;
return 0;
err_dma_buf_put:
dma_buf_put(hl_dmabuf->dmabuf);
return rc;
}
/**
* export_dmabuf_from_addr() - export a dma-buf object for the given memory
* address and size.
* @ctx: pointer to the context structure.
* @device_addr: device memory physical address.
* @size: size of device memory.
* @flags: DMA-BUF file/FD flags.
* @dmabuf_fd: pointer to result FD that represents the dma-buf object.
*
* Create and export a dma-buf object for an existing memory allocation inside
* the device memory, and return a FD which is associated with the dma-buf
* object.
*
* Return: 0 on success, non-zero for failure.
*/
static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 device_addr,
u64 size, int flags, int *dmabuf_fd)
{
struct hl_dmabuf_priv *hl_dmabuf;
struct hl_device *hdev = ctx->hdev;
struct asic_fixed_properties *prop;
u64 bar_address;
int rc;
prop = &hdev->asic_prop;
if (!IS_ALIGNED(device_addr, PAGE_SIZE)) {
dev_dbg(hdev->dev,
"exported device memory address 0x%llx should be aligned to 0x%lx\n",
device_addr, PAGE_SIZE);
return -EINVAL;
}
if (size < PAGE_SIZE) {
dev_dbg(hdev->dev,
"exported device memory size %llu should be equal to or greater than %lu\n",
size, PAGE_SIZE);
return -EINVAL;
}
if (device_addr < prop->dram_user_base_address ||
device_addr + size > prop->dram_end_address ||
device_addr + size < device_addr) {
dev_dbg(hdev->dev,
"DRAM memory range 0x%llx (+0x%llx) is outside of DRAM boundaries\n",
device_addr, size);
return -EINVAL;
}
bar_address = hdev->dram_pci_bar_start +
(device_addr - prop->dram_base_address);
if (bar_address + size >
hdev->dram_pci_bar_start + prop->dram_pci_bar_size ||
bar_address + size < bar_address) {
dev_dbg(hdev->dev,
"DRAM memory range 0x%llx (+0x%llx) is outside of PCI BAR boundaries\n",
device_addr, size);
return -EINVAL;
}
hl_dmabuf = kzalloc(sizeof(*hl_dmabuf), GFP_KERNEL);
if (!hl_dmabuf)
return -ENOMEM;
hl_dmabuf->device_address = device_addr;
rc = export_dmabuf_common(ctx, hl_dmabuf, size, flags, dmabuf_fd);
if (rc)
goto err_free_dmabuf_wrapper;
return 0;
err_free_dmabuf_wrapper:
kfree(hl_dmabuf);
return rc;
}
/**
* export_dmabuf_from_handle() - export a dma-buf object for the given memory
* handle.
* @ctx: pointer to the context structure.
* @handle: device memory allocation handle.
* @flags: DMA-BUF file/FD flags.
* @dmabuf_fd: pointer to result FD that represents the dma-buf object.
*
* Create and export a dma-buf object for an existing memory allocation inside
* the device memory, and return a FD which is associated with the dma-buf
* object.
*
* Return: 0 on success, non-zero for failure.
*/
static int export_dmabuf_from_handle(struct hl_ctx *ctx, u64 handle, int flags,
int *dmabuf_fd)
{
struct hl_vm_phys_pg_pack *phys_pg_pack;
struct hl_dmabuf_priv *hl_dmabuf;
struct hl_device *hdev = ctx->hdev;
struct asic_fixed_properties *prop;
struct hl_vm *vm = &hdev->vm;
u64 bar_address;
int rc, i;
prop = &hdev->asic_prop;
if (upper_32_bits(handle)) {
dev_dbg(hdev->dev, "no match for handle 0x%llx\n", handle);
return -EINVAL;
}
spin_lock(&vm->idr_lock);
phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, (u32) handle);
if (!phys_pg_pack) {
spin_unlock(&vm->idr_lock);
dev_dbg(hdev->dev, "no match for handle 0x%x\n", (u32) handle);
return -EINVAL;
}
/* increment now to avoid freeing device memory while exporting */
phys_pg_pack->exporting_cnt++;
spin_unlock(&vm->idr_lock);
if (phys_pg_pack->vm_type != VM_TYPE_PHYS_PACK) {
dev_dbg(hdev->dev, "handle 0x%llx does not represent DRAM memory\n", handle);
rc = -EINVAL;
goto err_dec_exporting_cnt;
}
for (i = 0 ; i < phys_pg_pack->npages ; i++) {
bar_address = hdev->dram_pci_bar_start +
(phys_pg_pack->pages[i] -
prop->dram_base_address);
if (bar_address + phys_pg_pack->page_size >
hdev->dram_pci_bar_start + prop->dram_pci_bar_size ||
bar_address + phys_pg_pack->page_size < bar_address) {
dev_dbg(hdev->dev,
"DRAM memory range 0x%llx (+0x%x) is outside of PCI BAR boundaries\n",
phys_pg_pack->pages[i],
phys_pg_pack->page_size);
rc = -EINVAL;
goto err_dec_exporting_cnt;
}
}
hl_dmabuf = kzalloc(sizeof(*hl_dmabuf), GFP_KERNEL);
if (!hl_dmabuf) {
rc = -ENOMEM;
goto err_dec_exporting_cnt;
}
hl_dmabuf->phys_pg_pack = phys_pg_pack;
rc = export_dmabuf_common(ctx, hl_dmabuf, phys_pg_pack->total_size,
flags, dmabuf_fd);
if (rc)
goto err_free_dmabuf_wrapper;
return 0;
err_free_dmabuf_wrapper:
kfree(hl_dmabuf);
err_dec_exporting_cnt:
spin_lock(&vm->idr_lock);
phys_pg_pack->exporting_cnt--;
spin_unlock(&vm->idr_lock);
return rc;
}
static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
{
struct hl_device *hdev = hpriv->hdev;
struct hl_ctx *ctx = hpriv->ctx;
u64 block_handle, device_addr = 0;
u32 handle = 0, block_size;
int rc;
int rc, dmabuf_fd = -EBADF;
switch (args->in.op) {
case HL_MEM_OP_ALLOC:
......@@ -1542,6 +2023,16 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
args->out.block_size = block_size;
break;
case HL_MEM_OP_EXPORT_DMABUF_FD:
rc = export_dmabuf_from_addr(ctx,
args->in.export_dmabuf_fd.handle,
args->in.export_dmabuf_fd.mem_size,
args->in.flags,
&dmabuf_fd);
memset(args, 0, sizeof(*args));
args->out.fd = dmabuf_fd;
break;
default:
dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
rc = -ENOTTY;
......@@ -1560,7 +2051,7 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
struct hl_ctx *ctx = hpriv->ctx;
u64 block_handle, device_addr = 0;
u32 handle = 0, block_size;
int rc;
int rc, dmabuf_fd = -EBADF;
if (!hl_device_operational(hdev, &status)) {
dev_warn_ratelimited(hdev->dev,
......@@ -1651,6 +2142,22 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
args->out.block_size = block_size;
break;
case HL_MEM_OP_EXPORT_DMABUF_FD:
if (hdev->asic_prop.dram_supports_virtual_memory)
rc = export_dmabuf_from_handle(ctx,
args->in.export_dmabuf_fd.handle,
args->in.flags,
&dmabuf_fd);
else
rc = export_dmabuf_from_addr(ctx,
args->in.export_dmabuf_fd.handle,
args->in.export_dmabuf_fd.mem_size,
args->in.flags,
&dmabuf_fd);
memset(args, 0, sizeof(*args));
args->out.fd = dmabuf_fd;
break;
default:
dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
rc = -ENOTTY;
......
......@@ -501,23 +501,25 @@ static void hl_mmu_pa_page_with_offset(struct hl_ctx *ctx, u64 virt_addr,
if ((hops->range_type == HL_VA_RANGE_TYPE_DRAM) &&
!is_power_of_2(prop->dram_page_size)) {
unsigned long dram_page_size = prop->dram_page_size;
u64 page_offset_mask;
u64 phys_addr_mask;
u32 bit;
u64 dram_page_size, dram_base, abs_phys_addr, abs_virt_addr,
page_id, page_start;
u32 page_off;
/*
* find last set bit in page_size to cover all bits of page
* offset. note that 1 has to be added to bit index.
* note that the internal ulong variable is used to avoid
* alignment issue.
* Bit arithmetics cannot be used for non power of two page
* sizes. In addition, since bit arithmetics is not used,
* we cannot ignore dram base. All that shall be considered.
*/
bit = find_last_bit(&dram_page_size,
sizeof(dram_page_size) * BITS_PER_BYTE) + 1;
page_offset_mask = (BIT_ULL(bit) - 1);
phys_addr_mask = ~page_offset_mask;
*phys_addr = (tmp_phys_addr & phys_addr_mask) |
(virt_addr & page_offset_mask);
dram_page_size = prop->dram_page_size;
dram_base = prop->dram_base_address;
abs_phys_addr = tmp_phys_addr - dram_base;
abs_virt_addr = virt_addr - dram_base;
page_id = DIV_ROUND_DOWN_ULL(abs_phys_addr, dram_page_size);
page_start = page_id * dram_page_size;
div_u64_rem(abs_virt_addr, dram_page_size, &page_off);
*phys_addr = page_start + page_off + dram_base;
} else {
/*
* find the correct hop shift field in hl_mmu_properties
......
......@@ -206,12 +206,12 @@ static ssize_t soft_reset_store(struct device *dev,
goto out;
}
if (!hdev->allow_external_soft_reset) {
dev_err(hdev->dev, "Device does not support soft-reset\n");
if (!hdev->allow_inference_soft_reset) {
dev_err(hdev->dev, "Device does not support inference soft-reset\n");
goto out;
}
dev_warn(hdev->dev, "Soft-Reset requested through sysfs\n");
dev_warn(hdev->dev, "Inference Soft-Reset requested through sysfs\n");
hl_device_reset(hdev, 0);
......
# SPDX-License-Identifier: GPL-2.0-only
HL_GAUDI_FILES := gaudi/gaudi.o gaudi/gaudi_hwmgr.o gaudi/gaudi_security.o \
HL_GAUDI_FILES := gaudi/gaudi.o gaudi/gaudi_security.o \
gaudi/gaudi_coresight.o
......@@ -661,6 +661,9 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
prop->server_type = HL_SERVER_TYPE_UNKNOWN;
prop->clk_pll_index = HL_GAUDI_MME_PLL;
prop->max_freq_value = GAUDI_MAX_CLK_FREQ;
return 0;
}
......@@ -795,6 +798,7 @@ static int gaudi_early_init(struct hl_device *hdev)
}
prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);
/* If FW security is enabled at this point it means no access to ELBI */
if (hdev->asic_prop.fw_security_enabled) {
......@@ -1837,8 +1841,6 @@ static int gaudi_sw_init(struct hl_device *hdev)
gaudi->cpucp_info_get = gaudi_cpucp_info_get;
gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
hdev->asic_specific = gaudi;
/* Create DMA pool for small allocations */
......@@ -2616,7 +2618,7 @@ static void gaudi_init_e2e(struct hl_device *hdev)
static void gaudi_init_hbm_cred(struct hl_device *hdev)
{
uint32_t hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
if (hdev->asic_prop.fw_security_enabled)
return;
......@@ -7932,6 +7934,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
{
struct gaudi_device *gaudi = hdev->asic_specific;
u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
u32 fw_fatal_err_flag = 0;
u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
>> EQ_CTL_EVENT_TYPE_SHIFT);
bool reset_required;
......@@ -7972,6 +7975,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
gaudi_print_irq_info(hdev, event_type, true);
gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
fw_fatal_err_flag = HL_RESET_FW_FATAL_ERR;
goto reset_device;
case GAUDI_EVENT_GIC500:
......@@ -7979,6 +7983,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
case GAUDI_EVENT_L2_RAM_ECC:
case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
gaudi_print_irq_info(hdev, event_type, false);
fw_fatal_err_flag = HL_RESET_FW_FATAL_ERR;
goto reset_device;
case GAUDI_EVENT_HBM0_SPI_0:
......@@ -7989,6 +7994,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
gaudi_hbm_read_interrupts(hdev,
gaudi_hbm_event_to_dev(event_type),
&eq_entry->hbm_ecc_data);
fw_fatal_err_flag = HL_RESET_FW_FATAL_ERR;
goto reset_device;
case GAUDI_EVENT_HBM0_SPI_1:
......@@ -8171,9 +8177,9 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
reset_device:
if (hdev->asic_prop.fw_security_enabled)
hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_FW);
hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_FW | fw_fatal_err_flag);
else if (hdev->hard_reset_on_fw_events)
hl_device_reset(hdev, HL_RESET_HARD);
hl_device_reset(hdev, HL_RESET_HARD | fw_fatal_err_flag);
else
hl_fw_unmask_irq(hdev, event_type);
}
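The hunks above only OR the new HL_RESET_FW_FATAL_ERR flag into the reset call; the reset-bypass decision itself lives in the common code, which is not part of this excerpt. A purely hypothetical sketch of such a check (the prev_reset_was_fw_fatal field is invented here for illustration and is not the driver's actual bookkeeping):

/* Hypothetical sketch only - not the actual common/device.c change.
 * Idea: if the previous reset was already triggered by a fatal f/w error
 * and another fatal error arrives, skip the reset to avoid a reset loop.
 */
static bool should_bypass_reset(struct hl_device *hdev, u32 flags)
{
	bool fatal = !!(flags & HL_RESET_FW_FATAL_ERR);

	if (fatal && hdev->prev_reset_was_fw_fatal) {
		dev_err(hdev->dev,
			"Consecutive f/w fatal errors, bypassing reset\n");
		return true;
	}

	hdev->prev_reset_was_fw_fatal = fatal;
	return false;
}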
......@@ -9439,9 +9445,9 @@ static const struct hl_asic_funcs gaudi_funcs = {
.debugfs_read64 = gaudi_debugfs_read64,
.debugfs_write64 = gaudi_debugfs_write64,
.debugfs_read_dma = gaudi_debugfs_read_dma,
.add_device_attr = gaudi_add_device_attr,
.add_device_attr = hl_add_device_attr,
.handle_eqe = gaudi_handle_eqe,
.set_pll_profile = gaudi_set_pll_profile,
.set_pll_profile = hl_set_pll_profile,
.get_events_stat = gaudi_get_events_stat,
.read_pte = gaudi_read_pte,
.write_pte = gaudi_write_pte,
......@@ -9465,7 +9471,7 @@ static const struct hl_asic_funcs gaudi_funcs = {
.halt_coresight = gaudi_halt_coresight,
.ctx_init = gaudi_ctx_init,
.ctx_fini = gaudi_ctx_fini,
.get_clk_rate = gaudi_get_clk_rate,
.get_clk_rate = hl_get_clk_rate,
.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
.load_firmware_to_device = gaudi_load_firmware_to_device,
.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
......
......@@ -319,7 +319,6 @@ struct gaudi_internal_qman_info {
* the actual number of internal queues because they are not in
* consecutive order.
* @hbm_bar_cur_addr: current address of HBM PCI bar.
* @max_freq_value: current max clk frequency.
* @events: array that holds all event id's
* @events_stat: array that holds histogram of all received events.
* @events_stat_aggregate: same as events_stat but doesn't get cleared on reset
......@@ -345,7 +344,6 @@ struct gaudi_device {
struct gaudi_collective_properties collective_props;
u64 hbm_bar_cur_addr;
u64 max_freq_value;
u32 events[GAUDI_EVENT_SIZE];
u32 events_stat[GAUDI_EVENT_SIZE];
......@@ -359,10 +357,8 @@ void gaudi_init_security(struct hl_device *hdev);
void gaudi_ack_protection_bits_errors(struct hl_device *hdev);
void gaudi_add_device_attr(struct hl_device *hdev,
struct attribute_group *dev_attr_grp);
void gaudi_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq);
int gaudi_debug_coresight(struct hl_device *hdev, void *data);
void gaudi_halt_coresight(struct hl_device *hdev);
int gaudi_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid);
#endif /* GAUDIP_H_ */
......@@ -471,6 +471,8 @@ int goya_set_fixed_properties(struct hl_device *hdev)
prop->server_type = HL_SERVER_TYPE_UNKNOWN;
prop->clk_pll_index = HL_GOYA_MME_PLL;
return 0;
}
......@@ -622,6 +624,7 @@ static int goya_early_init(struct hl_device *hdev)
}
prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);
hdev->dram_pci_bar_start = pci_resource_start(pdev, DDR_BAR_ID);
/* If FW security is enabled at this point it means no access to ELBI */
if (hdev->asic_prop.fw_security_enabled) {
......@@ -959,7 +962,7 @@ static int goya_sw_init(struct hl_device *hdev)
spin_lock_init(&goya->hw_queues_lock);
hdev->supports_coresight = true;
hdev->supports_soft_reset = true;
hdev->allow_external_soft_reset = true;
hdev->allow_inference_soft_reset = true;
hdev->supports_wait_for_multi_cs = false;
hdev->asic_funcs->set_pci_memory_regions(hdev);
......@@ -4829,6 +4832,12 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
case GOYA_ASYNC_EVENT_ID_AXI_ECC:
case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
goya_print_irq_info(hdev, event_type, false);
if (hdev->hard_reset_on_fw_events)
hl_device_reset(hdev, (HL_RESET_HARD |
HL_RESET_FW_FATAL_ERR));
break;
case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
goya_print_irq_info(hdev, event_type, false);
if (hdev->hard_reset_on_fw_events)
......@@ -5649,7 +5658,7 @@ static const struct hl_asic_funcs goya_funcs = {
.halt_coresight = goya_halt_coresight,
.ctx_init = goya_ctx_init,
.ctx_fini = goya_ctx_fini,
.get_clk_rate = goya_get_clk_rate,
.get_clk_rate = hl_get_clk_rate,
.get_queue_id_for_cq = goya_get_queue_id_for_cq,
.load_firmware_to_device = goya_load_firmware_to_device,
.load_boot_fit_to_device = goya_load_boot_fit_to_device,
......
......@@ -235,7 +235,6 @@ void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
void *vaddr);
void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev);
int goya_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
u32 goya_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx);
u64 goya_get_device_time(struct hl_device *hdev);
......
......@@ -32,37 +32,6 @@ void goya_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq)
}
}
int goya_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
{
long value;
if (!hl_device_operational(hdev, NULL))
return -ENODEV;
value = hl_get_frequency(hdev, HL_GOYA_MME_PLL, false);
if (value < 0) {
dev_err(hdev->dev, "Failed to retrieve device max clock %ld\n",
value);
return value;
}
*max_clk = (value / 1000 / 1000);
value = hl_get_frequency(hdev, HL_GOYA_MME_PLL, true);
if (value < 0) {
dev_err(hdev->dev,
"Failed to retrieve device current clock %ld\n",
value);
return value;
}
*cur_clk = (value / 1000 / 1000);
return 0;
}
static ssize_t mme_clk_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
......
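With frequency set/get unified, the per-ASIC copy removed above is presumably replaced by a single helper in common code that differs only in taking the PLL index from the ASIC properties (prop->clk_pll_index, set in the hunks above). A sketch under that assumption:

/* Sketch of the unified helper, assuming it mirrors the removed
 * goya_get_clk_rate() but uses the per-ASIC clk_pll_index property.
 */
int hl_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
{
	long value;

	if (!hl_device_operational(hdev, NULL))
		return -ENODEV;

	value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false);
	if (value < 0) {
		dev_err(hdev->dev, "Failed to retrieve device max clock %ld\n",
			value);
		return value;
	}

	*max_clk = (value / 1000 / 1000);

	value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true);
	if (value < 0) {
		dev_err(hdev->dev,
			"Failed to retrieve device current clock %ld\n", value);
		return value;
	}

	*cur_clk = (value / 1000 / 1000);

	return 0;
}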
......@@ -542,11 +542,14 @@ enum cpucp_packet_rc {
*/
enum cpucp_temp_type {
cpucp_temp_input,
cpucp_temp_min = 4,
cpucp_temp_min_hyst,
cpucp_temp_max = 6,
cpucp_temp_max_hyst,
cpucp_temp_crit,
cpucp_temp_crit_hyst,
cpucp_temp_offset = 19,
cpucp_temp_lowest = 21,
cpucp_temp_highest = 22,
cpucp_temp_reset_history = 23
};
......@@ -555,6 +558,7 @@ enum cpucp_in_attributes {
cpucp_in_input,
cpucp_in_min,
cpucp_in_max,
cpucp_in_lowest = 6,
cpucp_in_highest = 7,
cpucp_in_reset_history
};
......@@ -563,6 +567,7 @@ enum cpucp_curr_attributes {
cpucp_curr_input,
cpucp_curr_min,
cpucp_curr_max,
cpucp_curr_lowest = 6,
cpucp_curr_highest = 7,
cpucp_curr_reset_history
};
......@@ -598,6 +603,16 @@ enum cpucp_pll_type_attributes {
cpucp_pll_pci,
};
/*
* cpucp_power_type aligns with hwmon_power_attributes
* defined in Linux kernel hwmon.h file
*/
enum cpucp_power_type {
CPUCP_POWER_INPUT = 8,
CPUCP_POWER_INPUT_HIGHEST = 9,
CPUCP_POWER_RESET_INPUT_HISTORY = 11
};
/*
* MSI type enumeration table for all ASICs and future SW versions.
* For future ASIC-LKD compatibility, we can only add new enumerations.
......@@ -731,6 +746,9 @@ struct cpucp_security_info {
* @pll_map: Bit map of supported PLLs for current ASIC version.
* @mme_binning_mask: MME binning mask,
* (0 = functional, 1 = binned)
* @dram_binning_mask: DRAM binning mask, 1 bit per dram instance
* (0 = functional 1 = binned)
* @memory_repair_flag: eFuse flag indicating memory repair
*/
struct cpucp_info {
struct cpucp_sensor sensors[CPUCP_MAX_SENSORS];
......@@ -749,7 +767,9 @@ struct cpucp_info {
__le64 reserved3;
__le64 reserved4;
__u8 reserved5;
__u8 pad[7];
__u8 dram_binning_mask;
__u8 memory_repair_flag;
__u8 pad[5];
struct cpucp_security_info sec_info;
__le32 reserved6;
__u8 pll_map[PLL_MAP_LEN];
......
......@@ -15,6 +15,28 @@
#define VERSION_MAX_LEN 128
enum cpu_boot_err {
CPU_BOOT_ERR_DRAM_INIT_FAIL = 0,
CPU_BOOT_ERR_FIT_CORRUPTED = 1,
CPU_BOOT_ERR_TS_INIT_FAIL = 2,
CPU_BOOT_ERR_DRAM_SKIPPED = 3,
CPU_BOOT_ERR_BMC_WAIT_SKIPPED = 4,
CPU_BOOT_ERR_NIC_DATA_NOT_RDY = 5,
CPU_BOOT_ERR_NIC_FW_FAIL = 6,
CPU_BOOT_ERR_SECURITY_NOT_RDY = 7,
CPU_BOOT_ERR_SECURITY_FAIL = 8,
CPU_BOOT_ERR_EFUSE_FAIL = 9,
CPU_BOOT_ERR_PRI_IMG_VER_FAIL = 10,
CPU_BOOT_ERR_SEC_IMG_VER_FAIL = 11,
CPU_BOOT_ERR_PLL_FAIL = 12,
CPU_BOOT_ERR_DEVICE_UNUSABLE_FAIL = 13,
CPU_BOOT_ERR_BOOT_FW_CRIT_ERR = 18,
CPU_BOOT_ERR_BINNING_FAIL = 19,
CPU_BOOT_ERR_ENABLED = 31,
CPU_BOOT_ERR_SCND_EN = 63,
CPU_BOOT_ERR_LAST = 64 /* we have 2 registers of 32 bits */
};
/*
* CPU error bits in BOOT_ERROR registers
*
......@@ -78,25 +100,13 @@
* CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL Device is unusable and customer support
* should be contacted.
*
* CPU_BOOT_ERR0_ARC0_HALT_ACK_NOT_RCVD HALT ACK from ARC0 is not received
* within specified retries after issuing
* HALT request. ARC0 appears to be in bad
* reset.
*
* CPU_BOOT_ERR0_ARC1_HALT_ACK_NOT_RCVD HALT ACK from ARC1 is not received
* within specified retries after issuing
* HALT request. ARC1 appears to be in bad
* reset.
* CPU_BOOT_ERR0_BOOT_FW_CRIT_ERR Critical error was detected during
* the execution of ppboot or preboot.
* for example: stack overflow.
*
* CPU_BOOT_ERR0_ARC0_RUN_ACK_NOT_RCVD RUN ACK from ARC0 is not received
* within specified timeout after issuing
* RUN request. ARC0 appears to be in bad
* reset.
*
* CPU_BOOT_ERR0_ARC1_RUN_ACK_NOT_RCVD RUN ACK from ARC1 is not received
* within specified timeout after issuing
* RUN request. ARC1 appears to be in bad
* reset.
* CPU_BOOT_ERR0_BINNING_FAIL Binning settings failed, meaning
* malfunctioning components might still be
* in use.
*
* CPU_BOOT_ERR0_ENABLED Error registers enabled.
* This is a main indication that the
......@@ -104,26 +114,57 @@
* registers. Meaning the error bits are
* not garbage, but actual error statuses.
*/
#define CPU_BOOT_ERR0_DRAM_INIT_FAIL (1 << 0)
#define CPU_BOOT_ERR0_FIT_CORRUPTED (1 << 1)
#define CPU_BOOT_ERR0_TS_INIT_FAIL (1 << 2)
#define CPU_BOOT_ERR0_DRAM_SKIPPED (1 << 3)
#define CPU_BOOT_ERR0_BMC_WAIT_SKIPPED (1 << 4)
#define CPU_BOOT_ERR0_NIC_DATA_NOT_RDY (1 << 5)
#define CPU_BOOT_ERR0_NIC_FW_FAIL (1 << 6)
#define CPU_BOOT_ERR0_SECURITY_NOT_RDY (1 << 7)
#define CPU_BOOT_ERR0_SECURITY_FAIL (1 << 8)
#define CPU_BOOT_ERR0_EFUSE_FAIL (1 << 9)
#define CPU_BOOT_ERR0_PRI_IMG_VER_FAIL (1 << 10)
#define CPU_BOOT_ERR0_SEC_IMG_VER_FAIL (1 << 11)
#define CPU_BOOT_ERR0_PLL_FAIL (1 << 12)
#define CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL (1 << 13)
#define CPU_BOOT_ERR0_ARC0_HALT_ACK_NOT_RCVD (1 << 14)
#define CPU_BOOT_ERR0_ARC1_HALT_ACK_NOT_RCVD (1 << 15)
#define CPU_BOOT_ERR0_ARC0_RUN_ACK_NOT_RCVD (1 << 16)
#define CPU_BOOT_ERR0_ARC1_RUN_ACK_NOT_RCVD (1 << 17)
#define CPU_BOOT_ERR0_ENABLED (1 << 31)
#define CPU_BOOT_ERR1_ENABLED (1 << 31)
#define CPU_BOOT_ERR0_DRAM_INIT_FAIL (1 << CPU_BOOT_ERR_DRAM_INIT_FAIL)
#define CPU_BOOT_ERR0_FIT_CORRUPTED (1 << CPU_BOOT_ERR_FIT_CORRUPTED)
#define CPU_BOOT_ERR0_TS_INIT_FAIL (1 << CPU_BOOT_ERR_TS_INIT_FAIL)
#define CPU_BOOT_ERR0_DRAM_SKIPPED (1 << CPU_BOOT_ERR_DRAM_SKIPPED)
#define CPU_BOOT_ERR0_BMC_WAIT_SKIPPED (1 << CPU_BOOT_ERR_BMC_WAIT_SKIPPED)
#define CPU_BOOT_ERR0_NIC_DATA_NOT_RDY (1 << CPU_BOOT_ERR_NIC_DATA_NOT_RDY)
#define CPU_BOOT_ERR0_NIC_FW_FAIL (1 << CPU_BOOT_ERR_NIC_FW_FAIL)
#define CPU_BOOT_ERR0_SECURITY_NOT_RDY (1 << CPU_BOOT_ERR_SECURITY_NOT_RDY)
#define CPU_BOOT_ERR0_SECURITY_FAIL (1 << CPU_BOOT_ERR_SECURITY_FAIL)
#define CPU_BOOT_ERR0_EFUSE_FAIL (1 << CPU_BOOT_ERR_EFUSE_FAIL)
#define CPU_BOOT_ERR0_PRI_IMG_VER_FAIL (1 << CPU_BOOT_ERR_PRI_IMG_VER_FAIL)
#define CPU_BOOT_ERR0_SEC_IMG_VER_FAIL (1 << CPU_BOOT_ERR_SEC_IMG_VER_FAIL)
#define CPU_BOOT_ERR0_PLL_FAIL (1 << CPU_BOOT_ERR_PLL_FAIL)
#define CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL (1 << CPU_BOOT_ERR_DEVICE_UNUSABLE_FAIL)
#define CPU_BOOT_ERR0_BOOT_FW_CRIT_ERR (1 << CPU_BOOT_ERR_BOOT_FW_CRIT_ERR)
#define CPU_BOOT_ERR0_BINNING_FAIL (1 << CPU_BOOT_ERR_BINNING_FAIL)
#define CPU_BOOT_ERR0_ENABLED (1 << CPU_BOOT_ERR_ENABLED)
#define CPU_BOOT_ERR1_ENABLED (1 << CPU_BOOT_ERR_ENABLED)
enum cpu_boot_dev_sts {
CPU_BOOT_DEV_STS_SECURITY_EN = 0,
CPU_BOOT_DEV_STS_DEBUG_EN = 1,
CPU_BOOT_DEV_STS_WATCHDOG_EN = 2,
CPU_BOOT_DEV_STS_DRAM_INIT_EN = 3,
CPU_BOOT_DEV_STS_BMC_WAIT_EN = 4,
CPU_BOOT_DEV_STS_E2E_CRED_EN = 5,
CPU_BOOT_DEV_STS_HBM_CRED_EN = 6,
CPU_BOOT_DEV_STS_RL_EN = 7,
CPU_BOOT_DEV_STS_SRAM_SCR_EN = 8,
CPU_BOOT_DEV_STS_DRAM_SCR_EN = 9,
CPU_BOOT_DEV_STS_FW_HARD_RST_EN = 10,
CPU_BOOT_DEV_STS_PLL_INFO_EN = 11,
CPU_BOOT_DEV_STS_SP_SRAM_EN = 12,
CPU_BOOT_DEV_STS_CLK_GATE_EN = 13,
CPU_BOOT_DEV_STS_HBM_ECC_EN = 14,
CPU_BOOT_DEV_STS_PKT_PI_ACK_EN = 15,
CPU_BOOT_DEV_STS_FW_LD_COM_EN = 16,
CPU_BOOT_DEV_STS_FW_IATU_CONF_EN = 17,
CPU_BOOT_DEV_STS_FW_NIC_MAC_EN = 18,
CPU_BOOT_DEV_STS_DYN_PLL_EN = 19,
CPU_BOOT_DEV_STS_GIC_PRIVILEGED_EN = 20,
CPU_BOOT_DEV_STS_EQ_INDEX_EN = 21,
CPU_BOOT_DEV_STS_MULTI_IRQ_POLL_EN = 22,
CPU_BOOT_DEV_STS_FW_NIC_STAT_XPCS91_EN = 23,
CPU_BOOT_DEV_STS_FW_NIC_STAT_EXT_EN = 24,
CPU_BOOT_DEV_STS_IS_IDLE_CHECK_EN = 25,
CPU_BOOT_DEV_STS_MAP_HWMON_EN = 26,
CPU_BOOT_DEV_STS_ENABLED = 31,
CPU_BOOT_DEV_STS_SCND_EN = 63,
CPU_BOOT_DEV_STS_LAST = 64 /* we have 2 registers of 32 bits */
};
/*
* BOOT DEVICE STATUS bits in BOOT_DEVICE_STS registers
......@@ -233,7 +274,7 @@
* was not served before.
* Initialized in: linux
*
* CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN Use multiple scratchpad interfaces to
* CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN Use multiple scratchpad interfaces to
* prevent IRQs overriding each other.
* Initialized in: linux
*
......@@ -252,6 +293,11 @@
* where a bit is set if the engine is not idle.
* Initialized in: linux
*
* CPU_BOOT_DEV_STS0_MAP_HWMON_EN
* If set, means f/w supports proprietary
* HWMON enum mapping to cpucp enums.
* Initialized in: linux
*
* CPU_BOOT_DEV_STS0_ENABLED Device status register enabled.
* This is a main indication that the
* running FW populates the device status
......@@ -261,34 +307,35 @@
* Initialized in: preboot
*
*/
#define CPU_BOOT_DEV_STS0_SECURITY_EN (1 << 0)
#define CPU_BOOT_DEV_STS0_DEBUG_EN (1 << 1)
#define CPU_BOOT_DEV_STS0_WATCHDOG_EN (1 << 2)
#define CPU_BOOT_DEV_STS0_DRAM_INIT_EN (1 << 3)
#define CPU_BOOT_DEV_STS0_BMC_WAIT_EN (1 << 4)
#define CPU_BOOT_DEV_STS0_E2E_CRED_EN (1 << 5)
#define CPU_BOOT_DEV_STS0_HBM_CRED_EN (1 << 6)
#define CPU_BOOT_DEV_STS0_RL_EN (1 << 7)
#define CPU_BOOT_DEV_STS0_SRAM_SCR_EN (1 << 8)
#define CPU_BOOT_DEV_STS0_DRAM_SCR_EN (1 << 9)
#define CPU_BOOT_DEV_STS0_FW_HARD_RST_EN (1 << 10)
#define CPU_BOOT_DEV_STS0_PLL_INFO_EN (1 << 11)
#define CPU_BOOT_DEV_STS0_SP_SRAM_EN (1 << 12)
#define CPU_BOOT_DEV_STS0_CLK_GATE_EN (1 << 13)
#define CPU_BOOT_DEV_STS0_HBM_ECC_EN (1 << 14)
#define CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN (1 << 15)
#define CPU_BOOT_DEV_STS0_FW_LD_COM_EN (1 << 16)
#define CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN (1 << 17)
#define CPU_BOOT_DEV_STS0_FW_NIC_MAC_EN (1 << 18)
#define CPU_BOOT_DEV_STS0_DYN_PLL_EN (1 << 19)
#define CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN (1 << 20)
#define CPU_BOOT_DEV_STS0_EQ_INDEX_EN (1 << 21)
#define CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN (1 << 22)
#define CPU_BOOT_DEV_STS0_FW_NIC_STAT_XPCS91_EN (1 << 23)
#define CPU_BOOT_DEV_STS0_FW_NIC_STAT_EXT_EN (1 << 24)
#define CPU_BOOT_DEV_STS0_IS_IDLE_CHECK_EN (1 << 25)
#define CPU_BOOT_DEV_STS0_ENABLED (1 << 31)
#define CPU_BOOT_DEV_STS1_ENABLED (1 << 31)
#define CPU_BOOT_DEV_STS0_SECURITY_EN (1 << CPU_BOOT_DEV_STS_SECURITY_EN)
#define CPU_BOOT_DEV_STS0_DEBUG_EN (1 << CPU_BOOT_DEV_STS_DEBUG_EN)
#define CPU_BOOT_DEV_STS0_WATCHDOG_EN (1 << CPU_BOOT_DEV_STS_WATCHDOG_EN)
#define CPU_BOOT_DEV_STS0_DRAM_INIT_EN (1 << CPU_BOOT_DEV_STS_DRAM_INIT_EN)
#define CPU_BOOT_DEV_STS0_BMC_WAIT_EN (1 << CPU_BOOT_DEV_STS_BMC_WAIT_EN)
#define CPU_BOOT_DEV_STS0_E2E_CRED_EN (1 << CPU_BOOT_DEV_STS_E2E_CRED_EN)
#define CPU_BOOT_DEV_STS0_HBM_CRED_EN (1 << CPU_BOOT_DEV_STS_HBM_CRED_EN)
#define CPU_BOOT_DEV_STS0_RL_EN (1 << CPU_BOOT_DEV_STS_RL_EN)
#define CPU_BOOT_DEV_STS0_SRAM_SCR_EN (1 << CPU_BOOT_DEV_STS_SRAM_SCR_EN)
#define CPU_BOOT_DEV_STS0_DRAM_SCR_EN (1 << CPU_BOOT_DEV_STS_DRAM_SCR_EN)
#define CPU_BOOT_DEV_STS0_FW_HARD_RST_EN (1 << CPU_BOOT_DEV_STS_FW_HARD_RST_EN)
#define CPU_BOOT_DEV_STS0_PLL_INFO_EN (1 << CPU_BOOT_DEV_STS_PLL_INFO_EN)
#define CPU_BOOT_DEV_STS0_SP_SRAM_EN (1 << CPU_BOOT_DEV_STS_SP_SRAM_EN)
#define CPU_BOOT_DEV_STS0_CLK_GATE_EN (1 << CPU_BOOT_DEV_STS_CLK_GATE_EN)
#define CPU_BOOT_DEV_STS0_HBM_ECC_EN (1 << CPU_BOOT_DEV_STS_HBM_ECC_EN)
#define CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN (1 << CPU_BOOT_DEV_STS_PKT_PI_ACK_EN)
#define CPU_BOOT_DEV_STS0_FW_LD_COM_EN (1 << CPU_BOOT_DEV_STS_FW_LD_COM_EN)
#define CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN (1 << CPU_BOOT_DEV_STS_FW_IATU_CONF_EN)
#define CPU_BOOT_DEV_STS0_FW_NIC_MAC_EN (1 << CPU_BOOT_DEV_STS_FW_NIC_MAC_EN)
#define CPU_BOOT_DEV_STS0_DYN_PLL_EN (1 << CPU_BOOT_DEV_STS_DYN_PLL_EN)
#define CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN (1 << CPU_BOOT_DEV_STS_GIC_PRIVILEGED_EN)
#define CPU_BOOT_DEV_STS0_EQ_INDEX_EN (1 << CPU_BOOT_DEV_STS_EQ_INDEX_EN)
#define CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN (1 << CPU_BOOT_DEV_STS_MULTI_IRQ_POLL_EN)
#define CPU_BOOT_DEV_STS0_FW_NIC_STAT_XPCS91_EN (1 << CPU_BOOT_DEV_STS_FW_NIC_STAT_XPCS91_EN)
#define CPU_BOOT_DEV_STS0_FW_NIC_STAT_EXT_EN (1 << CPU_BOOT_DEV_STS_FW_NIC_STAT_EXT_EN)
#define CPU_BOOT_DEV_STS0_IS_IDLE_CHECK_EN (1 << CPU_BOOT_DEV_STS_IS_IDLE_CHECK_EN)
#define CPU_BOOT_DEV_STS0_MAP_HWMON_EN (1 << CPU_BOOT_DEV_STS_MAP_HWMON_EN)
#define CPU_BOOT_DEV_STS0_ENABLED (1 << CPU_BOOT_DEV_STS_ENABLED)
#define CPU_BOOT_DEV_STS1_ENABLED (1 << CPU_BOOT_DEV_STS_ENABLED)
enum cpu_boot_status {
CPU_BOOT_STATUS_NA = 0, /* Default value after reset of chip */
......@@ -405,6 +452,8 @@ struct cpu_dyn_regs {
enum comms_msg_type {
HL_COMMS_DESC_TYPE = 0,
HL_COMMS_RESET_CAUSE_TYPE = 1,
HL_COMMS_FW_CFG_SKIP_TYPE = 2,
HL_COMMS_BINNING_CONF_TYPE = 3,
};
/* TODO: remove this struct after the code is updated to use message */
......@@ -464,6 +513,9 @@ struct lkd_fw_comms_msg {
struct {
__u8 reset_cause;
};
struct {
__u8 fw_cfg_skip; /* 1 - skip, 0 - don't skip */
};
};
};
......@@ -507,8 +559,6 @@ struct lkd_fw_comms_msg {
* COMMS_SKIP_BMC Perform actions required for BMC-less servers.
* Do not wait for BMC response.
*
* COMMS_LOW_PLL_OPP Initialize PLLs for low OPP.
*
* COMMS_PREP_DESC_ELBI Same as COMMS_PREP_DESC only that the memory
* space is allocated in a ELBI access only
* address range.
......@@ -524,7 +574,6 @@ enum comms_cmd {
COMMS_RST_DEV = 6,
COMMS_GOTO_WFE = 7,
COMMS_SKIP_BMC = 8,
COMMS_LOW_PLL_OPP = 9,
COMMS_PREP_DESC_ELBI = 10,
COMMS_INVLD_LAST
};
......
......@@ -8,8 +8,6 @@
#ifndef GAUDI_FW_IF_H
#define GAUDI_FW_IF_H
#include <linux/types.h>
#define GAUDI_EVENT_QUEUE_MSI_IDX 8
#define GAUDI_NIC_PORT1_MSI_IDX 10
#define GAUDI_NIC_PORT3_MSI_IDX 12
......@@ -78,13 +76,13 @@ struct gaudi_nic_status {
__u32 high_ber_cnt;
};
struct gaudi_flops_2_data {
struct gaudi_cold_rst_data {
union {
struct {
__u32 spsram_init_done : 1;
__u32 reserved : 31;
u32 spsram_init_done : 1;
u32 reserved : 31;
};
__u32 data;
__le32 data;
};
};
......
......@@ -33,6 +33,7 @@
#define mmRDWR_TEST mmPSOC_GLOBAL_CONF_SCRATCHPAD_30
#define mmBTL_ID mmPSOC_GLOBAL_CONF_SCRATCHPAD_31
#define mmPREBOOT_PCIE_EN mmPSOC_GLOBAL_CONF_COLD_RST_FLOPS_1
#define mmCOLD_RST_DATA mmPSOC_GLOBAL_CONF_COLD_RST_FLOPS_2
#define mmUPD_PENDING_STS mmPSOC_GLOBAL_CONF_COLD_RST_FLOPS_3
#endif /* GAUDI_REG_MAP_H_ */
......@@ -272,6 +272,16 @@ enum hl_gaudi_pll_index {
HL_GAUDI_PLL_MAX
};
/**
* enum hl_device_status - Device status information.
* @HL_DEVICE_STATUS_OPERATIONAL: Device is operational.
* @HL_DEVICE_STATUS_IN_RESET: Device is currently during reset.
* @HL_DEVICE_STATUS_MALFUNCTION: Device is unusable.
* @HL_DEVICE_STATUS_NEEDS_RESET: Device needs reset because auto reset was disabled.
* @HL_DEVICE_STATUS_IN_DEVICE_CREATION: Device is operational but its creation is still in
* progress.
* @HL_DEVICE_STATUS_LAST: Last status.
*/
enum hl_device_status {
HL_DEVICE_STATUS_OPERATIONAL,
HL_DEVICE_STATUS_IN_RESET,
......@@ -556,33 +566,30 @@ enum gaudi_dcores {
HL_GAUDI_ES_DCORE
};
/**
* struct hl_info_args - Main structure to retrieve device related information.
* @return_pointer: User space address of the relevant structure related to HL_INFO_* operation
* mentioned in @op.
* @return_size: Size of the structure used in @return_pointer, just like "size" in "snprintf", it
* limits how many bytes the kernel can write. For hw_events array, the size should be
* hl_info_hw_ip_info.num_of_events * sizeof(__u32).
* @op: Defines which type of information to be retrieved. Refer HL_INFO_* for details.
* @dcore_id: DCORE id for which the information is relevant (for Gaudi refer to enum gaudi_dcores).
* @ctx_id: Context ID of the user. Currently not in use.
* @period_ms: Period value, in milliseconds, for utilization rate in range 100ms - 1000ms in 100 ms
* resolution. Currently not in use.
* @pll_index: Index as defined in hl_<asic type>_pll_index enumeration.
* @pad: Padding to 64 bit.
*/
struct hl_info_args {
/* Location of relevant struct in userspace */
__u64 return_pointer;
/*
* The size of the return value. Just like "size" in "snprintf",
* it limits how many bytes the kernel can write
*
* For hw_events array, the size should be
* hl_info_hw_ip_info.num_of_events * sizeof(__u32)
*/
__u32 return_size;
/* HL_INFO_* */
__u32 op;
union {
/* Dcore id for which the information is relevant.
* For Gaudi refer to 'enum gaudi_dcores'
*/
__u32 dcore_id;
/* Context ID - Currently not in use */
__u32 ctx_id;
/* Period value for utilization rate (100ms - 1000ms, in 100ms
* resolution.
*/
__u32 period_ms;
/* PLL frequency retrieval */
__u32 pll_index;
};
......@@ -890,11 +897,7 @@ struct hl_wait_cs_in {
*/
__u64 addr;
/* Target value for completion comparison */
__u32 target;
/* Absolute timeout to wait for interrupt
* in microseconds
*/
__u32 interrupt_timeout_us;
__u64 target;
};
};
......@@ -910,7 +913,12 @@ struct hl_wait_cs_in {
/* Multi CS API info- valid entries in multi-CS array */
__u8 seq_arr_len;
__u8 pad[7];
__u8 pad[3];
/* Absolute timeout to wait for an interrupt in microseconds.
* Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT is set
*/
__u32 interrupt_timeout_us;
};
#define HL_WAIT_CS_STATUS_COMPLETED 0
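The target value is now a 64-bit fence and the absolute interrupt timeout moved into its own field outside the union. A hedged user-space sketch of the wait call (the uapi header path, HL_IOCTL_WAIT_CS and HL_WAIT_CS_FLAGS_INTERRUPT are taken from the existing interface; the interrupt index encoding in the upper flag bits is omitted for brevity):

/* User-space sketch: wait until the 64-bit value at 'cq_counter_addr'
 * reaches 'target', or until the absolute timeout expires.
 */
#include <sys/ioctl.h>
#include <misc/habanalabs.h>

static int wait_on_user_interrupt(int dev_fd, __u64 cq_counter_addr, __u64 target)
{
	union hl_wait_cs_args args = {0};
	int rc;

	args.in.addr = cq_counter_addr;
	args.in.target = target;                /* 64-bit fence value */
	args.in.flags = HL_WAIT_CS_FLAGS_INTERRUPT;
	args.in.interrupt_timeout_us = 1000000; /* 1 second absolute timeout */

	rc = ioctl(dev_fd, HL_IOCTL_WAIT_CS, &args);
	if (rc)
		return rc;

	return args.out.status == HL_WAIT_CS_STATUS_COMPLETED ? 0 : -1;
}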
......@@ -952,6 +960,10 @@ union hl_wait_cs_args {
#define HL_MEM_OP_UNMAP 3
/* Opcode to map a hw block */
#define HL_MEM_OP_MAP_BLOCK 4
/* Opcode to create DMA-BUF object for an existing device memory allocation
* and to export an FD of that DMA-BUF back to the caller
*/
#define HL_MEM_OP_EXPORT_DMABUF_FD 5
/* Memory flags */
#define HL_MEM_CONTIGUOUS 0x1
......@@ -1023,11 +1035,26 @@ struct hl_mem_in {
/* Virtual address returned from HL_MEM_OP_MAP */
__u64 device_virt_addr;
} unmap;
/* HL_MEM_OP_EXPORT_DMABUF_FD */
struct {
/* Handle returned from HL_MEM_OP_ALLOC. In Gaudi,
* where we don't have MMU for the device memory, the
* driver expects a physical address (instead of
* a handle) in the device memory space.
*/
__u64 handle;
/* Size of memory allocation. Relevant only for GAUDI */
__u64 mem_size;
} export_dmabuf_fd;
};
/* HL_MEM_OP_* */
__u32 op;
/* HL_MEM_* flags */
/* HL_MEM_* flags.
* For the HL_MEM_OP_EXPORT_DMABUF_FD opcode, this field holds the
* DMA-BUF file/FD flags.
*/
__u32 flags;
/* Context ID - Currently not in use */
__u32 ctx_id;
......@@ -1064,6 +1091,13 @@ struct hl_mem_out {
__u32 pad;
};
/* Returned in HL_MEM_OP_EXPORT_DMABUF_FD. Represents the
* DMA-BUF object that was created to describe a memory
* allocation on the device's memory space. The FD should be
* passed to the importer driver
*/
__s32 fd;
};
};
......
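Putting the new opcode and output field together, a hedged user-space sketch of exporting a device memory allocation as a DMA-BUF FD (HL_IOCTL_MEMORY and the uapi header path are taken from the existing interface; on Gaudi, per the field comments above, 'handle' would carry a DRAM physical address and mem_size must be set):

/* User-space sketch: export the allocation identified by 'handle' as a
 * DMA-BUF and return its FD, to be passed to the importer driver.
 */
#include <fcntl.h>
#include <sys/ioctl.h>
#include <misc/habanalabs.h>

static int export_device_mem(int dev_fd, __u64 handle, __u64 mem_size,
			     int *dmabuf_fd)
{
	union hl_mem_args args = {0};
	int rc;

	args.in.op = HL_MEM_OP_EXPORT_DMABUF_FD;
	args.in.export_dmabuf_fd.handle = handle;     /* DRAM physical address on Gaudi */
	args.in.export_dmabuf_fd.mem_size = mem_size; /* used only on Gaudi */
	args.in.flags = O_RDWR | O_CLOEXEC;           /* DMA-BUF file flags */

	rc = ioctl(dev_fd, HL_IOCTL_MEMORY, &args);
	if (rc)
		return rc;

	*dmabuf_fd = args.out.fd;
	return 0;
}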