diff --git a/drivers/misc/habanalabs/Makefile b/drivers/misc/habanalabs/Makefile
index 482f6227dbba184ee8e810211f75f8145b1eb5fe..421ebd903069bf5ada429ec1886abf642069fe21 100644
--- a/drivers/misc/habanalabs/Makefile
+++ b/drivers/misc/habanalabs/Makefile
@@ -13,3 +13,6 @@ habanalabs-$(CONFIG_DEBUG_FS) += debugfs.o
 
 include $(src)/goya/Makefile
 habanalabs-y += $(HL_GOYA_FILES)
+
+include $(src)/gaudi/Makefile
+habanalabs-y += $(HL_GAUDI_FILES)
diff --git a/drivers/misc/habanalabs/gaudi/Makefile b/drivers/misc/habanalabs/gaudi/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..b30b523881a01203e9f1fd4ad3bc1503273c0677
--- /dev/null
+++ b/drivers/misc/habanalabs/gaudi/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+subdir-ccflags-y += -I$(src)
+
+HL_GAUDI_FILES := gaudi/gaudi.o
\ No newline at end of file
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
new file mode 100644
index 0000000000000000000000000000000000000000..8f3591e23a3cd664e92c1961ac93f952ea85a97d
--- /dev/null
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -0,0 +1,7360 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2020 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include "gaudiP.h"
+#include "include/hw_ip/mmu/mmu_general.h"
+#include "include/hw_ip/mmu/mmu_v1_1.h"
+#include "include/gaudi/gaudi_masks.h"
+#include "include/gaudi/gaudi_fw_if.h"
+#include "include/gaudi/gaudi_reg_map.h"
+#include "include/gaudi/gaudi_async_ids_map.h"
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/firmware.h>
+#include <linux/hwmon.h>
+#include <linux/genalloc.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/iommu.h>
+#include <linux/seq_file.h>
+
+/*
+ * Gaudi security scheme:
+ *
+ * 1. Host is protected by:
+ *        - Range registers
+ *        - MMU
+ *
+ * 2. DDR is protected by:
+ *        - Range registers (protect the first 512MB)
+ *
+ * 3. Configuration is protected by:
+ *        - Range registers
+ *        - Protection bits
+ *
+ * MMU is always enabled.
+ *
+ * QMAN DMA channels 0,1,5 (PCI DMA):
+ *     - DMA is not secured.
+ *     - PQ and CQ are secured.
+ *     - CP is secured: The driver needs to parse CB but WREG should be allowed
+ *                      because of TDMA (tensor DMA). Hence, WREG is always left
+ *                      unsecured.
+ *
+ * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
+ * channel 0 to be secured, execute the DMA and change it back to non-secured.
+ * Currently, the driver doesn't use the DMA while there are compute jobs
+ * running.
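+ *
+ * A minimal sketch of that secure-then-restore flow (illustrative only;
+ * the helper gaudi_set_dma_ch0_secured() is hypothetical and not part of
+ * this patch, while gaudi_send_job_on_qman0() is the real entry point):
+ *
+ *	if (hdev->asic_funcs->is_device_idle(hdev, ...)) {
+ *		gaudi_set_dma_ch0_secured(hdev, true);
+ *		rc = gaudi_send_job_on_qman0(hdev, job);
+ *		gaudi_set_dma_ch0_secured(hdev, false);
+ *	}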
+ * + * The current use cases for the driver to use the DMA are: + * - Clear SRAM on context switch (happens on context switch when device is + * idle) + * - MMU page tables area clear (happens on init) + * + * QMAN DMA 2-4,6,7, TPC, MME, NIC: + * PQ is secured and is located on the Host (HBM CON TPC3 bug) + * CQ, CP and the engine are not secured + * + */ + +#define GAUDI_BOOT_FIT_FILE "habanalabs/gaudi/gaudi-boot-fit.itb" +#define GAUDI_LINUX_FW_FILE "habanalabs/gaudi/gaudi-fit.itb" +#define GAUDI_TPC_FW_FILE "habanalabs/gaudi/gaudi_tpc.bin" + +#define GAUDI_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */ + +#define GAUDI_RESET_TIMEOUT_MSEC 1000 /* 1000ms */ +#define GAUDI_RESET_WAIT_MSEC 1 /* 1ms */ +#define GAUDI_CPU_RESET_WAIT_MSEC 200 /* 200ms */ +#define GAUDI_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */ + +#define GAUDI_PLDM_RESET_WAIT_MSEC 1000 /* 1s */ +#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC 20000 /* 20s */ +#define GAUDI_PLDM_SRESET_TIMEOUT_MSEC 14000 /* 14s */ +#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000 /* 1s */ +#define GAUDI_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100) +#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30) +#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30) +#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 1000000 /* 1s */ + +#define GAUDI_QMAN0_FENCE_VAL 0x72E91AB9 + +#define GAUDI_MAX_STRING_LEN 20 + +#define GAUDI_CB_POOL_CB_CNT 512 +#define GAUDI_CB_POOL_CB_SIZE 0x20000 /* 128KB */ + +#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT 3 + +#define GAUDI_NUM_OF_TPC_INTR_CAUSE 20 + +#define GAUDI_NUM_OF_QM_ERR_CAUSE 16 + +#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE 3 + +#define GAUDI_ARB_WDT_TIMEOUT 0x400000 + +static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = { + "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3", + "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3", + "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3", + "gaudi cpu eq" +}; + +static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = { + [GAUDI_PCI_DMA_1] = 0, + [GAUDI_PCI_DMA_2] = 1, + [GAUDI_PCI_DMA_3] = 5, + [GAUDI_HBM_DMA_1] = 2, + [GAUDI_HBM_DMA_2] = 3, + [GAUDI_HBM_DMA_3] = 4, + [GAUDI_HBM_DMA_4] = 6, + [GAUDI_HBM_DMA_5] = 7 +}; + +static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = { + [0] = GAUDI_QUEUE_ID_DMA_0_0, + [1] = GAUDI_QUEUE_ID_DMA_0_1, + [2] = GAUDI_QUEUE_ID_DMA_0_2, + [3] = GAUDI_QUEUE_ID_DMA_0_3, + [4] = GAUDI_QUEUE_ID_DMA_1_0, + [5] = GAUDI_QUEUE_ID_DMA_1_1, + [6] = GAUDI_QUEUE_ID_DMA_1_2, + [7] = GAUDI_QUEUE_ID_DMA_1_3, + [8] = GAUDI_QUEUE_ID_DMA_5_0, + [9] = GAUDI_QUEUE_ID_DMA_5_1, + [10] = GAUDI_QUEUE_ID_DMA_5_2, + [11] = GAUDI_QUEUE_ID_DMA_5_3 +}; + +static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = { + [PACKET_WREG_32] = sizeof(struct packet_wreg32), + [PACKET_WREG_BULK] = sizeof(struct packet_wreg_bulk), + [PACKET_MSG_LONG] = sizeof(struct packet_msg_long), + [PACKET_MSG_SHORT] = sizeof(struct packet_msg_short), + [PACKET_CP_DMA] = sizeof(struct packet_cp_dma), + [PACKET_REPEAT] = sizeof(struct packet_repeat), + [PACKET_MSG_PROT] = sizeof(struct packet_msg_prot), + [PACKET_FENCE] = sizeof(struct packet_fence), + [PACKET_LIN_DMA] = sizeof(struct packet_lin_dma), + [PACKET_NOP] = sizeof(struct packet_nop), + [PACKET_STOP] = sizeof(struct packet_stop), + [PACKET_ARB_POINT] = sizeof(struct packet_arb_point), + [PACKET_WAIT] = sizeof(struct packet_wait), + [PACKET_LOAD_AND_EXE] = sizeof(struct packet_load_and_exe) +}; + +static const u32 gaudi_all_events[] = { + GAUDI_EVENT_PCIE_CORE_SERR, + 
GAUDI_EVENT_PCIE_CORE_DERR, + GAUDI_EVENT_PCIE_IF_SERR, + GAUDI_EVENT_PCIE_IF_DERR, + GAUDI_EVENT_PCIE_PHY_SERR, + GAUDI_EVENT_PCIE_PHY_DERR, + GAUDI_EVENT_TPC0_SERR, + GAUDI_EVENT_TPC1_SERR, + GAUDI_EVENT_TPC2_SERR, + GAUDI_EVENT_TPC3_SERR, + GAUDI_EVENT_TPC4_SERR, + GAUDI_EVENT_TPC5_SERR, + GAUDI_EVENT_TPC6_SERR, + GAUDI_EVENT_TPC7_SERR, + GAUDI_EVENT_TPC0_DERR, + GAUDI_EVENT_TPC1_DERR, + GAUDI_EVENT_TPC2_DERR, + GAUDI_EVENT_TPC3_DERR, + GAUDI_EVENT_TPC4_DERR, + GAUDI_EVENT_TPC5_DERR, + GAUDI_EVENT_TPC6_DERR, + GAUDI_EVENT_TPC7_DERR, + GAUDI_EVENT_MME0_ACC_SERR, + GAUDI_EVENT_MME0_ACC_DERR, + GAUDI_EVENT_MME0_SBAB_SERR, + GAUDI_EVENT_MME0_SBAB_DERR, + GAUDI_EVENT_MME1_ACC_SERR, + GAUDI_EVENT_MME1_ACC_DERR, + GAUDI_EVENT_MME1_SBAB_SERR, + GAUDI_EVENT_MME1_SBAB_DERR, + GAUDI_EVENT_MME2_ACC_SERR, + GAUDI_EVENT_MME2_ACC_DERR, + GAUDI_EVENT_MME2_SBAB_SERR, + GAUDI_EVENT_MME2_SBAB_DERR, + GAUDI_EVENT_MME3_ACC_SERR, + GAUDI_EVENT_MME3_ACC_DERR, + GAUDI_EVENT_MME3_SBAB_SERR, + GAUDI_EVENT_MME3_SBAB_DERR, + GAUDI_EVENT_DMA0_SERR_ECC, + GAUDI_EVENT_DMA1_SERR_ECC, + GAUDI_EVENT_DMA2_SERR_ECC, + GAUDI_EVENT_DMA3_SERR_ECC, + GAUDI_EVENT_DMA4_SERR_ECC, + GAUDI_EVENT_DMA5_SERR_ECC, + GAUDI_EVENT_DMA6_SERR_ECC, + GAUDI_EVENT_DMA7_SERR_ECC, + GAUDI_EVENT_DMA0_DERR_ECC, + GAUDI_EVENT_DMA1_DERR_ECC, + GAUDI_EVENT_DMA2_DERR_ECC, + GAUDI_EVENT_DMA3_DERR_ECC, + GAUDI_EVENT_DMA4_DERR_ECC, + GAUDI_EVENT_DMA5_DERR_ECC, + GAUDI_EVENT_DMA6_DERR_ECC, + GAUDI_EVENT_DMA7_DERR_ECC, + GAUDI_EVENT_CPU_IF_ECC_SERR, + GAUDI_EVENT_CPU_IF_ECC_DERR, + GAUDI_EVENT_PSOC_MEM_SERR, + GAUDI_EVENT_PSOC_CORESIGHT_SERR, + GAUDI_EVENT_PSOC_MEM_DERR, + GAUDI_EVENT_PSOC_CORESIGHT_DERR, + GAUDI_EVENT_SRAM0_SERR, + GAUDI_EVENT_SRAM1_SERR, + GAUDI_EVENT_SRAM2_SERR, + GAUDI_EVENT_SRAM3_SERR, + GAUDI_EVENT_SRAM7_SERR, + GAUDI_EVENT_SRAM6_SERR, + GAUDI_EVENT_SRAM5_SERR, + GAUDI_EVENT_SRAM4_SERR, + GAUDI_EVENT_SRAM8_SERR, + GAUDI_EVENT_SRAM9_SERR, + GAUDI_EVENT_SRAM10_SERR, + GAUDI_EVENT_SRAM11_SERR, + GAUDI_EVENT_SRAM15_SERR, + GAUDI_EVENT_SRAM14_SERR, + GAUDI_EVENT_SRAM13_SERR, + GAUDI_EVENT_SRAM12_SERR, + GAUDI_EVENT_SRAM16_SERR, + GAUDI_EVENT_SRAM17_SERR, + GAUDI_EVENT_SRAM18_SERR, + GAUDI_EVENT_SRAM19_SERR, + GAUDI_EVENT_SRAM23_SERR, + GAUDI_EVENT_SRAM22_SERR, + GAUDI_EVENT_SRAM21_SERR, + GAUDI_EVENT_SRAM20_SERR, + GAUDI_EVENT_SRAM24_SERR, + GAUDI_EVENT_SRAM25_SERR, + GAUDI_EVENT_SRAM26_SERR, + GAUDI_EVENT_SRAM27_SERR, + GAUDI_EVENT_SRAM31_SERR, + GAUDI_EVENT_SRAM30_SERR, + GAUDI_EVENT_SRAM29_SERR, + GAUDI_EVENT_SRAM28_SERR, + GAUDI_EVENT_SRAM0_DERR, + GAUDI_EVENT_SRAM1_DERR, + GAUDI_EVENT_SRAM2_DERR, + GAUDI_EVENT_SRAM3_DERR, + GAUDI_EVENT_SRAM7_DERR, + GAUDI_EVENT_SRAM6_DERR, + GAUDI_EVENT_SRAM5_DERR, + GAUDI_EVENT_SRAM4_DERR, + GAUDI_EVENT_SRAM8_DERR, + GAUDI_EVENT_SRAM9_DERR, + GAUDI_EVENT_SRAM10_DERR, + GAUDI_EVENT_SRAM11_DERR, + GAUDI_EVENT_SRAM15_DERR, + GAUDI_EVENT_SRAM14_DERR, + GAUDI_EVENT_SRAM13_DERR, + GAUDI_EVENT_SRAM12_DERR, + GAUDI_EVENT_SRAM16_DERR, + GAUDI_EVENT_SRAM17_DERR, + GAUDI_EVENT_SRAM18_DERR, + GAUDI_EVENT_SRAM19_DERR, + GAUDI_EVENT_SRAM23_DERR, + GAUDI_EVENT_SRAM22_DERR, + GAUDI_EVENT_SRAM21_DERR, + GAUDI_EVENT_SRAM20_DERR, + GAUDI_EVENT_SRAM24_DERR, + GAUDI_EVENT_SRAM25_DERR, + GAUDI_EVENT_SRAM26_DERR, + GAUDI_EVENT_SRAM27_DERR, + GAUDI_EVENT_SRAM31_DERR, + GAUDI_EVENT_SRAM30_DERR, + GAUDI_EVENT_SRAM29_DERR, + GAUDI_EVENT_SRAM28_DERR, + GAUDI_EVENT_NIC0_SERR, + GAUDI_EVENT_NIC1_SERR, + GAUDI_EVENT_NIC2_SERR, + GAUDI_EVENT_NIC3_SERR, + GAUDI_EVENT_NIC4_SERR, + GAUDI_EVENT_NIC0_DERR, + GAUDI_EVENT_NIC1_DERR, + 
GAUDI_EVENT_NIC2_DERR, + GAUDI_EVENT_NIC3_DERR, + GAUDI_EVENT_NIC4_DERR, + GAUDI_EVENT_DMA_IF0_SERR, + GAUDI_EVENT_DMA_IF1_SERR, + GAUDI_EVENT_DMA_IF2_SERR, + GAUDI_EVENT_DMA_IF3_SERR, + GAUDI_EVENT_DMA_IF0_DERR, + GAUDI_EVENT_DMA_IF1_DERR, + GAUDI_EVENT_DMA_IF2_DERR, + GAUDI_EVENT_DMA_IF3_DERR, + GAUDI_EVENT_GIC500, + GAUDI_EVENT_HBM_0_SERR, + GAUDI_EVENT_HBM_1_SERR, + GAUDI_EVENT_HBM_2_SERR, + GAUDI_EVENT_HBM_3_SERR, + GAUDI_EVENT_HBM_0_DERR, + GAUDI_EVENT_HBM_1_DERR, + GAUDI_EVENT_HBM_2_DERR, + GAUDI_EVENT_HBM_3_DERR, + GAUDI_EVENT_MMU_SERR, + GAUDI_EVENT_MMU_DERR, + GAUDI_EVENT_PCIE_DEC, + GAUDI_EVENT_TPC0_DEC, + GAUDI_EVENT_TPC1_DEC, + GAUDI_EVENT_TPC2_DEC, + GAUDI_EVENT_TPC3_DEC, + GAUDI_EVENT_TPC4_DEC, + GAUDI_EVENT_TPC5_DEC, + GAUDI_EVENT_TPC6_DEC, + GAUDI_EVENT_TPC7_DEC, + GAUDI_EVENT_AXI_ECC, + GAUDI_EVENT_L2_RAM_ECC, + GAUDI_EVENT_MME0_WBC_RSP, + GAUDI_EVENT_MME0_SBAB0_RSP, + GAUDI_EVENT_MME1_WBC_RSP, + GAUDI_EVENT_MME1_SBAB0_RSP, + GAUDI_EVENT_MME2_WBC_RSP, + GAUDI_EVENT_MME2_SBAB0_RSP, + GAUDI_EVENT_MME3_WBC_RSP, + GAUDI_EVENT_MME3_SBAB0_RSP, + GAUDI_EVENT_PLL0, + GAUDI_EVENT_PLL1, + GAUDI_EVENT_PLL2, + GAUDI_EVENT_PLL3, + GAUDI_EVENT_PLL4, + GAUDI_EVENT_PLL5, + GAUDI_EVENT_PLL6, + GAUDI_EVENT_PLL7, + GAUDI_EVENT_PLL8, + GAUDI_EVENT_PLL9, + GAUDI_EVENT_PLL10, + GAUDI_EVENT_PLL11, + GAUDI_EVENT_PLL12, + GAUDI_EVENT_PLL13, + GAUDI_EVENT_PLL14, + GAUDI_EVENT_PLL15, + GAUDI_EVENT_PLL16, + GAUDI_EVENT_PLL17, + GAUDI_EVENT_CPU_AXI_SPLITTER, + GAUDI_EVENT_PSOC_AXI_DEC, + GAUDI_EVENT_PSOC_PRSTN_FALL, + GAUDI_EVENT_TPC0_BMON_SPMU, + GAUDI_EVENT_TPC0_KRN_ERR, + GAUDI_EVENT_TPC1_BMON_SPMU, + GAUDI_EVENT_TPC1_KRN_ERR, + GAUDI_EVENT_TPC2_BMON_SPMU, + GAUDI_EVENT_TPC2_KRN_ERR, + GAUDI_EVENT_TPC3_BMON_SPMU, + GAUDI_EVENT_TPC3_KRN_ERR, + GAUDI_EVENT_TPC4_BMON_SPMU, + GAUDI_EVENT_TPC4_KRN_ERR, + GAUDI_EVENT_TPC5_BMON_SPMU, + GAUDI_EVENT_TPC5_KRN_ERR, + GAUDI_EVENT_TPC6_BMON_SPMU, + GAUDI_EVENT_TPC6_KRN_ERR, + GAUDI_EVENT_TPC7_BMON_SPMU, + GAUDI_EVENT_TPC7_KRN_ERR, + GAUDI_EVENT_MMU_PAGE_FAULT, + GAUDI_EVENT_MMU_WR_PERM, + GAUDI_EVENT_DMA_BM_CH0, + GAUDI_EVENT_DMA_BM_CH1, + GAUDI_EVENT_DMA_BM_CH2, + GAUDI_EVENT_DMA_BM_CH3, + GAUDI_EVENT_DMA_BM_CH4, + GAUDI_EVENT_DMA_BM_CH5, + GAUDI_EVENT_DMA_BM_CH6, + GAUDI_EVENT_DMA_BM_CH7, + GAUDI_EVENT_HBM0_SPI_0, + GAUDI_EVENT_HBM0_SPI_1, + GAUDI_EVENT_HBM1_SPI_0, + GAUDI_EVENT_HBM1_SPI_1, + GAUDI_EVENT_HBM2_SPI_0, + GAUDI_EVENT_HBM2_SPI_1, + GAUDI_EVENT_HBM3_SPI_0, + GAUDI_EVENT_HBM3_SPI_1, + GAUDI_EVENT_RAZWI_OR_ADC, + GAUDI_EVENT_TPC0_QM, + GAUDI_EVENT_TPC1_QM, + GAUDI_EVENT_TPC2_QM, + GAUDI_EVENT_TPC3_QM, + GAUDI_EVENT_TPC4_QM, + GAUDI_EVENT_TPC5_QM, + GAUDI_EVENT_TPC6_QM, + GAUDI_EVENT_TPC7_QM, + GAUDI_EVENT_MME0_QM, + GAUDI_EVENT_MME2_QM, + GAUDI_EVENT_DMA0_QM, + GAUDI_EVENT_DMA1_QM, + GAUDI_EVENT_DMA2_QM, + GAUDI_EVENT_DMA3_QM, + GAUDI_EVENT_DMA4_QM, + GAUDI_EVENT_DMA5_QM, + GAUDI_EVENT_DMA6_QM, + GAUDI_EVENT_DMA7_QM, + GAUDI_EVENT_NIC0_QM0, + GAUDI_EVENT_NIC0_QM1, + GAUDI_EVENT_NIC1_QM0, + GAUDI_EVENT_NIC1_QM1, + GAUDI_EVENT_NIC2_QM0, + GAUDI_EVENT_NIC2_QM1, + GAUDI_EVENT_NIC3_QM0, + GAUDI_EVENT_NIC3_QM1, + GAUDI_EVENT_NIC4_QM0, + GAUDI_EVENT_NIC4_QM1, + GAUDI_EVENT_DMA0_CORE, + GAUDI_EVENT_DMA1_CORE, + GAUDI_EVENT_DMA2_CORE, + GAUDI_EVENT_DMA3_CORE, + GAUDI_EVENT_DMA4_CORE, + GAUDI_EVENT_DMA5_CORE, + GAUDI_EVENT_DMA6_CORE, + GAUDI_EVENT_DMA7_CORE, + GAUDI_EVENT_FIX_POWER_ENV_S, + GAUDI_EVENT_FIX_POWER_ENV_E, + GAUDI_EVENT_FIX_THERMAL_ENV_S, + GAUDI_EVENT_FIX_THERMAL_ENV_E, + GAUDI_EVENT_RAZWI_OR_ADC_SW +}; + +static const char * const 
+gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = { + "tpc_address_exceed_slm", + "tpc_div_by_0", + "tpc_spu_mac_overflow", + "tpc_spu_addsub_overflow", + "tpc_spu_abs_overflow", + "tpc_spu_fp_dst_nan_inf", + "tpc_spu_fp_dst_denorm", + "tpc_vpu_mac_overflow", + "tpc_vpu_addsub_overflow", + "tpc_vpu_abs_overflow", + "tpc_vpu_fp_dst_nan_inf", + "tpc_vpu_fp_dst_denorm", + "tpc_assertions", + "tpc_illegal_instruction", + "tpc_pc_wrap_around", + "tpc_qm_sw_err", + "tpc_hbw_rresp_err", + "tpc_hbw_bresp_err", + "tpc_lbw_rresp_err", + "tpc_lbw_bresp_err" +}; + +static const char * const +gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = { + "PQ AXI HBW error", + "CQ AXI HBW error", + "CP AXI HBW error", + "CP error due to undefined OPCODE", + "CP encountered STOP OPCODE", + "CP AXI LBW error", + "CP WRREG32 or WRBULK returned error", + "N/A", + "FENCE 0 inc over max value and clipped", + "FENCE 1 inc over max value and clipped", + "FENCE 2 inc over max value and clipped", + "FENCE 3 inc over max value and clipped", + "FENCE 0 dec under min value and clipped", + "FENCE 1 dec under min value and clipped", + "FENCE 2 dec under min value and clipped", + "FENCE 3 dec under min value and clipped" +}; + +static const char * const +gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = { + "Choice push while full error", + "Choice Q watchdog error", + "MSG AXI LBW returned with error" +}; + +static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = { + QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */ + QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */ + QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */ + QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */ + QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */ + QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */ + QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */ + QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */ + QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */ + QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_0 */ + QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_1 */ + QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_2 */ + QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_3 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */ + 
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_0 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_1 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_2 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_3 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_0 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_1 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_2 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_3 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_0 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_1 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_2 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_3 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_0 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_1 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_2 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_3 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_0 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_1 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_2 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_3 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_0 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_1 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_2 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_3 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_0 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_1 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_2 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_3 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_0 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_1 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_2 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_3 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_0 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_1 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_2 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_3 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_0 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_1 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_2 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_3 */ +}; + +static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, + u64 phys_addr); +static int gaudi_send_job_on_qman0(struct hl_device *hdev, + struct hl_cs_job *job); +static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr, + u32 size, u64 val); +static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 
tpc_kernel, + u32 tpc_id); +static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev); +static int gaudi_armcp_info_get(struct hl_device *hdev); +static void gaudi_disable_clock_gating(struct hl_device *hdev); +static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid); + +static int gaudi_get_fixed_properties(struct hl_device *hdev) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + int i; + + if (GAUDI_QUEUE_ID_SIZE >= HL_MAX_QUEUES) { + dev_err(hdev->dev, + "Number of H/W queues must be smaller than %d\n", + HL_MAX_QUEUES); + return -EFAULT; + } + + for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) { + if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) { + prop->hw_queues_props[i].type = QUEUE_TYPE_EXT; + prop->hw_queues_props[i].driver_only = 0; + prop->hw_queues_props[i].requires_kernel_cb = 1; + } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) { + prop->hw_queues_props[i].type = QUEUE_TYPE_CPU; + prop->hw_queues_props[i].driver_only = 1; + prop->hw_queues_props[i].requires_kernel_cb = 0; + } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) { + prop->hw_queues_props[i].type = QUEUE_TYPE_INT; + prop->hw_queues_props[i].driver_only = 0; + prop->hw_queues_props[i].requires_kernel_cb = 0; + } else if (gaudi_queue_type[i] == QUEUE_TYPE_NA) { + prop->hw_queues_props[i].type = QUEUE_TYPE_NA; + prop->hw_queues_props[i].driver_only = 0; + prop->hw_queues_props[i].requires_kernel_cb = 0; + } + } + + for (; i < HL_MAX_QUEUES; i++) + prop->hw_queues_props[i].type = QUEUE_TYPE_NA; + + prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES; + + prop->dram_base_address = DRAM_PHYS_BASE; + prop->dram_size = GAUDI_HBM_SIZE_32GB; + prop->dram_end_address = prop->dram_base_address + + prop->dram_size; + prop->dram_user_base_address = DRAM_BASE_ADDR_USER; + + prop->sram_base_address = SRAM_BASE_ADDR; + prop->sram_size = SRAM_SIZE; + prop->sram_end_address = prop->sram_base_address + + prop->sram_size; + prop->sram_user_base_address = prop->sram_base_address + + SRAM_USER_BASE_OFFSET; + + prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR; + if (hdev->pldm) + prop->mmu_pgt_size = 0x800000; /* 8MB */ + else + prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE; + prop->mmu_pte_size = HL_PTE_SIZE; + prop->mmu_hop_table_size = HOP_TABLE_SIZE; + prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE; + prop->dram_page_size = PAGE_SIZE_2MB; + + prop->pmmu.hop0_shift = HOP0_SHIFT; + prop->pmmu.hop1_shift = HOP1_SHIFT; + prop->pmmu.hop2_shift = HOP2_SHIFT; + prop->pmmu.hop3_shift = HOP3_SHIFT; + prop->pmmu.hop4_shift = HOP4_SHIFT; + prop->pmmu.hop0_mask = HOP0_MASK; + prop->pmmu.hop1_mask = HOP1_MASK; + prop->pmmu.hop2_mask = HOP2_MASK; + prop->pmmu.hop3_mask = HOP3_MASK; + prop->pmmu.hop4_mask = HOP4_MASK; + prop->pmmu.start_addr = VA_HOST_SPACE_START; + prop->pmmu.end_addr = + (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1; + prop->pmmu.page_size = PAGE_SIZE_4KB; + + /* PMMU and HPMMU are the same except of page size */ + memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu)); + prop->pmmu_huge.page_size = PAGE_SIZE_2MB; + + /* shifts and masks are the same in PMMU and DMMU */ + memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu)); + prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2); + prop->dmmu.end_addr = VA_HOST_SPACE_END; + prop->dmmu.page_size = PAGE_SIZE_2MB; + + prop->cfg_size = CFG_SIZE; + prop->max_asid = MAX_ASID; + prop->num_of_events = GAUDI_EVENT_SIZE; + prop->tpc_enabled_mask = TPC_ENABLED_MASK; + + prop->max_power_default = MAX_POWER_DEFAULT; + + prop->cb_pool_cb_cnt = 
GAUDI_CB_POOL_CB_CNT; + prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE; + + prop->pcie_dbi_base_address = mmPCIE_DBI_BASE; + prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI; + + strncpy(prop->armcp_info.card_name, GAUDI_DEFAULT_CARD_NAME, + CARD_NAME_MAX_LEN); + + return 0; +} + +static int gaudi_pci_bars_map(struct hl_device *hdev) +{ + static const char * const name[] = {"SRAM", "CFG", "HBM"}; + bool is_wc[3] = {false, false, true}; + int rc; + + rc = hl_pci_bars_map(hdev, name, is_wc); + if (rc) + return rc; + + hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] + + (CFG_BASE - SPI_FLASH_BASE_ADDR); + + return 0; +} + +static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + u64 old_addr = addr; + int rc; + + if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr)) + return old_addr; + + /* Inbound Region 2 - Bar 4 - Point to HBM */ + rc = hl_pci_set_dram_bar_base(hdev, 2, 4, addr); + if (rc) + return U64_MAX; + + if (gaudi) { + old_addr = gaudi->hbm_bar_cur_addr; + gaudi->hbm_bar_cur_addr = addr; + } + + return old_addr; +} + +static int gaudi_init_iatu(struct hl_device *hdev) +{ + int rc = 0; + + /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */ + rc = hl_pci_iatu_write(hdev, 0x314, + lower_32_bits(SPI_FLASH_BASE_ADDR)); + rc |= hl_pci_iatu_write(hdev, 0x318, + upper_32_bits(SPI_FLASH_BASE_ADDR)); + rc |= hl_pci_iatu_write(hdev, 0x300, 0); + /* Enable + Bar match + match enable */ + rc |= hl_pci_iatu_write(hdev, 0x304, 0xC0080200); + + if (rc) + return -EIO; + + return hl_pci_init_iatu(hdev, SRAM_BASE_ADDR, DRAM_PHYS_BASE, + HOST_PHYS_BASE, HOST_PHYS_SIZE); +} + +static int gaudi_early_init(struct hl_device *hdev) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct pci_dev *pdev = hdev->pdev; + int rc; + + rc = gaudi_get_fixed_properties(hdev); + if (rc) { + dev_err(hdev->dev, "Failed to get fixed properties\n"); + return rc; + } + + /* Check BAR sizes */ + if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) { + dev_err(hdev->dev, + "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n", + SRAM_BAR_ID, + (unsigned long long) pci_resource_len(pdev, + SRAM_BAR_ID), + SRAM_BAR_SIZE); + return -ENODEV; + } + + if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) { + dev_err(hdev->dev, + "Not " HL_NAME "? 
BAR %d size %llu, expecting %llu\n", + CFG_BAR_ID, + (unsigned long long) pci_resource_len(pdev, + CFG_BAR_ID), + CFG_BAR_SIZE); + return -ENODEV; + } + + prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID); + + rc = hl_pci_init(hdev); + if (rc) + return rc; + + return 0; +} + +static int gaudi_early_fini(struct hl_device *hdev) +{ + hl_pci_fini(hdev); + + return 0; +} + +/** + * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values + * + * @hdev: pointer to hl_device structure + * + */ +static void gaudi_fetch_psoc_frequency(struct hl_device *hdev) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + + prop->psoc_pci_pll_nr = RREG32(mmPSOC_PCI_PLL_NR); + prop->psoc_pci_pll_nf = RREG32(mmPSOC_PCI_PLL_NF); + prop->psoc_pci_pll_od = RREG32(mmPSOC_PCI_PLL_OD); + prop->psoc_pci_pll_div_factor = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1); +} + +static int _gaudi_init_tpc_mem(struct hl_device *hdev, + dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct packet_lin_dma *init_tpc_mem_pkt; + struct hl_cs_job *job; + struct hl_cb *cb; + u64 dst_addr; + u32 cb_size, ctl; + u8 tpc_id; + int rc; + + cb = hl_cb_kernel_create(hdev, PAGE_SIZE); + if (!cb) + return -EFAULT; + + init_tpc_mem_pkt = (struct packet_lin_dma *) (uintptr_t) + cb->kernel_address; + cb_size = sizeof(*init_tpc_mem_pkt); + memset(init_tpc_mem_pkt, 0, cb_size); + + init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size); + + ctl = ((PACKET_LIN_DMA << GAUDI_PKT_CTL_OPCODE_SHIFT) | + (1 << GAUDI_PKT_LIN_DMA_CTL_LIN_SHIFT) | + (1 << GAUDI_PKT_CTL_RB_SHIFT) | + (1 << GAUDI_PKT_CTL_MB_SHIFT)); + + init_tpc_mem_pkt->ctl = cpu_to_le32(ctl); + + init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr); + dst_addr = (prop->sram_user_base_address & + GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >> + GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT; + init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr); + + job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true); + if (!job) { + dev_err(hdev->dev, "Failed to allocate a new job\n"); + rc = -ENOMEM; + goto release_cb; + } + + job->id = 0; + job->user_cb = cb; + job->user_cb->cs_cnt++; + job->user_cb_size = cb_size; + job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0; + job->patched_cb = job->user_cb; + job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot); + + hl_debugfs_add_job(hdev, job); + + rc = gaudi_send_job_on_qman0(hdev, job); + + if (rc) + goto free_job; + + for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) { + rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id); + if (rc) + break; + } + +free_job: + hl_userptr_delete_list(hdev, &job->userptr_list); + hl_debugfs_remove_job(hdev, job); + kfree(job); + cb->cs_cnt--; + +release_cb: + hl_cb_put(cb); + hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT); + + return rc; +} + +/* + * gaudi_init_tpc_mem() - Initialize TPC memories. + * @hdev: Pointer to hl_device structure. + * + * Copy TPC kernel fw from firmware file and run it to initialize TPC memories. + * + * Return: 0 for success, negative value for error. 
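+ *
+ * Note: as implemented in _gaudi_init_tpc_mem() above, the copy is done with
+ * a single LIN_DMA packet on a kernel CB submitted through QMAN0, with the
+ * user SRAM base as destination, and gaudi_run_tpc_kernel() is then invoked
+ * once per TPC engine to execute the loaded kernel.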
+ */ +static int gaudi_init_tpc_mem(struct hl_device *hdev) +{ + const struct firmware *fw; + size_t fw_size; + void *cpu_addr; + dma_addr_t dma_handle; + int rc; + + rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev); + if (rc) { + dev_err(hdev->dev, "Firmware file %s is not found!\n", + GAUDI_TPC_FW_FILE); + goto out; + } + + fw_size = fw->size; + cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size, + &dma_handle, GFP_KERNEL | __GFP_ZERO); + if (!cpu_addr) { + dev_err(hdev->dev, + "Failed to allocate %zu of dma memory for TPC kernel\n", + fw_size); + rc = -ENOMEM; + goto out; + } + + memcpy(cpu_addr, fw->data, fw_size); + + rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size); + + hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr, + dma_handle); + +out: + release_firmware(fw); + return rc; +} + +static int gaudi_late_init(struct hl_device *hdev) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + int rc; + + rc = gaudi->armcp_info_get(hdev); + if (rc) { + dev_err(hdev->dev, "Failed to get armcp info\n"); + return rc; + } + + rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS); + if (rc) { + dev_err(hdev->dev, "Failed to enable PCI access from CPU\n"); + return rc; + } + + WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER); + + gaudi_fetch_psoc_frequency(hdev); + + rc = gaudi_mmu_clear_pgt_range(hdev); + if (rc) { + dev_err(hdev->dev, "Failed to clear MMU page tables range\n"); + goto disable_pci_access; + } + + rc = gaudi_init_tpc_mem(hdev); + if (rc) { + dev_err(hdev->dev, "Failed to initialize TPC memories\n"); + goto disable_pci_access; + } + + return 0; + +disable_pci_access: + hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS); + + return rc; +} + +static void gaudi_late_fini(struct hl_device *hdev) +{ + const struct hwmon_channel_info **channel_info_arr; + int i = 0; + + if (!hdev->hl_chip_info->info) + return; + + channel_info_arr = hdev->hl_chip_info->info; + + while (channel_info_arr[i]) { + kfree(channel_info_arr[i]->config); + kfree(channel_info_arr[i]); + i++; + } + + kfree(channel_info_arr); + + hdev->hl_chip_info->info = NULL; +} + +static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev) +{ + dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr; + void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}; + int i, j, rc = 0; + + /* + * The device CPU works with 40-bits addresses, while bit 39 must be set + * to '1' when accessing the host. + * Bits 49:39 of the full host address are saved for a later + * configuration of the HW to perform extension to 50 bits. + * Because there is a single HW register that holds the extension bits, + * these bits must be identical in all allocated range. 
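+ *
+ * Worked example (illustrative numbers, assuming a 2MB region): an
+ * allocation starting at 0x7f_fff0_0000 would end at 0x80_000f_ffff,
+ * crossing the 2^39 boundary, so bits 49:39 of the start and end addresses
+ * differ and the loop below retries; an allocation starting at
+ * 0x80_0000_0000 keeps those bits constant across the whole range.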
+ */ + + for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) { + virt_addr_arr[i] = + hdev->asic_funcs->asic_dma_alloc_coherent(hdev, + HL_CPU_ACCESSIBLE_MEM_SIZE, + &dma_addr_arr[i], + GFP_KERNEL | __GFP_ZERO); + if (!virt_addr_arr[i]) { + rc = -ENOMEM; + goto free_dma_mem_arr; + } + + end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1; + if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) == + GAUDI_CPU_PCI_MSB_ADDR(end_addr)) + break; + } + + if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) { + dev_err(hdev->dev, + "MSB of CPU accessible DMA memory are not identical in all range\n"); + rc = -EFAULT; + goto free_dma_mem_arr; + } + + hdev->cpu_accessible_dma_mem = virt_addr_arr[i]; + hdev->cpu_accessible_dma_address = dma_addr_arr[i]; + hdev->cpu_pci_msb_addr = + GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address); + + GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address); + +free_dma_mem_arr: + for (j = 0 ; j < i ; j++) + hdev->asic_funcs->asic_dma_free_coherent(hdev, + HL_CPU_ACCESSIBLE_MEM_SIZE, + virt_addr_arr[j], + dma_addr_arr[j]); + + return rc; +} + +static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + struct gaudi_internal_qman_info *q; + u32 i; + + for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) { + q = &gaudi->internal_qmans[i]; + if (!q->pq_kernel_addr) + continue; + hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size, + q->pq_kernel_addr, + q->pq_dma_addr); + } +} + +static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + struct gaudi_internal_qman_info *q; + int rc, i; + + for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) { + if (gaudi_queue_type[i] != QUEUE_TYPE_INT) + continue; + + q = &gaudi->internal_qmans[i]; + + switch (i) { + case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_4_3: + case GAUDI_QUEUE_ID_DMA_6_0 ... GAUDI_QUEUE_ID_DMA_7_3: + q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES; + break; + case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3: + q->pq_size = MME_QMAN_SIZE_IN_BYTES; + break; + case GAUDI_QUEUE_ID_TPC_0_0 ... 
GAUDI_QUEUE_ID_TPC_7_3: + q->pq_size = TPC_QMAN_SIZE_IN_BYTES; + break; + default: + dev_err(hdev->dev, "Bad internal queue index %d", i); + rc = -EINVAL; + goto free_internal_qmans_pq_mem; + } + + q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent( + hdev, q->pq_size, + &q->pq_dma_addr, + GFP_KERNEL | __GFP_ZERO); + if (!q->pq_kernel_addr) { + rc = -ENOMEM; + goto free_internal_qmans_pq_mem; + } + } + + return 0; + +free_internal_qmans_pq_mem: + gaudi_free_internal_qmans_pq_mem(hdev); + return rc; +} + +static int gaudi_sw_init(struct hl_device *hdev) +{ + struct gaudi_device *gaudi; + int rc; + + /* Allocate device structure */ + gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL); + if (!gaudi) + return -ENOMEM; + + gaudi->armcp_info_get = gaudi_armcp_info_get; + + gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ; + + hdev->asic_specific = gaudi; + + /* Create DMA pool for small allocations */ + hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), + &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0); + if (!hdev->dma_pool) { + dev_err(hdev->dev, "failed to create DMA pool\n"); + rc = -ENOMEM; + goto free_gaudi_device; + } + + rc = gaudi_alloc_cpu_accessible_dma_mem(hdev); + if (rc) + goto free_dma_pool; + + hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1); + if (!hdev->cpu_accessible_dma_pool) { + dev_err(hdev->dev, + "Failed to create CPU accessible DMA pool\n"); + rc = -ENOMEM; + goto free_cpu_dma_mem; + } + + rc = gen_pool_add(hdev->cpu_accessible_dma_pool, + (uintptr_t) hdev->cpu_accessible_dma_mem, + HL_CPU_ACCESSIBLE_MEM_SIZE, -1); + if (rc) { + dev_err(hdev->dev, + "Failed to add memory to CPU accessible DMA pool\n"); + rc = -EFAULT; + goto free_cpu_accessible_dma_pool; + } + + rc = gaudi_alloc_internal_qmans_pq_mem(hdev); + if (rc) + goto free_cpu_accessible_dma_pool; + + spin_lock_init(&gaudi->hw_queues_lock); + mutex_init(&gaudi->clk_gate_mutex); + + hdev->supports_sync_stream = true; + hdev->supports_coresight = true; + + return 0; + +free_cpu_accessible_dma_pool: + gen_pool_destroy(hdev->cpu_accessible_dma_pool); +free_cpu_dma_mem: + GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address, + hdev->cpu_pci_msb_addr); + hdev->asic_funcs->asic_dma_free_coherent(hdev, + HL_CPU_ACCESSIBLE_MEM_SIZE, + hdev->cpu_accessible_dma_mem, + hdev->cpu_accessible_dma_address); +free_dma_pool: + dma_pool_destroy(hdev->dma_pool); +free_gaudi_device: + kfree(gaudi); + return rc; +} + +static int gaudi_sw_fini(struct hl_device *hdev) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + + gaudi_free_internal_qmans_pq_mem(hdev); + + gen_pool_destroy(hdev->cpu_accessible_dma_pool); + + GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address, + hdev->cpu_pci_msb_addr); + hdev->asic_funcs->asic_dma_free_coherent(hdev, + HL_CPU_ACCESSIBLE_MEM_SIZE, + hdev->cpu_accessible_dma_mem, + hdev->cpu_accessible_dma_address); + + dma_pool_destroy(hdev->dma_pool); + + mutex_destroy(&gaudi->clk_gate_mutex); + + kfree(gaudi); + + return 0; +} + +static irqreturn_t gaudi_irq_handler_single(int irq, void *arg) +{ + struct hl_device *hdev = arg; + int i; + + if (hdev->disabled) + return IRQ_HANDLED; + + for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) + hl_irq_handler_cq(irq, &hdev->completion_queue[i]); + + hl_irq_handler_eq(irq, &hdev->event_queue); + + return IRQ_HANDLED; +} + +/* + * For backward compatibility, new MSI interrupts should be set after the + * existing CPU and NIC interrupts. 
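+ *
+ * Mapping example (derived from gaudi_pci_irq_vector() below): an interrupt
+ * nr below GAUDI_EVENT_QUEUE_MSI_IDX maps 1:1 to its MSI vector, the CPU
+ * event queue maps to GAUDI_EVENT_QUEUE_MSI_IDX itself, and any other nr is
+ * shifted up by NIC_NUMBER_OF_ENGINES + 1 vectors to leave room for the
+ * reserved NIC and CPU entries.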
+ */ +static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr, + bool cpu_eq) +{ + int msi_vec; + + if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq)) + dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n", + GAUDI_EVENT_QUEUE_MSI_IDX); + + msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr : + (nr + NIC_NUMBER_OF_ENGINES + 1); + + return pci_irq_vector(hdev->pdev, msi_vec); +} + +static int gaudi_enable_msi_single(struct hl_device *hdev) +{ + int rc, irq; + + dev_info(hdev->dev, "Working in single MSI IRQ mode\n"); + + irq = gaudi_pci_irq_vector(hdev, 0, false); + rc = request_irq(irq, gaudi_irq_handler_single, 0, + "gaudi single msi", hdev); + if (rc) + dev_err(hdev->dev, + "Failed to request single MSI IRQ\n"); + + return rc; +} + +static int gaudi_enable_msi_multi(struct hl_device *hdev) +{ + int cq_cnt = hdev->asic_prop.completion_queues_count; + int rc, i, irq_cnt_init, irq; + + for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) { + irq = gaudi_pci_irq_vector(hdev, i, false); + rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i], + &hdev->completion_queue[i]); + if (rc) { + dev_err(hdev->dev, "Failed to request IRQ %d", irq); + goto free_irqs; + } + } + + irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true); + rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt], + &hdev->event_queue); + if (rc) { + dev_err(hdev->dev, "Failed to request IRQ %d", irq); + goto free_irqs; + } + + return 0; + +free_irqs: + for (i = 0 ; i < irq_cnt_init ; i++) + free_irq(gaudi_pci_irq_vector(hdev, i, false), + &hdev->completion_queue[i]); + return rc; +} + +static int gaudi_enable_msi(struct hl_device *hdev) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + int rc; + + if (gaudi->hw_cap_initialized & HW_CAP_MSI) + return 0; + + rc = pci_alloc_irq_vectors(hdev->pdev, 1, GAUDI_MSI_ENTRIES, + PCI_IRQ_MSI); + if (rc < 0) { + dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc); + return rc; + } + + if (rc < NUMBER_OF_INTERRUPTS) { + gaudi->multi_msi_mode = false; + rc = gaudi_enable_msi_single(hdev); + } else { + gaudi->multi_msi_mode = true; + rc = gaudi_enable_msi_multi(hdev); + } + + if (rc) + goto free_pci_irq_vectors; + + gaudi->hw_cap_initialized |= HW_CAP_MSI; + + return 0; + +free_pci_irq_vectors: + pci_free_irq_vectors(hdev->pdev); + return rc; +} + +static void gaudi_sync_irqs(struct hl_device *hdev) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + int i, cq_cnt = hdev->asic_prop.completion_queues_count; + + if (!(gaudi->hw_cap_initialized & HW_CAP_MSI)) + return; + + /* Wait for all pending IRQs to be finished */ + if (gaudi->multi_msi_mode) { + for (i = 0 ; i < cq_cnt ; i++) + synchronize_irq(gaudi_pci_irq_vector(hdev, i, false)); + + synchronize_irq(gaudi_pci_irq_vector(hdev, + GAUDI_EVENT_QUEUE_MSI_IDX, + true)); + } else { + synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false)); + } +} + +static void gaudi_disable_msi(struct hl_device *hdev) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count; + + if (!(gaudi->hw_cap_initialized & HW_CAP_MSI)) + return; + + gaudi_sync_irqs(hdev); + + if (gaudi->multi_msi_mode) { + irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, + true); + free_irq(irq, &hdev->event_queue); + + for (i = 0 ; i < cq_cnt ; i++) { + irq = gaudi_pci_irq_vector(hdev, i, false); + free_irq(irq, &hdev->completion_queue[i]); + } + } else { + free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev); + } + + 
pci_free_irq_vectors(hdev->pdev); + + gaudi->hw_cap_initialized &= ~HW_CAP_MSI; +} + +static void gaudi_init_scrambler_sram(struct hl_device *hdev) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + + if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER) + return; + + if (!hdev->sram_scrambler_enable) + return; + + WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN, + 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); + WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN, + 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); + WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN, + 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); + WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN, + 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); + WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN, + 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); + WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN, + 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); + WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN, + 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); + WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN, + 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); + + WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN, + 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); + WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN, + 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); + WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN, + 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); + WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN, + 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); + WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN, + 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); + WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN, + 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); + WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN, + 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); + WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN, + 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); + + WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN, + 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); + WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN, + 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); + WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN, + 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); + WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN, + 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); + WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN, + 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); + WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN, + 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); + WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN, + 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); + WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN, + 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); + + gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER; +} + +static void gaudi_init_scrambler_hbm(struct hl_device *hdev) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + + if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER) + return; + + if (!hdev->dram_scrambler_enable) + return; + + WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN, + 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); + WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN, + 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); + WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN, + 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); + WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN, + 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); + WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN, + 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); + WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN, + 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); + WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN, + 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); + WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN, + 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); + + WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN, + 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); + 
WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN, + 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); + WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN, + 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); + WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN, + 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); + WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN, + 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); + WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN, + 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); + WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN, + 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); + WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN, + 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); + + WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN, + 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); + WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN, + 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); + WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN, + 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); + WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN, + 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); + WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN, + 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); + WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN, + 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); + WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN, + 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); + WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN, + 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); + + gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER; +} + +static void gaudi_init_e2e(struct hl_device *hdev) +{ + WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3); + WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3); + WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49); + WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101); + + WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3); + WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3); + WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1); + WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39); + + WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1); + WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1); + WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1); + WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32); + + WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3); + WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3); + WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19); + WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32); + + WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3); + WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3); + WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19); + WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32); + + WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1); + WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1); + WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1); + WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32); + + WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3); + WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3); + WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1); + WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39); + + WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3); + WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3); + WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19); + WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19); + + WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3); + WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3); + WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79); + WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163); + + WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3); + WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3); + WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1); + WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39); + + WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1); + 
WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1); + WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1); + WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32); + + WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3); + WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3); + WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19); + WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32); + + WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3); + WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3); + WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19); + WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32); + + WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1); + WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1); + WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1); + WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32); + + WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3); + WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3); + WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1); + WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39); + + WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3); + WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3); + WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79); + WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79); + + WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); + WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); + WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162); + WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338); + + WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); + WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); + WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162); + WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338); + + WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); + WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); + WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162); + WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338); + + WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); + WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); + WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162); + WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338); + + WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); + WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); + WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162); + WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338); + + WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); + WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); + WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162); + WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338); + + WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); + WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); + WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162); + WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338); + + WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); + WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); + WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162); + WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338); + + if (!hdev->dram_scrambler_enable) { + WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21); + WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22); + WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F); + WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20); + + WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21); + WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22); + WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F); + WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20); + + WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21); + WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22); + 
WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F); + WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20); + + WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21); + WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22); + WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F); + WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20); + + WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21); + WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22); + WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F); + WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20); + + WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21); + WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22); + WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F); + WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20); + + WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21); + WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22); + WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F); + WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20); + + WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21); + WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22); + WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F); + WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20); + + WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21); + WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22); + WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F); + WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20); + + WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21); + WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22); + WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F); + WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20); + + WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21); + WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22); + WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F); + WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20); + + WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21); + WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22); + WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F); + WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20); + + WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21); + WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22); + WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F); + WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20); + + WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21); + WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22); + WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F); + WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20); + + WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21); + WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22); + WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F); + WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20); + + WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21); + WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22); + WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F); + WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20); + + WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21); + WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22); + WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F); + WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20); + + WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21); + WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22); + WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F); + WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20); + + WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21); + WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22); + WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F); + WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20); + + WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21); + WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22); + WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F); + WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20); + + 
WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21); + WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22); + WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F); + WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20); + + WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21); + WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22); + WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F); + WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20); + + WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21); + WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22); + WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F); + WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20); + + WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21); + WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22); + WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F); + WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20); + } + + WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN, + 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); + WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN, + 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); + + WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN, + 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); + WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN, + 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); + + WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN, + 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); + WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN, + 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); + + WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN, + 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); + WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN, + 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); + + WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN, + 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); + WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN, + 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); + + WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN, + 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); + WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN, + 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); + + WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN, + 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); + WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN, + 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); + + WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN, + 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); + WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN, + 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); + + WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN, + 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); + WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN, + 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); + + WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN, + 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); + WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN, + 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); + + WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN, + 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); + WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN, + 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); + + WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN, + 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); + WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN, + 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); + + WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN, + 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); + WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN, + 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); + + WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN, + 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); + WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN, + 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); + + WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN, + 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); + WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN, + 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); + + WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN, + 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); + WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN, + 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); + + WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN, + 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); + 
WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN, + 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); + + WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN, + 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); + WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN, + 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); + + WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN, + 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); + WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN, + 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); + + WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN, + 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); + WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN, + 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); + + WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN, + 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); + WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN, + 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); + + WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN, + 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); + WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN, + 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); + + WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN, + 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); + WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN, + 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); + + WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN, + 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); + WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN, + 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); +} + +static void gaudi_init_hbm_cred(struct hl_device *hdev) +{ + u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd; + + hbm0_wr = 0x33333333; + hbm1_wr = 0x33333333; + hbm0_rd = 0x77777777; + hbm1_rd = 0xDDDDDDDD; + + WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr); + WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr); + WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd); + WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd); + + WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr); + WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr); + WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd); + WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd); + + WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr); + WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr); + WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd); + WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd); + + WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr); + WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr); + WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd); + WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd); + + WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0, + (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | + (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); + WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0, + (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | + (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); + WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0, + (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | + (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); + WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0, + (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | + (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); + + WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1, + (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | + (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); + WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1, + (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | + (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); + WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1, + (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | + (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); + WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1, + (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | + (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); +} + +static void gaudi_init_rate_limiter(struct hl_device *hdev) 
+{ + u32 nr, nf, od, sat, rst, timeout; + u64 freq; + + nr = RREG32(mmPSOC_HBM_PLL_NR); + nf = RREG32(mmPSOC_HBM_PLL_NF); + od = RREG32(mmPSOC_HBM_PLL_OD); + freq = (50 * (nf + 1)) / ((nr + 1) * (od + 1)); + + dev_dbg(hdev->dev, "HBM frequency is %lluMHz\n", freq); + + /* Configuration is for five (5) DDMA channels */ + if (freq == 800) { + sat = 4; + rst = 11; + timeout = 15; + } else if (freq == 900) { + sat = 4; + rst = 15; + timeout = 16; + } else if (freq == 950) { + sat = 4; + rst = 15; + timeout = 15; + } else { + dev_warn(hdev->dev, + "unsupported HBM frequency %lluMHz, no rate-limiters\n", + freq); + return; + } + + WREG32(mmDMA_IF_W_S_DOWN_RSP_MID_WGHT_0, 0x111); + WREG32(mmDMA_IF_W_S_DOWN_RSP_MID_WGHT_1, 0x111); + WREG32(mmDMA_IF_E_S_DOWN_RSP_MID_WGHT_0, 0x111); + WREG32(mmDMA_IF_E_S_DOWN_RSP_MID_WGHT_1, 0x111); + WREG32(mmDMA_IF_W_N_DOWN_RSP_MID_WGHT_0, 0x111); + WREG32(mmDMA_IF_W_N_DOWN_RSP_MID_WGHT_1, 0x111); + WREG32(mmDMA_IF_E_N_DOWN_RSP_MID_WGHT_0, 0x111); + WREG32(mmDMA_IF_E_N_DOWN_RSP_MID_WGHT_1, 0x111); + + if (!hdev->rl_enable) { + dev_info(hdev->dev, "Rate limiters disabled\n"); + return; + } + + WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_HBM_SAT, sat); + WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_HBM_SAT, sat); + WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_HBM_SAT, sat); + WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_HBM_SAT, sat); + WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_HBM_SAT, sat); + WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_HBM_SAT, sat); + WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_HBM_SAT, sat); + WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_HBM_SAT, sat); + + WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_HBM_RST, rst); + WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_HBM_RST, rst); + WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_HBM_RST, rst); + WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_HBM_RST, rst); + WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_HBM_RST, rst); + WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_HBM_RST, rst); + WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_HBM_RST, rst); + WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_HBM_RST, rst); + + WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_HBM_TIMEOUT, timeout); + WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_HBM_TIMEOUT, timeout); + WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_HBM_TIMEOUT, timeout); + WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_HBM_TIMEOUT, timeout); + WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_HBM_TIMEOUT, timeout); + WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_HBM_TIMEOUT, timeout); + WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_HBM_TIMEOUT, timeout); + WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_HBM_TIMEOUT, timeout); + + WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_HBM_EN, 1); + WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_HBM_EN, 1); + WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_HBM_EN, 1); + WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_HBM_EN, 1); + WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_HBM_EN, 1); + WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_HBM_EN, 1); + WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_HBM_EN, 1); + WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_HBM_EN, 1); + + WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_SRAM_SAT, sat); + WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_SRAM_SAT, sat); + WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_SRAM_SAT, sat); + WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_SRAM_SAT, sat); + WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_SRAM_SAT, sat); + WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_SRAM_SAT, sat); + WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_SRAM_SAT, sat); + WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_SRAM_SAT, sat); + + WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_SRAM_RST, rst); + WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_SRAM_RST, rst); + WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_SRAM_RST, rst); + WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_SRAM_RST, rst); + WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_SRAM_RST, rst); + WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_SRAM_RST, rst); + WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_SRAM_RST, rst); + WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_SRAM_RST, rst); + 
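/*
 * Editor's note: worked example of the PLL math at the top of this
 * function, assuming the constant 50 is a 50 MHz reference clock. With
 * NR = 0, NF = 31 and OD = 1 the formula gives
 *
 *	freq = (50 * (31 + 1)) / ((0 + 1) * (1 + 1)) = 1600 / 2 = 800 MHz
 *
 * which selects the sat = 4, rst = 11, timeout = 15 profile above. The
 * SRAM rate limiters programmed around this point reuse the same
 * sat/rst/timeout values as the HBM ones.
 */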
+ WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_SRAM_TIMEOUT, timeout); + WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_SRAM_TIMEOUT, timeout); + WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_SRAM_TIMEOUT, timeout); + WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_SRAM_TIMEOUT, timeout); + WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_SRAM_TIMEOUT, timeout); + WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_SRAM_TIMEOUT, timeout); + WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_SRAM_TIMEOUT, timeout); + WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_SRAM_TIMEOUT, timeout); + + WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_SRAM_EN, 1); + WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_SRAM_EN, 1); + WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_SRAM_EN, 1); + WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_SRAM_EN, 1); + WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_SRAM_EN, 1); + WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_SRAM_EN, 1); + WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_SRAM_EN, 1); + WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_SRAM_EN, 1); +} + +static void gaudi_init_golden_registers(struct hl_device *hdev) +{ + u32 tpc_offset; + int tpc_id, i; + + gaudi_init_e2e(hdev); + + gaudi_init_hbm_cred(hdev); + + gaudi_init_rate_limiter(hdev); + + gaudi_disable_clock_gating(hdev); + + for (tpc_id = 0, tpc_offset = 0; + tpc_id < TPC_NUMBER_OF_ENGINES; + tpc_id++, tpc_offset += TPC_CFG_OFFSET) { + /* Mask all arithmetic interrupts from TPC */ + WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF); + /* Set 16 cache lines */ + WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset, + ICACHE_FETCH_LINE_NUM, 2); + } + + /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */ + for (i = 0 ; i < 128 ; i += 8) + writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i); + + WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3); + WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3); + WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3); + WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3); + + /* WA for H3-2081 */ + WREG32(mmPCIE_WRAP_MAX_OUTSTAND, 0x10ff); +} + +static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id, + int qman_id, dma_addr_t qman_pq_addr) +{ + u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; + u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; + u32 q_off, dma_qm_offset; + u32 dma_qm_err_cfg; + + dma_qm_offset = dma_id * DMA_QMAN_OFFSET; + + mtr_base_en_lo = lower_32_bits(CFG_BASE + + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); + mtr_base_en_hi = upper_32_bits(CFG_BASE + + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); + so_base_en_lo = lower_32_bits(CFG_BASE + + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); + so_base_en_hi = upper_32_bits(CFG_BASE + + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); + mtr_base_ws_lo = lower_32_bits(CFG_BASE + + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); + mtr_base_ws_hi = upper_32_bits(CFG_BASE + + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); + so_base_ws_lo = lower_32_bits(CFG_BASE + + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); + so_base_ws_hi = upper_32_bits(CFG_BASE + + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); + + q_off = dma_qm_offset + qman_id * 4; + + WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr)); + WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr)); + + WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH)); + WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0); + WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0); + + WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74); + WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14); + WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C); + + WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); + WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); + 
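/*
 * Editor's note: MSG_BASE0/1 point the CP at the east-north sync manager
 * (monitor payload address and sync objects), and MSG_BASE2/3 below point
 * at the west-south one. Each 64-bit base is split across a LO/HI register
 * pair, i.e. the pair above reassembles to (sketch only):
 *
 *	u64 mtr_base_en = ((u64) mtr_base_en_hi << 32) | mtr_base_en_lo;
 *
 * which equals CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0.
 */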
WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); + WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi); + WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo); + WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi); + WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo); + WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi); + + /* The following configuration is needed only once per QMAN */ + if (qman_id == 0) { + /* Configure RAZWI IRQ */ + dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; + if (hdev->stop_on_err) { + dma_qm_err_cfg |= + PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; + } + + WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg); + WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset, + lower_32_bits(CFG_BASE + + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR)); + WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset, + upper_32_bits(CFG_BASE + + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR)); + WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset, + gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id + + dma_id); + + WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset, + QM_ARB_ERR_MSG_EN_MASK); + + /* Increase ARB WDT to support streams architecture */ + WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, + GAUDI_ARB_WDT_TIMEOUT); + + WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset, + QMAN_EXTERNAL_MAKE_TRUSTED); + + WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0); + } +} + +static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id) +{ + u32 dma_offset = dma_id * DMA_CORE_OFFSET; + u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT; + + /* Set to maximum possible according to physical size */ + WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0); + WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0); + + /* The STOP_ON bit implies no completion for the operation in case of RAZWI */ + if (hdev->stop_on_err) + dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT; + + WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg); + WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset, + lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR)); + WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset, + upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR)); + WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset, + gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id); + WREG32(mmDMA0_CORE_PROT + dma_offset, + 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT); + /* If the channel is secured, it should be in MMU bypass mode */ + WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset, + 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT); + WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT); +} + +static void gaudi_enable_qman(struct hl_device *hdev, int dma_id, + u32 enable_mask) +{ + u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; + + WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask); +} + +static void gaudi_init_pci_dma_qmans(struct hl_device *hdev) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + struct hl_hw_queue *q; + int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0; + + if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA) + return; + + for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) { + dma_id = gaudi_dma_assignment[i]; + /* + * For queues after the CPU Q, we need to add 1 to get the + * correct queue index. In addition, we need to account for the + * CPU EQ and NIC IRQs in order to get the correct MSI register. 
+ */ + if (dma_id > 1) { + cpu_skip = 1; + nic_skip = NIC_NUMBER_OF_ENGINES; + } else { + cpu_skip = 0; + nic_skip = 0; + } + + for (j = 0 ; j < QMAN_STREAMS ; j++) { + q_idx = 4 * dma_id + j + cpu_skip; + q = &hdev->kernel_queues[q_idx]; + q->cq_id = cq_id++; + q->msi_vec = nic_skip + cpu_skip + msi_vec++; + gaudi_init_pci_dma_qman(hdev, dma_id, j, + q->bus_address); + } + + gaudi_init_dma_core(hdev, dma_id); + + gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE); + } + + gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA; +} + +static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id, + int qman_id, u64 qman_base_addr) +{ + u32 mtr_base_lo, mtr_base_hi; + u32 so_base_lo, so_base_hi; + u32 q_off, dma_qm_offset; + u32 dma_qm_err_cfg; + + dma_qm_offset = dma_id * DMA_QMAN_OFFSET; + + mtr_base_lo = lower_32_bits(CFG_BASE + + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); + mtr_base_hi = upper_32_bits(CFG_BASE + + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); + so_base_lo = lower_32_bits(CFG_BASE + + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); + so_base_hi = upper_32_bits(CFG_BASE + + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); + + q_off = dma_qm_offset + qman_id * 4; + + if (qman_id < 4) { + WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, + lower_32_bits(qman_base_addr)); + WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, + upper_32_bits(qman_base_addr)); + + WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH)); + WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0); + WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0); + + WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x81BC); + WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x81B4); + WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C); + } else { + WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74); + WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14); + WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C); + + /* Configure RAZWI IRQ */ + dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; + if (hdev->stop_on_err) { + dma_qm_err_cfg |= + HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; + } + WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg); + + WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset, + lower_32_bits(CFG_BASE + + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR)); + WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset, + upper_32_bits(CFG_BASE + + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR)); + WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset, + gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id + + dma_id); + + WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset, + QM_ARB_ERR_MSG_EN_MASK); + + /* Increase ARB WDT to support streams architecture */ + WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, + GAUDI_ARB_WDT_TIMEOUT); + + WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0); + WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset, + QMAN_INTERNAL_MAKE_TRUSTED); + } + + WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo); + WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi); + WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo); + WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi); +} + +static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + struct gaudi_internal_qman_info *q; + u64 qman_base_addr; + int i, j, dma_id, internal_q_index; + + if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA) + return; + + for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) { + dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i]; + + for (j = 0 ; j < 
QMAN_STREAMS ; j++) { + /* + * Add the CPU queue in order to get the correct queue + * number as all internal queue are placed after it + */ + internal_q_index = dma_id * QMAN_STREAMS + j + 1; + + q = &gaudi->internal_qmans[internal_q_index]; + qman_base_addr = (u64) q->pq_dma_addr; + gaudi_init_hbm_dma_qman(hdev, dma_id, j, + qman_base_addr); + } + + /* Initializing lower CP for HBM DMA QMAN */ + gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0); + + gaudi_init_dma_core(hdev, dma_id); + + gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE); + } + + gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA; +} + +static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset, + int qman_id, u64 qman_base_addr) +{ + u32 mtr_base_lo, mtr_base_hi; + u32 so_base_lo, so_base_hi; + u32 q_off, mme_id; + u32 mme_qm_err_cfg; + + mtr_base_lo = lower_32_bits(CFG_BASE + + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); + mtr_base_hi = upper_32_bits(CFG_BASE + + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); + so_base_lo = lower_32_bits(CFG_BASE + + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); + so_base_hi = upper_32_bits(CFG_BASE + + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); + + q_off = mme_offset + qman_id * 4; + + if (qman_id < 4) { + WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off, + lower_32_bits(qman_base_addr)); + WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off, + upper_32_bits(qman_base_addr)); + + WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH)); + WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0); + WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0); + + WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x81BC); + WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x81B4); + WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C); + } else { + WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74); + WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14); + WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C); + + /* Configure RAZWI IRQ */ + mme_id = mme_offset / + (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0); + + mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; + if (hdev->stop_on_err) { + mme_qm_err_cfg |= + MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; + } + WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg); + WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset, + lower_32_bits(CFG_BASE + + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR)); + WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset, + upper_32_bits(CFG_BASE + + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR)); + WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset, + gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id + + mme_id); + + WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset, + QM_ARB_ERR_MSG_EN_MASK); + + /* Increase ARB WDT to support streams architecture */ + WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, + GAUDI_ARB_WDT_TIMEOUT); + + WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0); + WREG32(mmMME0_QM_GLBL_PROT + mme_offset, + QMAN_INTERNAL_MAKE_TRUSTED); + } + + WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo); + WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi); + WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo); + WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi); +} + +static void gaudi_init_mme_qmans(struct hl_device *hdev) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + struct gaudi_internal_qman_info *q; + u64 qman_base_addr; + u32 mme_offset; + int i, internal_q_index; + + if (gaudi->hw_cap_initialized & HW_CAP_MME) + return; + + /* + * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE) + * and 
GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE) + */ + + mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0; + + for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) { + internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i; + q = &gaudi->internal_qmans[internal_q_index]; + qman_base_addr = (u64) q->pq_dma_addr; + gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3), + qman_base_addr); + if (i == 3) + mme_offset = 0; + } + + /* Initializing lower CP for MME QMANs */ + mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0; + gaudi_init_mme_qman(hdev, mme_offset, 4, 0); + gaudi_init_mme_qman(hdev, 0, 4, 0); + + WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE); + WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE); + + gaudi->hw_cap_initialized |= HW_CAP_MME; +} + +static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset, + int qman_id, u64 qman_base_addr) +{ + u32 mtr_base_lo, mtr_base_hi; + u32 so_base_lo, so_base_hi; + u32 q_off, tpc_id; + u32 tpc_qm_err_cfg; + + mtr_base_lo = lower_32_bits(CFG_BASE + + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); + mtr_base_hi = upper_32_bits(CFG_BASE + + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); + so_base_lo = lower_32_bits(CFG_BASE + + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); + so_base_hi = upper_32_bits(CFG_BASE + + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); + + q_off = tpc_offset + qman_id * 4; + + if (qman_id < 4) { + WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off, + lower_32_bits(qman_base_addr)); + WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off, + upper_32_bits(qman_base_addr)); + + WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH)); + WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0); + WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0); + + WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x81BC); + WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x81B4); + WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C); + } else { + WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74); + WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14); + WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C); + + /* Configure RAZWI IRQ */ + tpc_id = tpc_offset / + (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0); + + tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; + if (hdev->stop_on_err) { + tpc_qm_err_cfg |= + TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; + } + + WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg); + WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset, + lower_32_bits(CFG_BASE + + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR)); + WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset, + upper_32_bits(CFG_BASE + + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR)); + WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset, + gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id + + tpc_id); + + WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset, + QM_ARB_ERR_MSG_EN_MASK); + + /* Increase ARB WDT to support streams architecture */ + WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, + GAUDI_ARB_WDT_TIMEOUT); + + WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0); + WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset, + QMAN_INTERNAL_MAKE_TRUSTED); + } + + WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo); + WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi); + WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo); + WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi); +} + +static void gaudi_init_tpc_qmans(struct hl_device *hdev) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + struct gaudi_internal_qman_info *q; + u64 qman_base_addr; + u32 so_base_hi, tpc_offset = 0; + u32 
tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH - + mmTPC0_CFG_SM_BASE_ADDRESS_HIGH; + int i, tpc_id, internal_q_index; + + if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK) + return; + + so_base_hi = upper_32_bits(CFG_BASE + + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); + + for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) { + for (i = 0 ; i < QMAN_STREAMS ; i++) { + internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 + + tpc_id * QMAN_STREAMS + i; + q = &gaudi->internal_qmans[internal_q_index]; + qman_base_addr = (u64) q->pq_dma_addr; + gaudi_init_tpc_qman(hdev, tpc_offset, i, + qman_base_addr); + + if (i == 3) { + /* Initializing lower CP for TPC QMAN */ + gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0); + + /* Enable the QMAN and TPC channel */ + WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, + QMAN_TPC_ENABLE); + } + } + + WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta, + so_base_hi); + + tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0; + + gaudi->hw_cap_initialized |= 1 << (HW_CAP_TPC_SHIFT + tpc_id); + } +} + +static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + + if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)) + return; + + WREG32(mmDMA0_QM_GLBL_CFG0, 0); + WREG32(mmDMA1_QM_GLBL_CFG0, 0); + WREG32(mmDMA5_QM_GLBL_CFG0, 0); +} + +static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + + if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)) + return; + + WREG32(mmDMA2_QM_GLBL_CFG0, 0); + WREG32(mmDMA3_QM_GLBL_CFG0, 0); + WREG32(mmDMA4_QM_GLBL_CFG0, 0); + WREG32(mmDMA6_QM_GLBL_CFG0, 0); + WREG32(mmDMA7_QM_GLBL_CFG0, 0); +} + +static void gaudi_disable_mme_qmans(struct hl_device *hdev) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + + if (!(gaudi->hw_cap_initialized & HW_CAP_MME)) + return; + + WREG32(mmMME2_QM_GLBL_CFG0, 0); + WREG32(mmMME0_QM_GLBL_CFG0, 0); +} + +static void gaudi_disable_tpc_qmans(struct hl_device *hdev) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + u32 tpc_offset = 0; + int tpc_id; + + if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)) + return; + + for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) { + WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0); + tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0; + } +} + +static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + + if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)) + return; + + /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */ + WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); + WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); + WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); +} + +static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + + if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)) + return; + + /* Stop CPs of HBM DMA QMANs */ + + WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); + WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); + WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); + WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); + WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); +} + +static void gaudi_stop_mme_qmans(struct hl_device *hdev) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + + if (!(gaudi->hw_cap_initialized & HW_CAP_MME)) 
+ return; + + /* Stop CPs of MME QMANs */ + WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT); + WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT); +} + +static void gaudi_stop_tpc_qmans(struct hl_device *hdev) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + + if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)) + return; + + WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); + WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); + WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); + WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); + WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); + WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); + WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); + WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); +} + +static void gaudi_pci_dma_stall(struct hl_device *hdev) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + + if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)) + return; + + WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); + WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); + WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); +} + +static void gaudi_hbm_dma_stall(struct hl_device *hdev) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + + if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)) + return; + + WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); + WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); + WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); + WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); + WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); +} + +static void gaudi_mme_stall(struct hl_device *hdev) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + + if (!(gaudi->hw_cap_initialized & HW_CAP_MME)) + return; + + /* WA for H3-1800 bug: do ACC and SBAB writes twice */ + WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); + WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); + WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); + WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); + WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); + WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); + WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); + WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); + WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); + WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); + WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); + WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); + WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); + WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); + WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); + WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); +} + +static void gaudi_tpc_stall(struct hl_device *hdev) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + + if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)) + return; + + WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); + WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); + WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); + WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); + WREG32(mmTPC4_CFG_TPC_STALL, 1 << 
TPC0_CFG_TPC_STALL_V_SHIFT); + WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); + WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); + WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); +} + +static void gaudi_enable_clock_gating(struct hl_device *hdev) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + u32 qman_offset; + int i; + + if (!hdev->clock_gating) + return; + + if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) + return; + + /* If a debug session is in progress, don't enable the clock gate + * as it may interfere + */ + if (hdev->in_debug) + return; + + for (i = 0, qman_offset = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) { + qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET; + WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, QMAN_CGM1_PWR_GATE_EN); + WREG32(mmDMA0_QM_CGM_CFG + qman_offset, + QMAN_UPPER_CP_CGM_PWR_GATE_EN); + } + + for (; i < PCI_DMA_NUMBER_OF_CHNLS + HBM_DMA_NUMBER_OF_CHNLS ; i++) { + qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET; + WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, QMAN_CGM1_PWR_GATE_EN); + WREG32(mmDMA0_QM_CGM_CFG + qman_offset, + QMAN_COMMON_CP_CGM_PWR_GATE_EN); + } + + WREG32(mmMME0_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN); + WREG32(mmMME0_QM_CGM_CFG, + QMAN_COMMON_CP_CGM_PWR_GATE_EN); + WREG32(mmMME2_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN); + WREG32(mmMME2_QM_CGM_CFG, + QMAN_COMMON_CP_CGM_PWR_GATE_EN); + + for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { + WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, + QMAN_CGM1_PWR_GATE_EN); + WREG32(mmTPC0_QM_CGM_CFG + qman_offset, + QMAN_COMMON_CP_CGM_PWR_GATE_EN); + + qman_offset += TPC_QMAN_OFFSET; + } + + gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE; +} + +static void gaudi_disable_clock_gating(struct hl_device *hdev) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + u32 qman_offset; + int i; + + if (!(gaudi->hw_cap_initialized & HW_CAP_CLK_GATE)) + return; + + for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) { + WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0); + WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0); + + qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG); + } + + WREG32(mmMME0_QM_CGM_CFG, 0); + WREG32(mmMME0_QM_CGM_CFG1, 0); + WREG32(mmMME2_QM_CGM_CFG, 0); + WREG32(mmMME2_QM_CGM_CFG1, 0); + + for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { + WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0); + WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0); + + qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG); + } + + gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE); +} + +static void gaudi_enable_timestamp(struct hl_device *hdev) +{ + /* Disable the timestamp counter */ + WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0); + + /* Zero the lower/upper parts of the 64-bit counter */ + WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0); + WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0); + + /* Enable the counter */ + WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1); +} + +static void gaudi_disable_timestamp(struct hl_device *hdev) +{ + /* Disable the timestamp counter */ + WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0); +} + +static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset) +{ + u32 wait_timeout_ms, cpu_timeout_ms; + + dev_info(hdev->dev, + "Halting compute engines and disabling interrupts\n"); + + if (hdev->pldm) { + wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC; + cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC; + } else { + wait_timeout_ms = GAUDI_RESET_WAIT_MSEC; + cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC; + } + + if (hard_reset) { + /* + * We don't know 
the state of the CPU, so make sure it is + * stopped by any means necessary + */ + WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE); + WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, + GAUDI_EVENT_HALT_MACHINE); + msleep(cpu_timeout_ms); + } + + gaudi_stop_mme_qmans(hdev); + gaudi_stop_tpc_qmans(hdev); + gaudi_stop_hbm_dma_qmans(hdev); + gaudi_stop_pci_dma_qmans(hdev); + + gaudi_disable_clock_gating(hdev); + + msleep(wait_timeout_ms); + + gaudi_pci_dma_stall(hdev); + gaudi_hbm_dma_stall(hdev); + gaudi_tpc_stall(hdev); + gaudi_mme_stall(hdev); + + msleep(wait_timeout_ms); + + gaudi_disable_mme_qmans(hdev); + gaudi_disable_tpc_qmans(hdev); + gaudi_disable_hbm_dma_qmans(hdev); + gaudi_disable_pci_dma_qmans(hdev); + + gaudi_disable_timestamp(hdev); + + if (hard_reset) + gaudi_disable_msi(hdev); + else + gaudi_sync_irqs(hdev); +} + +static int gaudi_mmu_init(struct hl_device *hdev) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct gaudi_device *gaudi = hdev->asic_specific; + u64 hop0_addr; + int rc, i; + + if (!hdev->mmu_enable) + return 0; + + if (gaudi->hw_cap_initialized & HW_CAP_MMU) + return 0; + + hdev->dram_supports_virtual_memory = false; + + for (i = 0 ; i < prop->max_asid ; i++) { + hop0_addr = prop->mmu_pgt_addr + + (i * prop->mmu_hop_table_size); + + rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr); + if (rc) { + dev_err(hdev->dev, + "failed to set hop0 addr for asid %d\n", i); + goto err; + } + } + + /* init MMU cache management page */ + WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8); + WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40); + + hdev->asic_funcs->mmu_invalidate_cache(hdev, true, + VM_TYPE_USERPTR | VM_TYPE_PHYS_PACK); + + WREG32(mmMMU_UP_MMU_ENABLE, 1); + WREG32(mmMMU_UP_SPI_MASK, 0xF); + + WREG32(mmSTLB_HOP_CONFIGURATION, + hdev->mmu_huge_page_opt ? 
0x30440 : 0x40440); + + gaudi->hw_cap_initialized |= HW_CAP_MMU; + + return 0; + +err: + return rc; +} + +static int gaudi_load_firmware_to_device(struct hl_device *hdev) +{ + void __iomem *dst; + + /* HBM scrambler must be initialized before pushing F/W to HBM */ + gaudi_init_scrambler_hbm(hdev); + + dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET; + + return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst); +} + +static int gaudi_load_boot_fit_to_device(struct hl_device *hdev) +{ + void __iomem *dst; + + dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET; + + return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst); +} + +static void gaudi_read_device_fw_version(struct hl_device *hdev, + enum hl_fw_component fwc) +{ + const char *name; + u32 ver_off; + char *dest; + + switch (fwc) { + case FW_COMP_UBOOT: + ver_off = RREG32(mmUBOOT_VER_OFFSET); + dest = hdev->asic_prop.uboot_ver; + name = "U-Boot"; + break; + case FW_COMP_PREBOOT: + ver_off = RREG32(mmPREBOOT_VER_OFFSET); + dest = hdev->asic_prop.preboot_ver; + name = "Preboot"; + break; + default: + dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc); + return; + } + + ver_off &= ~((u32)SRAM_BASE_ADDR); + + if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) { + memcpy_fromio(dest, hdev->pcie_bar[SRAM_BAR_ID] + ver_off, + VERSION_MAX_LEN); + } else { + dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n", + name, ver_off); + strcpy(dest, "unavailable"); + } +} + +static int gaudi_init_cpu(struct hl_device *hdev) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + int rc; + + if (!hdev->cpu_enable) + return 0; + + if (gaudi->hw_cap_initialized & HW_CAP_CPU) + return 0; + + /* + * The device CPU works with 40 bits addresses. + * This register sets the extension to 50 bits. 
+ */ + WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr); + + rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS, + mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, + mmCPU_CMD_STATUS_TO_HOST, + mmCPU_BOOT_ERR0, + !hdev->bmc_enable, GAUDI_CPU_TIMEOUT_USEC, + GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC); + + if (rc) + return rc; + + gaudi->hw_cap_initialized |= HW_CAP_CPU; + + return 0; +} + +static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + struct hl_eq *eq; + u32 status; + struct hl_hw_queue *cpu_pq = + &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ]; + int err; + + if (!hdev->cpu_queues_enable) + return 0; + + if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q) + return 0; + + eq = &hdev->event_queue; + + WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address)); + WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address)); + + WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address)); + WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address)); + + WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW, + lower_32_bits(hdev->cpu_accessible_dma_address)); + WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH, + upper_32_bits(hdev->cpu_accessible_dma_address)); + + WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES); + WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES); + WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE); + + /* Used for EQ CI */ + WREG32(mmCPU_IF_EQ_RD_OFFS, 0); + + WREG32(mmCPU_IF_PF_PQ_PI, 0); + + if (gaudi->multi_msi_mode) + WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP); + else + WREG32(mmCPU_IF_QUEUE_INIT, + PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI); + + WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_PI_UPDATE); + + err = hl_poll_timeout( + hdev, + mmCPU_IF_QUEUE_INIT, + status, + (status == PQ_INIT_STATUS_READY_FOR_HOST), + 1000, + cpu_timeout); + + if (err) { + dev_err(hdev->dev, + "Failed to communicate with ARM CPU (ArmCP timeout)\n"); + return -EIO; + } + + gaudi->hw_cap_initialized |= HW_CAP_CPU_Q; + return 0; +} + +static void gaudi_pre_hw_init(struct hl_device *hdev) +{ + /* Perform read from the device to make sure device is up */ + RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG); + + /* + * Let's mark in the H/W that we have reached this point. We check + * this value in the reset_before_init function to understand whether + * we need to reset the chip before doing H/W init. This register is + * cleared by the H/W upon H/W reset + */ + WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY); + + /* Set the access through PCI bars (Linux driver only) as secured */ + WREG32(mmPCIE_WRAP_LBW_PROT_OVR, (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK | + PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK)); + + /* Perform read to flush the waiting writes to ensure configuration + * was set in the device + */ + RREG32(mmPCIE_WRAP_LBW_PROT_OVR); + + if (hdev->axi_drain) { + WREG32(mmPCIE_WRAP_LBW_DRAIN_CFG, + 1 << PCIE_WRAP_LBW_DRAIN_CFG_EN_SHIFT); + WREG32(mmPCIE_WRAP_HBW_DRAIN_CFG, + 1 << PCIE_WRAP_HBW_DRAIN_CFG_EN_SHIFT); + + /* Perform read to flush the DRAIN cfg */ + RREG32(mmPCIE_WRAP_HBW_DRAIN_CFG); + } else { + WREG32(mmPCIE_WRAP_LBW_DRAIN_CFG, 0); + WREG32(mmPCIE_WRAP_HBW_DRAIN_CFG, 0); + + /* Perform read to flush the DRAIN cfg */ + RREG32(mmPCIE_WRAP_HBW_DRAIN_CFG); + } + + /* Configure the reset registers. 
Must be done as early as possible + * in case we fail during H/W initialization + */ + WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H, + (CFG_RST_H_DMA_MASK | + CFG_RST_H_MME_MASK | + CFG_RST_H_SM_MASK | + CFG_RST_H_TPC_MASK)); + + WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK); + + WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H, + (CFG_RST_H_HBM_MASK | + CFG_RST_H_TPC_MASK | + CFG_RST_H_NIC_MASK | + CFG_RST_H_SM_MASK | + CFG_RST_H_DMA_MASK | + CFG_RST_H_MME_MASK | + CFG_RST_H_CPU_MASK | + CFG_RST_H_MMU_MASK)); + + WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L, + (CFG_RST_L_IF_MASK | + CFG_RST_L_PSOC_MASK | + CFG_RST_L_TPC_MASK)); +} + +static int gaudi_hw_init(struct hl_device *hdev) +{ + int rc; + + dev_info(hdev->dev, "Starting initialization of H/W\n"); + + gaudi_pre_hw_init(hdev); + + gaudi_init_pci_dma_qmans(hdev); + + gaudi_init_hbm_dma_qmans(hdev); + + /* + * Before pushing u-boot/linux to device, need to set the hbm bar to + * base address of dram + */ + if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) { + dev_err(hdev->dev, + "failed to map HBM bar to DRAM base address\n"); + return -EIO; + } + + rc = gaudi_init_cpu(hdev); + if (rc) { + dev_err(hdev->dev, "failed to initialize CPU\n"); + return rc; + } + + /* SRAM scrambler must be initialized after CPU is running from HBM */ + gaudi_init_scrambler_sram(hdev); + + /* This is here just in case we are working without CPU */ + gaudi_init_scrambler_hbm(hdev); + + gaudi_init_golden_registers(hdev); + + rc = gaudi_mmu_init(hdev); + if (rc) + return rc; + + gaudi_init_mme_qmans(hdev); + + gaudi_init_tpc_qmans(hdev); + + gaudi_enable_clock_gating(hdev); + + gaudi_enable_timestamp(hdev); + + /* MSI must be enabled before CPU queues are initialized */ + rc = gaudi_enable_msi(hdev); + if (rc) + goto disable_queues; + + /* must be called after MSI was enabled */ + rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC); + if (rc) { + dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n", + rc); + goto disable_msi; + } + + /* Perform read from the device to flush all configuration */ + RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG); + + return 0; + +disable_msi: + gaudi_disable_msi(hdev); +disable_queues: + gaudi_disable_mme_qmans(hdev); + gaudi_disable_pci_dma_qmans(hdev); + + return rc; +} + +static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + u32 status, reset_timeout_ms, boot_strap = 0; + + if (hdev->pldm) { + if (hard_reset) + reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC; + else + reset_timeout_ms = GAUDI_PLDM_SRESET_TIMEOUT_MSEC; + } else { + reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC; + } + + if (hard_reset) { + /* Tell ASIC not to re-initialize PCIe */ + WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC); + + boot_strap = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS); + /* H/W bug WA: + * rdata[31:0] = strap_read_val; + * wdata[31:0] = rdata[30:21],1'b0,rdata[20:0] + */ + boot_strap = (((boot_strap & 0x7FE00000) << 1) | + (boot_strap & 0x001FFFFF)); + WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap & ~0x2); + + /* Restart BTL/BLR upon hard-reset */ + WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1); + + WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST, + 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT); + dev_info(hdev->dev, + "Issued HARD reset command, going to wait %dms\n", + reset_timeout_ms); + } else { + /* Don't restart BTL/BLR upon soft-reset */ + WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 0); + + WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST, + 1 << 
PSOC_GLOBAL_CONF_SOFT_RST_IND_SHIFT); + dev_info(hdev->dev, + "Issued SOFT reset command, going to wait %dms\n", + reset_timeout_ms); + } + + /* + * After hard reset, we can't poll the BTM_FSM register because the PSOC + * itself is in reset. Need to wait until the reset is deasserted + */ + msleep(reset_timeout_ms); + + status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM); + if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) + dev_err(hdev->dev, + "Timeout while waiting for device to reset 0x%x\n", + status); + + if (!hard_reset) { + gaudi->hw_cap_initialized &= ~(HW_CAP_PCI_DMA | HW_CAP_MME | + HW_CAP_TPC_MASK | + HW_CAP_HBM_DMA); + + WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, + GAUDI_EVENT_SOFT_RESET); + return; + } + + /* We continue here only for hard-reset */ + + WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap); + + gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | + HW_CAP_HBM | HW_CAP_PCI_DMA | + HW_CAP_MME | HW_CAP_TPC_MASK | + HW_CAP_HBM_DMA | HW_CAP_PLL | + HW_CAP_MMU | + HW_CAP_SRAM_SCRAMBLER | + HW_CAP_HBM_SCRAMBLER); + memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat)); +} + +static int gaudi_suspend(struct hl_device *hdev) +{ + int rc; + + rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS); + if (rc) + dev_err(hdev->dev, "Failed to disable PCI access from CPU\n"); + + return rc; +} + +static int gaudi_resume(struct hl_device *hdev) +{ + return gaudi_init_iatu(hdev); +} + +static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma, + u64 kaddress, phys_addr_t paddress, u32 size) +{ + int rc; + + vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | + VM_DONTCOPY | VM_NORESERVE; + + rc = remap_pfn_range(vma, vma->vm_start, paddress >> PAGE_SHIFT, + size, vma->vm_page_prot); + if (rc) + dev_err(hdev->dev, "remap_pfn_range error %d", rc); + + return rc; +} + +static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + u32 db_reg_offset, db_value, dma_qm_offset, q_off; + int dma_id; + bool invalid_queue = false; + + switch (hw_queue_id) { + case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3: + dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1]; + dma_qm_offset = dma_id * DMA_QMAN_OFFSET; + q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4; + db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; + break; + + case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3: + dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2]; + dma_qm_offset = dma_id * DMA_QMAN_OFFSET; + q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4; + db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; + break; + + case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3: + dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1]; + dma_qm_offset = dma_id * DMA_QMAN_OFFSET; + q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; + db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; + break; + + case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3: + dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2]; + dma_qm_offset = dma_id * DMA_QMAN_OFFSET; + q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; + db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; + break; + + case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3: + dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3]; + dma_qm_offset = dma_id * DMA_QMAN_OFFSET; + q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; + db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; + break; + + case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3: + dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_3]; + dma_qm_offset = 
dma_id * DMA_QMAN_OFFSET; + q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; + db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; + break; + + case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3: + dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4]; + dma_qm_offset = dma_id * DMA_QMAN_OFFSET; + q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; + db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; + break; + + case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3: + dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5]; + dma_qm_offset = dma_id * DMA_QMAN_OFFSET; + q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; + db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; + break; + + case GAUDI_QUEUE_ID_CPU_PQ: + if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q) + db_reg_offset = mmCPU_IF_PF_PQ_PI; + else + invalid_queue = true; + break; + + case GAUDI_QUEUE_ID_MME_0_0: + db_reg_offset = mmMME2_QM_PQ_PI_0; + break; + + case GAUDI_QUEUE_ID_MME_0_1: + db_reg_offset = mmMME2_QM_PQ_PI_1; + break; + + case GAUDI_QUEUE_ID_MME_0_2: + db_reg_offset = mmMME2_QM_PQ_PI_2; + break; + + case GAUDI_QUEUE_ID_MME_0_3: + db_reg_offset = mmMME2_QM_PQ_PI_3; + break; + + case GAUDI_QUEUE_ID_MME_1_0: + db_reg_offset = mmMME0_QM_PQ_PI_0; + break; + + case GAUDI_QUEUE_ID_MME_1_1: + db_reg_offset = mmMME0_QM_PQ_PI_1; + break; + + case GAUDI_QUEUE_ID_MME_1_2: + db_reg_offset = mmMME0_QM_PQ_PI_2; + break; + + case GAUDI_QUEUE_ID_MME_1_3: + db_reg_offset = mmMME0_QM_PQ_PI_3; + break; + + case GAUDI_QUEUE_ID_TPC_0_0: + db_reg_offset = mmTPC0_QM_PQ_PI_0; + break; + + case GAUDI_QUEUE_ID_TPC_0_1: + db_reg_offset = mmTPC0_QM_PQ_PI_1; + break; + + case GAUDI_QUEUE_ID_TPC_0_2: + db_reg_offset = mmTPC0_QM_PQ_PI_2; + break; + + case GAUDI_QUEUE_ID_TPC_0_3: + db_reg_offset = mmTPC0_QM_PQ_PI_3; + break; + + case GAUDI_QUEUE_ID_TPC_1_0: + db_reg_offset = mmTPC1_QM_PQ_PI_0; + break; + + case GAUDI_QUEUE_ID_TPC_1_1: + db_reg_offset = mmTPC1_QM_PQ_PI_1; + break; + + case GAUDI_QUEUE_ID_TPC_1_2: + db_reg_offset = mmTPC1_QM_PQ_PI_2; + break; + + case GAUDI_QUEUE_ID_TPC_1_3: + db_reg_offset = mmTPC1_QM_PQ_PI_3; + break; + + case GAUDI_QUEUE_ID_TPC_2_0: + db_reg_offset = mmTPC2_QM_PQ_PI_0; + break; + + case GAUDI_QUEUE_ID_TPC_2_1: + db_reg_offset = mmTPC2_QM_PQ_PI_1; + break; + + case GAUDI_QUEUE_ID_TPC_2_2: + db_reg_offset = mmTPC2_QM_PQ_PI_2; + break; + + case GAUDI_QUEUE_ID_TPC_2_3: + db_reg_offset = mmTPC2_QM_PQ_PI_3; + break; + + case GAUDI_QUEUE_ID_TPC_3_0: + db_reg_offset = mmTPC3_QM_PQ_PI_0; + break; + + case GAUDI_QUEUE_ID_TPC_3_1: + db_reg_offset = mmTPC3_QM_PQ_PI_1; + break; + + case GAUDI_QUEUE_ID_TPC_3_2: + db_reg_offset = mmTPC3_QM_PQ_PI_2; + break; + + case GAUDI_QUEUE_ID_TPC_3_3: + db_reg_offset = mmTPC3_QM_PQ_PI_3; + break; + + case GAUDI_QUEUE_ID_TPC_4_0: + db_reg_offset = mmTPC4_QM_PQ_PI_0; + break; + + case GAUDI_QUEUE_ID_TPC_4_1: + db_reg_offset = mmTPC4_QM_PQ_PI_1; + break; + + case GAUDI_QUEUE_ID_TPC_4_2: + db_reg_offset = mmTPC4_QM_PQ_PI_2; + break; + + case GAUDI_QUEUE_ID_TPC_4_3: + db_reg_offset = mmTPC4_QM_PQ_PI_3; + break; + + case GAUDI_QUEUE_ID_TPC_5_0: + db_reg_offset = mmTPC5_QM_PQ_PI_0; + break; + + case GAUDI_QUEUE_ID_TPC_5_1: + db_reg_offset = mmTPC5_QM_PQ_PI_1; + break; + + case GAUDI_QUEUE_ID_TPC_5_2: + db_reg_offset = mmTPC5_QM_PQ_PI_2; + break; + + case GAUDI_QUEUE_ID_TPC_5_3: + db_reg_offset = mmTPC5_QM_PQ_PI_3; + break; + + case GAUDI_QUEUE_ID_TPC_6_0: + db_reg_offset = mmTPC6_QM_PQ_PI_0; + break; + + case GAUDI_QUEUE_ID_TPC_6_1: + db_reg_offset = mmTPC6_QM_PQ_PI_1; + break; + + case GAUDI_QUEUE_ID_TPC_6_2: + db_reg_offset 
= mmTPC6_QM_PQ_PI_2; + break; + + case GAUDI_QUEUE_ID_TPC_6_3: + db_reg_offset = mmTPC6_QM_PQ_PI_3; + break; + + case GAUDI_QUEUE_ID_TPC_7_0: + db_reg_offset = mmTPC7_QM_PQ_PI_0; + break; + + case GAUDI_QUEUE_ID_TPC_7_1: + db_reg_offset = mmTPC7_QM_PQ_PI_1; + break; + + case GAUDI_QUEUE_ID_TPC_7_2: + db_reg_offset = mmTPC7_QM_PQ_PI_2; + break; + + case GAUDI_QUEUE_ID_TPC_7_3: + db_reg_offset = mmTPC7_QM_PQ_PI_3; + break; + + default: + invalid_queue = true; + } + + if (invalid_queue) { + /* Should never get here */ + dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n", + hw_queue_id); + return; + } + + db_value = pi; + + /* ring the doorbell */ + WREG32(db_reg_offset, db_value); + + if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) + WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, + GAUDI_EVENT_PI_UPDATE); +} + +static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe, + struct hl_bd *bd) +{ + __le64 *pbd = (__le64 *) bd; + + /* The QMANs are on the host memory so a simple copy suffice */ + pqe[0] = pbd[0]; + pqe[1] = pbd[1]; +} + +static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size, + dma_addr_t *dma_handle, gfp_t flags) +{ + void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size, + dma_handle, flags); + + /* Shift to the device's base physical address of host memory */ + if (kernel_addr) + *dma_handle += HOST_PHYS_BASE; + + return kernel_addr; +} + +static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size, + void *cpu_addr, dma_addr_t dma_handle) +{ + /* Cancel the device's base physical address of host memory */ + dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE; + + dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle); +} + +static void *gaudi_get_int_queue_base(struct hl_device *hdev, + u32 queue_id, dma_addr_t *dma_handle, + u16 *queue_len) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + struct gaudi_internal_qman_info *q; + + if (queue_id >= GAUDI_QUEUE_ID_SIZE || + gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) { + dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id); + return NULL; + } + + q = &gaudi->internal_qmans[queue_id]; + *dma_handle = q->pq_dma_addr; + *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE; + + return q->pq_kernel_addr; +} + +static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg, + u16 len, u32 timeout, long *result) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + + if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) { + if (result) + *result = 0; + return 0; + } + + return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len, + timeout, result); +} + +static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id) +{ + struct packet_msg_prot *fence_pkt; + dma_addr_t pkt_dma_addr; + u32 fence_val, tmp, timeout_usec; + dma_addr_t fence_dma_addr; + u32 *fence_ptr; + int rc; + + if (hdev->pldm) + timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC; + else + timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC; + + fence_val = GAUDI_QMAN0_FENCE_VAL; + + fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, + &fence_dma_addr); + if (!fence_ptr) { + dev_err(hdev->dev, + "Failed to allocate memory for queue testing\n"); + return -ENOMEM; + } + + *fence_ptr = 0; + + fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, + sizeof(struct packet_msg_prot), + GFP_KERNEL, &pkt_dma_addr); + if (!fence_pkt) { + dev_err(hdev->dev, + "Failed to allocate packet for queue testing\n"); + rc = -ENOMEM; + goto free_fence_ptr; + } + + tmp = (PACKET_MSG_PROT << 
GAUDI_PKT_CTL_OPCODE_SHIFT) | + (1 << GAUDI_PKT_CTL_EB_SHIFT) | + (1 << GAUDI_PKT_CTL_MB_SHIFT); + fence_pkt->ctl = cpu_to_le32(tmp); + fence_pkt->value = cpu_to_le32(fence_val); + fence_pkt->addr = cpu_to_le64(fence_dma_addr); + + rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, + sizeof(struct packet_msg_prot), + pkt_dma_addr); + if (rc) { + dev_err(hdev->dev, + "Failed to send fence packet\n"); + goto free_pkt; + } + + rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val), + 1000, timeout_usec, true); + + hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id); + + if (rc == -ETIMEDOUT) { + dev_err(hdev->dev, + "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n", + hw_queue_id, (unsigned long long) fence_dma_addr, tmp); + rc = -EIO; + } + +free_pkt: + hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt, + pkt_dma_addr); +free_fence_ptr: + hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr, + fence_dma_addr); + return rc; +} + +static int gaudi_test_cpu_queue(struct hl_device *hdev) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + + /* + * check capability here as send_cpu_message() won't update the result + * value if no capability + */ + if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) + return 0; + + return hl_fw_test_cpu_queue(hdev); +} + +static int gaudi_test_queues(struct hl_device *hdev) +{ + int i, rc, ret_val = 0; + + for (i = 0 ; i < HL_MAX_QUEUES ; i++) { + if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) { + rc = gaudi_test_queue(hdev, i); + if (rc) + ret_val = -EINVAL; + } + } + + rc = gaudi_test_cpu_queue(hdev); + if (rc) + ret_val = -EINVAL; + + return ret_val; +} + +static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size, + gfp_t mem_flags, dma_addr_t *dma_handle) +{ + void *kernel_addr; + + if (size > GAUDI_DMA_POOL_BLK_SIZE) + return NULL; + + kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle); + + /* Shift to the device's base physical address of host memory */ + if (kernel_addr) + *dma_handle += HOST_PHYS_BASE; + + return kernel_addr; +} + +static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr, + dma_addr_t dma_addr) +{ + /* Cancel the device's base physical address of host memory */ + dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE; + + dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr); +} + +static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, + size_t size, dma_addr_t *dma_handle) +{ + return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle); +} + +static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev, + size_t size, void *vaddr) +{ + hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr); +} + +static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + + if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir)) + return -ENOMEM; + + /* Shift to the device's base physical address of host memory */ + for_each_sg(sgl, sg, nents, i) + sg->dma_address += HOST_PHYS_BASE; + + return 0; +} + +static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + + /* Cancel the device's base physical address of host memory */ + for_each_sg(sgl, sg, nents, i) + sg->dma_address -= HOST_PHYS_BASE; + + dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir); +} + +static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, + struct sg_table 
*sgt) +{ + struct scatterlist *sg, *sg_next_iter; + u32 count, dma_desc_cnt; + u64 len, len_next; + dma_addr_t addr, addr_next; + + dma_desc_cnt = 0; + + for_each_sg(sgt->sgl, sg, sgt->nents, count) { + + len = sg_dma_len(sg); + addr = sg_dma_address(sg); + + if (len == 0) + break; + + while ((count + 1) < sgt->nents) { + sg_next_iter = sg_next(sg); + len_next = sg_dma_len(sg_next_iter); + addr_next = sg_dma_address(sg_next_iter); + + if (len_next == 0) + break; + + if ((addr + len == addr_next) && + (len + len_next <= DMA_MAX_TRANSFER_SIZE)) { + len += len_next; + count++; + sg = sg_next_iter; + } else { + break; + } + } + + dma_desc_cnt++; + } + + return dma_desc_cnt * sizeof(struct packet_lin_dma); +} + +static int gaudi_pin_memory_before_cs(struct hl_device *hdev, + struct hl_cs_parser *parser, + struct packet_lin_dma *user_dma_pkt, + u64 addr, enum dma_data_direction dir) +{ + struct hl_userptr *userptr; + int rc; + + if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize), + parser->job_userptr_list, &userptr)) + goto already_pinned; + + userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC); + if (!userptr) + return -ENOMEM; + + rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize), + userptr); + if (rc) + goto free_userptr; + + list_add_tail(&userptr->job_node, parser->job_userptr_list); + + rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl, + userptr->sgt->nents, dir); + if (rc) { + dev_err(hdev->dev, "failed to map sgt with DMA region\n"); + goto unpin_memory; + } + + userptr->dma_mapped = true; + userptr->dir = dir; + +already_pinned: + parser->patched_cb_size += + gaudi_get_dma_desc_list_size(hdev, userptr->sgt); + + return 0; + +unpin_memory: + hl_unpin_host_memory(hdev, userptr); +free_userptr: + kfree(userptr); + return rc; +} + +static int gaudi_validate_dma_pkt_host(struct hl_device *hdev, + struct hl_cs_parser *parser, + struct packet_lin_dma *user_dma_pkt, + bool src_in_host) +{ + enum dma_data_direction dir; + bool skip_host_mem_pin = false, user_memset; + u64 addr; + int rc = 0; + + user_memset = (le32_to_cpu(user_dma_pkt->ctl) & + GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >> + GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT; + + if (src_in_host) { + if (user_memset) + skip_host_mem_pin = true; + + dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n"); + dir = DMA_TO_DEVICE; + addr = le64_to_cpu(user_dma_pkt->src_addr); + } else { + dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n"); + dir = DMA_FROM_DEVICE; + addr = (le64_to_cpu(user_dma_pkt->dst_addr) & + GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >> + GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT; + } + + if (skip_host_mem_pin) + parser->patched_cb_size += sizeof(*user_dma_pkt); + else + rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt, + addr, dir); + + return rc; +} + +static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev, + struct hl_cs_parser *parser, + struct packet_lin_dma *user_dma_pkt) +{ + bool src_in_host = false; + u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) & + GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >> + GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT; + + dev_dbg(hdev->dev, "DMA packet details:\n"); + dev_dbg(hdev->dev, "source == 0x%llx\n", + le64_to_cpu(user_dma_pkt->src_addr)); + dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr); + dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize)); + + /* + * Special handling for DMA with size 0. 
Bypass all validations + * because no transactions will be done except for WR_COMP, which + * is not a security issue + */ + if (!le32_to_cpu(user_dma_pkt->tsize)) { + parser->patched_cb_size += sizeof(*user_dma_pkt); + return 0; + } + + if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3) + src_in_host = true; + + return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt, + src_in_host); +} + +static int gaudi_validate_cb(struct hl_device *hdev, + struct hl_cs_parser *parser, bool is_mmu) +{ + u32 cb_parsed_length = 0; + int rc = 0; + + parser->patched_cb_size = 0; + + /* parser->user_cb_size is more than 0 so the loop will always execute */ + while (cb_parsed_length < parser->user_cb_size) { + enum packet_id pkt_id; + u16 pkt_size; + struct gaudi_packet *user_pkt; + + user_pkt = (struct gaudi_packet *) (uintptr_t) + (parser->user_cb->kernel_address + cb_parsed_length); + + pkt_id = (enum packet_id) ( + (le64_to_cpu(user_pkt->header) & + PACKET_HEADER_PACKET_ID_MASK) >> + PACKET_HEADER_PACKET_ID_SHIFT); + + pkt_size = gaudi_packet_sizes[pkt_id]; + cb_parsed_length += pkt_size; + if (cb_parsed_length > parser->user_cb_size) { + dev_err(hdev->dev, + "packet 0x%x is out of CB boundary\n", pkt_id); + rc = -EINVAL; + break; + } + + switch (pkt_id) { + case PACKET_MSG_PROT: + dev_err(hdev->dev, + "User not allowed to use MSG_PROT\n"); + rc = -EPERM; + break; + + case PACKET_CP_DMA: + dev_err(hdev->dev, "User not allowed to use CP_DMA\n"); + rc = -EPERM; + break; + + case PACKET_STOP: + dev_err(hdev->dev, "User not allowed to use STOP\n"); + rc = -EPERM; + break; + + case PACKET_LIN_DMA: + parser->contains_dma_pkt = true; + if (is_mmu) + parser->patched_cb_size += pkt_size; + else + rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser, + (struct packet_lin_dma *) user_pkt); + break; + + case PACKET_WREG_32: + case PACKET_WREG_BULK: + case PACKET_MSG_LONG: + case PACKET_MSG_SHORT: + case PACKET_REPEAT: + case PACKET_FENCE: + case PACKET_NOP: + case PACKET_ARB_POINT: + case PACKET_LOAD_AND_EXE: + parser->patched_cb_size += pkt_size; + break; + + default: + dev_err(hdev->dev, "Invalid packet header 0x%x\n", + pkt_id); + rc = -EINVAL; + break; + } + + if (rc) + break; + } + + /* + * The new CB should have space at the end for two MSG_PROT packets: + * 1. A packet that will act as a completion packet + * 2. 
A packet that will generate MSI-X interrupt + */ + parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2; + + return rc; +} + +static int gaudi_patch_dma_packet(struct hl_device *hdev, + struct hl_cs_parser *parser, + struct packet_lin_dma *user_dma_pkt, + struct packet_lin_dma *new_dma_pkt, + u32 *new_dma_pkt_size) +{ + struct hl_userptr *userptr; + struct scatterlist *sg, *sg_next_iter; + u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl; + u64 len, len_next; + dma_addr_t dma_addr, dma_addr_next; + u64 device_memory_addr, addr; + enum dma_data_direction dir; + struct sg_table *sgt; + bool src_in_host = false; + bool skip_host_mem_pin = false; + bool user_memset; + + ctl = le32_to_cpu(user_dma_pkt->ctl); + + if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3) + src_in_host = true; + + user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >> + GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT; + + if (src_in_host) { + addr = le64_to_cpu(user_dma_pkt->src_addr); + device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr); + dir = DMA_TO_DEVICE; + if (user_memset) + skip_host_mem_pin = true; + } else { + addr = le64_to_cpu(user_dma_pkt->dst_addr); + device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr); + dir = DMA_FROM_DEVICE; + } + + if ((!skip_host_mem_pin) && + (!hl_userptr_is_pinned(hdev, addr, + le32_to_cpu(user_dma_pkt->tsize), + parser->job_userptr_list, &userptr))) { + dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n", + addr, le32_to_cpu(user_dma_pkt->tsize)); + return -EFAULT; + } + + if ((user_memset) && (dir == DMA_TO_DEVICE)) { + memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt)); + *new_dma_pkt_size = sizeof(*user_dma_pkt); + return 0; + } + + user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK; + + sgt = userptr->sgt; + dma_desc_cnt = 0; + + for_each_sg(sgt->sgl, sg, sgt->nents, count) { + len = sg_dma_len(sg); + dma_addr = sg_dma_address(sg); + + if (len == 0) + break; + + while ((count + 1) < sgt->nents) { + sg_next_iter = sg_next(sg); + len_next = sg_dma_len(sg_next_iter); + dma_addr_next = sg_dma_address(sg_next_iter); + + if (len_next == 0) + break; + + if ((dma_addr + len == dma_addr_next) && + (len + len_next <= DMA_MAX_TRANSFER_SIZE)) { + len += len_next; + count++; + sg = sg_next_iter; + } else { + break; + } + } + + new_dma_pkt->ctl = user_dma_pkt->ctl; + + ctl = le32_to_cpu(user_dma_pkt->ctl); + if (likely(dma_desc_cnt)) + ctl &= ~GAUDI_PKT_CTL_EB_MASK; + ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK; + new_dma_pkt->ctl = cpu_to_le32(ctl); + new_dma_pkt->tsize = cpu_to_le32(len); + + if (dir == DMA_TO_DEVICE) { + new_dma_pkt->src_addr = cpu_to_le64(dma_addr); + new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr); + } else { + new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr); + new_dma_pkt->dst_addr = cpu_to_le64(dma_addr); + } + + if (!user_memset) + device_memory_addr += len; + dma_desc_cnt++; + new_dma_pkt++; + } + + if (!dma_desc_cnt) { + dev_err(hdev->dev, + "Error of 0 SG entries when patching DMA packet\n"); + return -EFAULT; + } + + /* Fix the last dma packet - wrcomp must be as the user set it */ + new_dma_pkt--; + new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask); + + *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma); + + return 0; +} + +static int gaudi_patch_cb(struct hl_device *hdev, + struct hl_cs_parser *parser) +{ + u32 cb_parsed_length = 0; + u32 cb_patched_cur_length = 0; + int rc = 0; +
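+ /* + * Walk the user CB packet by packet, copying permitted packets as-is + * and expanding each LIN_DMA packet into one descriptor per merged SG + * run. parser->user_cb_size is more than 0 so the loop will always + * execute. + */ + while (cb_parsed_length < parser->user_cb_size) { + enum 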
packet_id pkt_id; + u16 pkt_size; + u32 new_pkt_size = 0; + struct gaudi_packet *user_pkt, *kernel_pkt; + + user_pkt = (struct gaudi_packet *) (uintptr_t) + (parser->user_cb->kernel_address + cb_parsed_length); + kernel_pkt = (struct gaudi_packet *) (uintptr_t) + (parser->patched_cb->kernel_address + + cb_patched_cur_length); + + pkt_id = (enum packet_id) ( + (le64_to_cpu(user_pkt->header) & + PACKET_HEADER_PACKET_ID_MASK) >> + PACKET_HEADER_PACKET_ID_SHIFT); + + pkt_size = gaudi_packet_sizes[pkt_id]; + cb_parsed_length += pkt_size; + if (cb_parsed_length > parser->user_cb_size) { + dev_err(hdev->dev, + "packet 0x%x is out of CB boundary\n", pkt_id); + rc = -EINVAL; + break; + } + + switch (pkt_id) { + case PACKET_LIN_DMA: + rc = gaudi_patch_dma_packet(hdev, parser, + (struct packet_lin_dma *) user_pkt, + (struct packet_lin_dma *) kernel_pkt, + &new_pkt_size); + cb_patched_cur_length += new_pkt_size; + break; + + case PACKET_MSG_PROT: + dev_err(hdev->dev, + "User not allowed to use MSG_PROT\n"); + rc = -EPERM; + break; + + case PACKET_CP_DMA: + dev_err(hdev->dev, "User not allowed to use CP_DMA\n"); + rc = -EPERM; + break; + + case PACKET_STOP: + dev_err(hdev->dev, "User not allowed to use STOP\n"); + rc = -EPERM; + break; + + case PACKET_WREG_32: + case PACKET_WREG_BULK: + case PACKET_MSG_LONG: + case PACKET_MSG_SHORT: + case PACKET_REPEAT: + case PACKET_FENCE: + case PACKET_NOP: + case PACKET_ARB_POINT: + case PACKET_LOAD_AND_EXE: + memcpy(kernel_pkt, user_pkt, pkt_size); + cb_patched_cur_length += pkt_size; + break; + + default: + dev_err(hdev->dev, "Invalid packet header 0x%x\n", + pkt_id); + rc = -EINVAL; + break; + } + + if (rc) + break; + } + + return rc; +} + +static int gaudi_parse_cb_mmu(struct hl_device *hdev, + struct hl_cs_parser *parser) +{ + u64 patched_cb_handle; + u32 patched_cb_size; + struct hl_cb *user_cb; + int rc; + + /* + * The new CB should have space at the end for two MSG_PROT packets: + * 1. A packet that will act as a completion packet + * 2. A packet that will generate an MSI interrupt + */ + parser->patched_cb_size = parser->user_cb_size + + sizeof(struct packet_msg_prot) * 2; + + rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, + parser->patched_cb_size, + &patched_cb_handle, HL_KERNEL_ASID_ID); + + if (rc) { + dev_err(hdev->dev, + "Failed to allocate patched CB for DMA CS %d\n", + rc); + return rc; + } + + patched_cb_handle >>= PAGE_SHIFT; + parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr, + (u32) patched_cb_handle); + /* hl_cb_get should never fail here so use kernel WARN */ + WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n", + (u32) patched_cb_handle); + if (!parser->patched_cb) { + rc = -EFAULT; + goto out; + } + + /* + * The check that parser->user_cb_size <= parser->user_cb->size was done + * in validate_queue_index(). + */ + memcpy((void *) (uintptr_t) parser->patched_cb->kernel_address, + (void *) (uintptr_t) parser->user_cb->kernel_address, + parser->user_cb_size); + + patched_cb_size = parser->patched_cb_size; + + /* Validate patched CB instead of user CB */ + user_cb = parser->user_cb; + parser->user_cb = parser->patched_cb; + rc = gaudi_validate_cb(hdev, parser, true); + parser->user_cb = user_cb; + + if (rc) { + hl_cb_put(parser->patched_cb); + goto out; + } + + if (patched_cb_size != parser->patched_cb_size) { + dev_err(hdev->dev, "user CB size mismatch\n"); + hl_cb_put(parser->patched_cb); + rc = -EINVAL; + goto out; + } + +out: + /* + * Always call cb destroy here because we still have 1 reference + * to it by calling cb_get earlier. 
After the job is completed, + * cb_put will release it, but here we want to remove it from the + * idr + */ + hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, + patched_cb_handle << PAGE_SHIFT); + + return rc; +} + +static int gaudi_parse_cb_no_mmu(struct hl_device *hdev, + struct hl_cs_parser *parser) +{ + u64 patched_cb_handle; + int rc; + + rc = gaudi_validate_cb(hdev, parser, false); + + if (rc) + goto free_userptr; + + rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, + parser->patched_cb_size, + &patched_cb_handle, HL_KERNEL_ASID_ID); + if (rc) { + dev_err(hdev->dev, + "Failed to allocate patched CB for DMA CS %d\n", rc); + goto free_userptr; + } + + patched_cb_handle >>= PAGE_SHIFT; + parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr, + (u32) patched_cb_handle); + /* hl_cb_get should never fail here so use kernel WARN */ + WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n", + (u32) patched_cb_handle); + if (!parser->patched_cb) { + rc = -EFAULT; + goto out; + } + + rc = gaudi_patch_cb(hdev, parser); + + if (rc) + hl_cb_put(parser->patched_cb); + +out: + /* + * Always call cb destroy here because we still have 1 reference + * to it by calling cb_get earlier. After the job is completed, + * cb_put will release it, but here we want to remove it from the + * idr + */ + hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, + patched_cb_handle << PAGE_SHIFT); + +free_userptr: + if (rc) + hl_userptr_delete_list(hdev, parser->job_userptr_list); + return rc; +} + +static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev, + struct hl_cs_parser *parser) +{ + struct asic_fixed_properties *asic_prop = &hdev->asic_prop; + + /* For internal queue jobs just check if CB address is valid */ + if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, + parser->user_cb_size, + asic_prop->sram_user_base_address, + asic_prop->sram_end_address)) + return 0; + + if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, + parser->user_cb_size, + asic_prop->dram_user_base_address, + asic_prop->dram_end_address)) + return 0; + + /* PMMU and HPMMU addresses are equal, check only one of them */ + if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, + parser->user_cb_size, + asic_prop->pmmu.start_addr, + asic_prop->pmmu.end_addr)) + return 0; + + dev_err(hdev->dev, + "CB address 0x%px + 0x%x for internal QMAN is not valid\n", + parser->user_cb, parser->user_cb_size); + + return -EFAULT; +} + +static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + + if (parser->queue_type == QUEUE_TYPE_INT) + return gaudi_parse_cb_no_ext_queue(hdev, parser); + + if (gaudi->hw_cap_initialized & HW_CAP_MMU) + return gaudi_parse_cb_mmu(hdev, parser); + else + return gaudi_parse_cb_no_mmu(hdev, parser); +} + +static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, + u64 kernel_address, u32 len, + u64 cq_addr, u32 cq_val, u32 msi_vec, + bool eb) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + struct packet_msg_prot *cq_pkt; + u32 tmp; + + cq_pkt = (struct packet_msg_prot *) (uintptr_t) + (kernel_address + len - (sizeof(struct packet_msg_prot) * 2)); + + tmp = (PACKET_MSG_PROT << GAUDI_PKT_CTL_OPCODE_SHIFT) | + (1 << GAUDI_PKT_CTL_MB_SHIFT); + + if (eb) + tmp |= (1 << GAUDI_PKT_CTL_EB_SHIFT); + + cq_pkt->ctl = cpu_to_le32(tmp); + cq_pkt->value = cpu_to_le32(cq_val); + cq_pkt->addr = cpu_to_le64(cq_addr); + + cq_pkt++; + + tmp = (PACKET_MSG_PROT << GAUDI_PKT_CTL_OPCODE_SHIFT) | + (1 << GAUDI_PKT_CTL_MB_SHIFT); 
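+ /* + * The second MSG_PROT packet raises the completion interrupt by + * writing the vector index to the PCIE_MSI_INTR_0 registers; when + * multi-MSI mode is off, all completions are routed to vector 0. + */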
+ cq_pkt->ctl = cpu_to_le32(tmp); + cq_pkt->value = cpu_to_le32(1); + + if (!gaudi->multi_msi_mode) + msi_vec = 0; + + cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4); +} + +static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val) +{ + WREG32(mmCPU_IF_EQ_RD_OFFS, val); +} + +static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr, + u32 size, u64 val) +{ + struct packet_lin_dma *lin_dma_pkt; + struct hl_cs_job *job; + u32 cb_size, ctl; + struct hl_cb *cb; + int rc; + + cb = hl_cb_kernel_create(hdev, PAGE_SIZE); + if (!cb) + return -EFAULT; + + lin_dma_pkt = (struct packet_lin_dma *) (uintptr_t) cb->kernel_address; + memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt)); + cb_size = sizeof(*lin_dma_pkt); + + ctl = ((PACKET_LIN_DMA << GAUDI_PKT_CTL_OPCODE_SHIFT) | + (1 << GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT) | + (1 << GAUDI_PKT_LIN_DMA_CTL_LIN_SHIFT) | + (1 << GAUDI_PKT_CTL_RB_SHIFT) | + (1 << GAUDI_PKT_CTL_MB_SHIFT)); + lin_dma_pkt->ctl = cpu_to_le32(ctl); + lin_dma_pkt->src_addr = cpu_to_le64(val); + lin_dma_pkt->dst_addr |= cpu_to_le64(addr); + lin_dma_pkt->tsize = cpu_to_le32(size); + + job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true); + if (!job) { + dev_err(hdev->dev, "Failed to allocate a new job\n"); + rc = -ENOMEM; + goto release_cb; + } + + job->id = 0; + job->user_cb = cb; + job->user_cb->cs_cnt++; + job->user_cb_size = cb_size; + job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0; + job->patched_cb = job->user_cb; + job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot); + + hl_debugfs_add_job(hdev, job); + + rc = gaudi_send_job_on_qman0(hdev, job); + + hl_debugfs_remove_job(hdev, job); + kfree(job); + cb->cs_cnt--; + +release_cb: + hl_cb_put(cb); + hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT); + + return rc; +} + +static void gaudi_restore_sm_registers(struct hl_device *hdev) +{ + int i; + + for (i = 0 ; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4) { + WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0); + WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0); + WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0); + } + + for (i = 0 ; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4) { + WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0); + WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0); + WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0); + } + + i = GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4; + + for (; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4) + WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0); + + i = GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4; + + for (; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4) + WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0); +} + +static void gaudi_restore_dma_registers(struct hl_device *hdev) +{ + u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 - + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0; + int i; + + for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) { + u64 sob_addr = CFG_BASE + + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + + (i * sob_delta); + u32 dma_offset = i * DMA_CORE_OFFSET; + + WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset, + lower_32_bits(sob_addr)); + WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset, + upper_32_bits(sob_addr)); + WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001); + + /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be + * modified by the user for SRAM reduction + */ + if (i > 1) + WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset, + 0x00000001); + } +} + +static void gaudi_restore_qm_registers(struct 
hl_device *hdev) +{ + u32 qman_offset; + int i; + + for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) { + qman_offset = i * DMA_QMAN_OFFSET; + WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0); + } + + for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) { + qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE); + WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0); + } + + for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { + qman_offset = i * TPC_QMAN_OFFSET; + WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0); + } +} + +static void gaudi_restore_user_registers(struct hl_device *hdev) +{ + gaudi_restore_sm_registers(hdev); + gaudi_restore_dma_registers(hdev); + gaudi_restore_qm_registers(hdev); +} + +static int gaudi_context_switch(struct hl_device *hdev, u32 asid) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + u64 addr = prop->sram_user_base_address; + u32 size = hdev->pldm ? 0x10000 : + (prop->sram_size - SRAM_USER_BASE_OFFSET); + u64 val = 0x7777777777777777ull; + int rc; + + rc = gaudi_memset_device_memory(hdev, addr, size, val); + if (rc) { + dev_err(hdev->dev, "Failed to clear SRAM in context switch\n"); + return rc; + } + + gaudi_mmu_prepare(hdev, asid); + + gaudi_restore_user_registers(hdev); + + return 0; +} + +static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct gaudi_device *gaudi = hdev->asic_specific; + u64 addr = prop->mmu_pgt_addr; + u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE; + + if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) + return 0; + + return gaudi_memset_device_memory(hdev, addr, size, 0); +} + +static void gaudi_restore_phase_topology(struct hl_device *hdev) +{ + +} + +static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct gaudi_device *gaudi = hdev->asic_specific; + u64 hbm_bar_addr; + int rc = 0; + + if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) { + if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) { + dev_err_ratelimited(hdev->dev, + "Can't read register - clock gating is enabled!\n"); + rc = -EFAULT; + } else { + *val = RREG32(addr - CFG_BASE); + } + } else if ((addr >= SRAM_BASE_ADDR) && + (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) { + *val = readl(hdev->pcie_bar[SRAM_BAR_ID] + + (addr - SRAM_BASE_ADDR)); + } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) { + u64 bar_base_addr = DRAM_PHYS_BASE + + (addr & ~(prop->dram_pci_bar_size - 0x1ull)); + + hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr); + if (hbm_bar_addr != U64_MAX) { + *val = readl(hdev->pcie_bar[HBM_BAR_ID] + + (addr - bar_base_addr)); + + hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, + hbm_bar_addr); + } + if (hbm_bar_addr == U64_MAX) + rc = -EIO; + } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) { + *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE); + } else { + rc = -EFAULT; + } + + return rc; +} + +static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct gaudi_device *gaudi = hdev->asic_specific; + u64 hbm_bar_addr; + int rc = 0; + + if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) { + if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) { + dev_err_ratelimited(hdev->dev, + "Can't write register - clock gating is enabled!\n"); + rc = -EFAULT; + } else { + WREG32(addr - CFG_BASE, val); + } + } else if ((addr >= SRAM_BASE_ADDR) && + (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) { + writel(val, 
hdev->pcie_bar[SRAM_BAR_ID] + + (addr - SRAM_BASE_ADDR)); + } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) { + u64 bar_base_addr = DRAM_PHYS_BASE + + (addr & ~(prop->dram_pci_bar_size - 0x1ull)); + + hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr); + if (hbm_bar_addr != U64_MAX) { + writel(val, hdev->pcie_bar[HBM_BAR_ID] + + (addr - bar_base_addr)); + + hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, + hbm_bar_addr); + } + if (hbm_bar_addr == U64_MAX) + rc = -EIO; + } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) { + *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val; + } else { + rc = -EFAULT; + } + + return rc; +} + +static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct gaudi_device *gaudi = hdev->asic_specific; + u64 hbm_bar_addr; + int rc = 0; + + if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) { + if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) { + dev_err_ratelimited(hdev->dev, + "Can't read register - clock gating is enabled!\n"); + rc = -EFAULT; + } else { + u32 val_l = RREG32(addr - CFG_BASE); + u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE); + + *val = (((u64) val_h) << 32) | val_l; + } + } else if ((addr >= SRAM_BASE_ADDR) && + (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) { + *val = readq(hdev->pcie_bar[SRAM_BAR_ID] + + (addr - SRAM_BASE_ADDR)); + } else if (addr <= + DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) { + u64 bar_base_addr = DRAM_PHYS_BASE + + (addr & ~(prop->dram_pci_bar_size - 0x1ull)); + + hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr); + if (hbm_bar_addr != U64_MAX) { + *val = readq(hdev->pcie_bar[HBM_BAR_ID] + + (addr - bar_base_addr)); + + hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, + hbm_bar_addr); + } + if (hbm_bar_addr == U64_MAX) + rc = -EIO; + } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) { + *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE); + } else { + rc = -EFAULT; + } + + return rc; +} + +static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct gaudi_device *gaudi = hdev->asic_specific; + u64 hbm_bar_addr; + int rc = 0; + + if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) { + if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) { + dev_err_ratelimited(hdev->dev, + "Can't write register - clock gating is enabled!\n"); + rc = -EFAULT; + } else { + WREG32(addr - CFG_BASE, lower_32_bits(val)); + WREG32(addr + sizeof(u32) - CFG_BASE, + upper_32_bits(val)); + } + } else if ((addr >= SRAM_BASE_ADDR) && + (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) { + writeq(val, hdev->pcie_bar[SRAM_BAR_ID] + + (addr - SRAM_BASE_ADDR)); + } else if (addr <= + DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) { + u64 bar_base_addr = DRAM_PHYS_BASE + + (addr & ~(prop->dram_pci_bar_size - 0x1ull)); + + hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr); + if (hbm_bar_addr != U64_MAX) { + writeq(val, hdev->pcie_bar[HBM_BAR_ID] + + (addr - bar_base_addr)); + + hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, + hbm_bar_addr); + } + if (hbm_bar_addr == U64_MAX) + rc = -EIO; + } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) { + *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val; + } else { + rc = -EFAULT; + } + + return rc; +} + +static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr) +{ + struct gaudi_device *gaudi = 
hdev->asic_specific; + + if (hdev->hard_reset_pending) + return U64_MAX; + + return readq(hdev->pcie_bar[HBM_BAR_ID] + + (addr - gaudi->hbm_bar_cur_addr)); +} + +static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + + if (hdev->hard_reset_pending) + return; + + writeq(val, hdev->pcie_bar[HBM_BAR_ID] + + (addr - gaudi->hbm_bar_cur_addr)); +} + +static void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid) +{ + /* mask to zero the MMBP and ASID bits */ + WREG32_AND(reg, ~0x7FF); + WREG32_OR(reg, asid); +} + +static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + + if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) + return; + + if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) { + WARN(1, "asid %u is too big\n", asid); + return; + } + + mutex_lock(&gaudi->clk_gate_mutex); + + hdev->asic_funcs->disable_clock_gating(hdev); + + gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid); + gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid); + gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid); + gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid); + gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid); + + gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid); + gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid); + gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid); + gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid); + gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid); + + gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid); + gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid); + gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid); + gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid); + gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid); + + gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid); + gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid); + gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid); + gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid); + gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid); + + gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid); + gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid); + gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid); + gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid); + gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid); + + gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid); + gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid); + gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid); + gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid); + gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid); + + gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid); + gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid); + gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid); + gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid); + gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid); + + gaudi_mmu_prepare_reg(hdev, 
mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid); + gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid); + gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid); + gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid); + gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid); + + gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid); + gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid); + gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid); + gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid); + gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid); + gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid); + gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid); + gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid); + + gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid); + + gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid); + + gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid); + + gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid); + + gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid); + + gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, 
asid); + gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid); + + gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid); + + gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid); + gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid); + + gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid); + gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid); + gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid); + gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid); + gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid); + gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid); + gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid); + gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid); + gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid); + gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid); + + gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid); + gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid); + gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid); + gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid); + gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid); + gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid); + gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid); + gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid); + gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid); + gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid); + gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid); + gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid); + + gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid); + gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid); + + hdev->asic_funcs->enable_clock_gating(hdev); + + mutex_unlock(&gaudi->clk_gate_mutex); +} + +static int gaudi_send_job_on_qman0(struct hl_device *hdev, + struct hl_cs_job *job) +{ + struct packet_msg_prot *fence_pkt; + u32 *fence_ptr; + dma_addr_t fence_dma_addr; + struct hl_cb *cb; + u32 tmp, timeout, dma_offset; + int rc; + + if (hdev->pldm) + timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC; + else + timeout = HL_DEVICE_TIMEOUT_USEC; + + if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) { + dev_err_ratelimited(hdev->dev, + "Can't send driver job on QMAN0 because the device is not idle\n"); + return -EBUSY; + } + + fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, + &fence_dma_addr); + if (!fence_ptr) { + dev_err(hdev->dev, + "Failed to allocate fence memory for QMAN0\n"); + return -ENOMEM; + } + + cb = 
job->patched_cb; + + fence_pkt = (struct packet_msg_prot *) (uintptr_t) (cb->kernel_address + + job->job_cb_size - sizeof(struct packet_msg_prot)); + + tmp = (PACKET_MSG_PROT << GAUDI_PKT_CTL_OPCODE_SHIFT) | + (1 << GAUDI_PKT_CTL_EB_SHIFT) | + (1 << GAUDI_PKT_CTL_MB_SHIFT); + fence_pkt->ctl = cpu_to_le32(tmp); + fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL); + fence_pkt->addr = cpu_to_le64(fence_dma_addr); + + dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET; + + WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT)); + + rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0, + job->job_cb_size, cb->bus_address); + if (rc) { + dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc); + goto free_fence_ptr; + } + + rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, + (tmp == GAUDI_QMAN0_FENCE_VAL), 1000, + timeout, true); + + hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0); + + if (rc == -ETIMEDOUT) { + dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp); + goto free_fence_ptr; + } + +free_fence_ptr: + WREG32_AND(mmDMA0_CORE_PROT + dma_offset, + ~BIT(DMA0_CORE_PROT_VAL_SHIFT)); + + hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr, + fence_dma_addr); + return rc; +} + +static const char *_gaudi_get_event_desc(u16 event_type) +{ + switch (event_type) { + case GAUDI_EVENT_PCIE_CORE_SERR: + return "PCIe_core_serr"; + case GAUDI_EVENT_PCIE_CORE_DERR: + return "PCIe_core_derr"; + case GAUDI_EVENT_PCIE_IF_SERR: + return "PCIe_if_serr"; + case GAUDI_EVENT_PCIE_IF_DERR: + return "PCIe_if_derr"; + case GAUDI_EVENT_PCIE_PHY_SERR: + return "PCIe_phy_serr"; + case GAUDI_EVENT_PCIE_PHY_DERR: + return "PCIe_phy_derr"; + case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR: + return "TPC%d_Serr"; + case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR: + return "TPC%d_Derr"; + case GAUDI_EVENT_MME0_ACC_SERR: + case GAUDI_EVENT_MME1_ACC_SERR: + case GAUDI_EVENT_MME2_ACC_SERR: + case GAUDI_EVENT_MME3_ACC_SERR: + return "MME%d_acc_serr"; + case GAUDI_EVENT_MME0_ACC_DERR: + case GAUDI_EVENT_MME1_ACC_DERR: + case GAUDI_EVENT_MME2_ACC_DERR: + case GAUDI_EVENT_MME3_ACC_DERR: + return "MME%d_acc_derr"; + case GAUDI_EVENT_MME0_SBAB_SERR: + case GAUDI_EVENT_MME1_SBAB_SERR: + case GAUDI_EVENT_MME2_SBAB_SERR: + case GAUDI_EVENT_MME3_SBAB_SERR: + return "MME%d_sbab_serr"; + case GAUDI_EVENT_MME0_SBAB_DERR: + case GAUDI_EVENT_MME1_SBAB_DERR: + case GAUDI_EVENT_MME2_SBAB_DERR: + case GAUDI_EVENT_MME3_SBAB_DERR: + return "MME%d_sbab_derr"; + case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC: + return "DMA%d_serr_ecc"; + case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC: + return "DMA%d_derr_ecc"; + case GAUDI_EVENT_CPU_IF_ECC_SERR: + return "CPU_if_ecc_serr"; + case GAUDI_EVENT_CPU_IF_ECC_DERR: + return "CPU_if_ecc_derr"; + case GAUDI_EVENT_PSOC_MEM_SERR: + return "PSOC_mem_serr"; + case GAUDI_EVENT_PSOC_MEM_DERR: + return "PSOC_mem_derr"; + case GAUDI_EVENT_PSOC_CORESIGHT_SERR: + return "PSOC_coresight_serr"; + case GAUDI_EVENT_PSOC_CORESIGHT_DERR: + return "PSOC_coresight_derr"; + case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR: + return "SRAM%d_serr"; + case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR: + return "SRAM%d_derr"; + case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR: + return "DMA%d_if_serr"; + case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR: + return "DMA%d_if_derr"; + case GAUDI_EVENT_GIC500: + return "GIC500"; + case GAUDI_EVENT_HBM_0_SERR ... 
GAUDI_EVENT_HBM_3_SERR: + return "HBM%d_serr"; + case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR: + return "HBM%d_derr"; + case GAUDI_EVENT_MMU_SERR: + return "MMU_serr"; + case GAUDI_EVENT_MMU_DERR: + return "MMU_derr"; + case GAUDI_EVENT_PCIE_DEC: + return "PCIe_dec"; + case GAUDI_EVENT_TPC0_DEC: + case GAUDI_EVENT_TPC1_DEC: + case GAUDI_EVENT_TPC2_DEC: + case GAUDI_EVENT_TPC3_DEC: + case GAUDI_EVENT_TPC4_DEC: + case GAUDI_EVENT_TPC5_DEC: + case GAUDI_EVENT_TPC6_DEC: + case GAUDI_EVENT_TPC7_DEC: + return "TPC%d_dec"; + case GAUDI_EVENT_AXI_ECC: + return "AXI_ecc"; + case GAUDI_EVENT_L2_RAM_ECC: + return "L2_ram_ecc"; + case GAUDI_EVENT_MME0_WBC_RSP: + case GAUDI_EVENT_MME1_WBC_RSP: + case GAUDI_EVENT_MME2_WBC_RSP: + case GAUDI_EVENT_MME3_WBC_RSP: + return "MME%d_wbc_rsp"; + case GAUDI_EVENT_MME0_SBAB0_RSP: + case GAUDI_EVENT_MME1_SBAB0_RSP: + case GAUDI_EVENT_MME2_SBAB0_RSP: + case GAUDI_EVENT_MME3_SBAB0_RSP: + return "MME%d_sbab0_rsp"; + case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17: + return "PLL%d"; + case GAUDI_EVENT_CPU_AXI_SPLITTER: + return "CPU_axi_splitter"; + case GAUDI_EVENT_PSOC_AXI_DEC: + return "CPU_axi_dec"; + case GAUDI_EVENT_PSOC_PRSTN_FALL: + return "PSOC_prstn_fall"; + case GAUDI_EVENT_TPC0_BMON_SPMU: + case GAUDI_EVENT_TPC1_BMON_SPMU: + case GAUDI_EVENT_TPC2_BMON_SPMU: + case GAUDI_EVENT_TPC3_BMON_SPMU: + case GAUDI_EVENT_TPC4_BMON_SPMU: + case GAUDI_EVENT_TPC5_BMON_SPMU: + case GAUDI_EVENT_TPC6_BMON_SPMU: + case GAUDI_EVENT_TPC7_BMON_SPMU: + return "TPC%d_bmon_spmu"; + case GAUDI_EVENT_TPC0_KRN_ERR: + case GAUDI_EVENT_TPC1_KRN_ERR: + case GAUDI_EVENT_TPC2_KRN_ERR: + case GAUDI_EVENT_TPC3_KRN_ERR: + case GAUDI_EVENT_TPC4_KRN_ERR: + case GAUDI_EVENT_TPC5_KRN_ERR: + case GAUDI_EVENT_TPC6_KRN_ERR: + case GAUDI_EVENT_TPC7_KRN_ERR: + return "TPC%d_krn_err"; + case GAUDI_EVENT_MMU_PAGE_FAULT: + return "MMU_page_fault"; + case GAUDI_EVENT_MMU_WR_PERM: + return "MMU_write_permission"; + case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7: + return "DMA_bm_ch%d"; + case GAUDI_EVENT_HBM0_SPI_0: + case GAUDI_EVENT_HBM1_SPI_0: + case GAUDI_EVENT_HBM2_SPI_0: + case GAUDI_EVENT_HBM3_SPI_0: + return "HBM%d_spi_0"; + case GAUDI_EVENT_HBM0_SPI_1: + case GAUDI_EVENT_HBM1_SPI_1: + case GAUDI_EVENT_HBM2_SPI_1: + case GAUDI_EVENT_HBM3_SPI_1: + return "HBM%d_spi_1"; + case GAUDI_EVENT_FIX_POWER_ENV_S: + return "POWER_ENV_S"; + case GAUDI_EVENT_FIX_POWER_ENV_E: + return "POWER_ENV_E"; + case GAUDI_EVENT_FIX_THERMAL_ENV_S: + return "THERMAL_ENV_S"; + case GAUDI_EVENT_FIX_THERMAL_ENV_E: + return "THERMAL_ENV_E"; + case GAUDI_EVENT_RAZWI_OR_ADC: + return "PSOC_razwi_or_adc"; + case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM: + return "TPC%d_qm"; + case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM: + return "MME%d_qm"; + case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM: + return "DMA%d_qm"; + case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE: + return "DMA%d_core"; + case GAUDI_EVENT_RAZWI_OR_ADC_SW: + return "PSOC_razwi_or_adc_sw"; + default: + return "N/A"; + } +} + +static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size) +{ + u8 index; + + switch (event_type) { + case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR: + index = event_type - GAUDI_EVENT_TPC0_SERR; + snprintf(desc, size, _gaudi_get_event_desc(event_type), index); + break; + case GAUDI_EVENT_TPC0_DERR ... 
GAUDI_EVENT_TPC7_DERR: + index = event_type - GAUDI_EVENT_TPC0_DERR; + snprintf(desc, size, _gaudi_get_event_desc(event_type), index); + break; + case GAUDI_EVENT_MME0_ACC_SERR: + case GAUDI_EVENT_MME1_ACC_SERR: + case GAUDI_EVENT_MME2_ACC_SERR: + case GAUDI_EVENT_MME3_ACC_SERR: + index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4; + snprintf(desc, size, _gaudi_get_event_desc(event_type), index); + break; + case GAUDI_EVENT_MME0_ACC_DERR: + case GAUDI_EVENT_MME1_ACC_DERR: + case GAUDI_EVENT_MME2_ACC_DERR: + case GAUDI_EVENT_MME3_ACC_DERR: + index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4; + snprintf(desc, size, _gaudi_get_event_desc(event_type), index); + break; + case GAUDI_EVENT_MME0_SBAB_SERR: + case GAUDI_EVENT_MME1_SBAB_SERR: + case GAUDI_EVENT_MME2_SBAB_SERR: + case GAUDI_EVENT_MME3_SBAB_SERR: + index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4; + snprintf(desc, size, _gaudi_get_event_desc(event_type), index); + break; + case GAUDI_EVENT_MME0_SBAB_DERR: + case GAUDI_EVENT_MME1_SBAB_DERR: + case GAUDI_EVENT_MME2_SBAB_DERR: + case GAUDI_EVENT_MME3_SBAB_DERR: + index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4; + snprintf(desc, size, _gaudi_get_event_desc(event_type), index); + break; + case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC: + index = event_type - GAUDI_EVENT_DMA0_SERR_ECC; + snprintf(desc, size, _gaudi_get_event_desc(event_type), index); + break; + case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC: + index = event_type - GAUDI_EVENT_DMA0_DERR_ECC; + snprintf(desc, size, _gaudi_get_event_desc(event_type), index); + break; + case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR: + index = event_type - GAUDI_EVENT_SRAM0_SERR; + snprintf(desc, size, _gaudi_get_event_desc(event_type), index); + break; + case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR: + index = event_type - GAUDI_EVENT_SRAM0_DERR; + snprintf(desc, size, _gaudi_get_event_desc(event_type), index); + break; + case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR: + index = event_type - GAUDI_EVENT_DMA_IF0_SERR; + snprintf(desc, size, _gaudi_get_event_desc(event_type), index); + break; + case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR: + index = event_type - GAUDI_EVENT_DMA_IF0_DERR; + snprintf(desc, size, _gaudi_get_event_desc(event_type), index); + break; + case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR: + index = event_type - GAUDI_EVENT_HBM_0_SERR; + snprintf(desc, size, _gaudi_get_event_desc(event_type), index); + break; + case GAUDI_EVENT_HBM_0_DERR ... 
GAUDI_EVENT_HBM_3_DERR: + index = event_type - GAUDI_EVENT_HBM_0_DERR; + snprintf(desc, size, _gaudi_get_event_desc(event_type), index); + break; + case GAUDI_EVENT_TPC0_DEC: + case GAUDI_EVENT_TPC1_DEC: + case GAUDI_EVENT_TPC2_DEC: + case GAUDI_EVENT_TPC3_DEC: + case GAUDI_EVENT_TPC4_DEC: + case GAUDI_EVENT_TPC5_DEC: + case GAUDI_EVENT_TPC6_DEC: + case GAUDI_EVENT_TPC7_DEC: + index = (event_type - GAUDI_EVENT_TPC0_DEC) / 2; + snprintf(desc, size, _gaudi_get_event_desc(event_type), index); + break; + case GAUDI_EVENT_MME0_WBC_RSP: + case GAUDI_EVENT_MME1_WBC_RSP: + case GAUDI_EVENT_MME2_WBC_RSP: + case GAUDI_EVENT_MME3_WBC_RSP: + index = (event_type - GAUDI_EVENT_MME0_WBC_RSP) / 5; + snprintf(desc, size, _gaudi_get_event_desc(event_type), index); + break; + case GAUDI_EVENT_MME0_SBAB0_RSP: + case GAUDI_EVENT_MME1_SBAB0_RSP: + case GAUDI_EVENT_MME2_SBAB0_RSP: + case GAUDI_EVENT_MME3_SBAB0_RSP: + index = (event_type - GAUDI_EVENT_MME0_SBAB0_RSP) / 5; + snprintf(desc, size, _gaudi_get_event_desc(event_type), index); + break; + case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17: + index = event_type - GAUDI_EVENT_PLL0; + snprintf(desc, size, _gaudi_get_event_desc(event_type), index); + break; + case GAUDI_EVENT_TPC0_BMON_SPMU: + case GAUDI_EVENT_TPC1_BMON_SPMU: + case GAUDI_EVENT_TPC2_BMON_SPMU: + case GAUDI_EVENT_TPC3_BMON_SPMU: + case GAUDI_EVENT_TPC4_BMON_SPMU: + case GAUDI_EVENT_TPC5_BMON_SPMU: + case GAUDI_EVENT_TPC6_BMON_SPMU: + case GAUDI_EVENT_TPC7_BMON_SPMU: + index = (event_type - GAUDI_EVENT_TPC0_BMON_SPMU) / 6; + snprintf(desc, size, _gaudi_get_event_desc(event_type), index); + break; + case GAUDI_EVENT_TPC0_KRN_ERR: + case GAUDI_EVENT_TPC1_KRN_ERR: + case GAUDI_EVENT_TPC2_KRN_ERR: + case GAUDI_EVENT_TPC3_KRN_ERR: + case GAUDI_EVENT_TPC4_KRN_ERR: + case GAUDI_EVENT_TPC5_KRN_ERR: + case GAUDI_EVENT_TPC6_KRN_ERR: + case GAUDI_EVENT_TPC7_KRN_ERR: + index = (event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6; + snprintf(desc, size, _gaudi_get_event_desc(event_type), index); + break; + case GAUDI_EVENT_MMU_PAGE_FAULT: + snprintf(desc, size, _gaudi_get_event_desc(event_type)); + break; + case GAUDI_EVENT_MMU_WR_PERM: + snprintf(desc, size, _gaudi_get_event_desc(event_type)); + break; + case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7: + index = event_type - GAUDI_EVENT_DMA_BM_CH0; + snprintf(desc, size, _gaudi_get_event_desc(event_type), index); + break; + case GAUDI_EVENT_HBM0_SPI_0: + case GAUDI_EVENT_HBM1_SPI_0: + case GAUDI_EVENT_HBM2_SPI_0: + case GAUDI_EVENT_HBM3_SPI_0: + index = (event_type - GAUDI_EVENT_HBM0_SPI_0) / 4; + snprintf(desc, size, _gaudi_get_event_desc(event_type), index); + break; + case GAUDI_EVENT_HBM0_SPI_1: + case GAUDI_EVENT_HBM1_SPI_1: + case GAUDI_EVENT_HBM2_SPI_1: + case GAUDI_EVENT_HBM3_SPI_1: + index = (event_type - GAUDI_EVENT_HBM0_SPI_1) / 4; + snprintf(desc, size, _gaudi_get_event_desc(event_type), index); + break; + case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM: + index = event_type - GAUDI_EVENT_TPC0_QM; + snprintf(desc, size, _gaudi_get_event_desc(event_type), index); + break; + case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM: + index = event_type - GAUDI_EVENT_MME0_QM; + snprintf(desc, size, _gaudi_get_event_desc(event_type), index); + break; + case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM: + index = event_type - GAUDI_EVENT_DMA0_QM; + snprintf(desc, size, _gaudi_get_event_desc(event_type), index); + break; + case GAUDI_EVENT_DMA0_CORE ... 
GAUDI_EVENT_DMA7_CORE: + index = event_type - GAUDI_EVENT_DMA0_CORE; + snprintf(desc, size, _gaudi_get_event_desc(event_type), index); + break; + default: + snprintf(desc, size, _gaudi_get_event_desc(event_type)); + break; + } +} + +static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, + u32 x_y, bool is_write) +{ + u32 dma_id[2], dma_offset, err_cause[2], mask, i; + + mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK : + DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK; + + switch (x_y) { + case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0: + case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1: + dma_id[0] = 0; + dma_id[1] = 2; + break; + case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0: + case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1: + dma_id[0] = 1; + dma_id[1] = 3; + break; + case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0: + case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1: + dma_id[0] = 4; + dma_id[1] = 6; + break; + case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0: + case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1: + dma_id[0] = 5; + dma_id[1] = 7; + break; + default: + goto unknown_initiator; + } + + for (i = 0 ; i < 2 ; i++) { + dma_offset = dma_id[i] * DMA_CORE_OFFSET; + err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset); + } + + switch (x_y) { + case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0: + case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1: + if ((err_cause[0] & mask) && !(err_cause[1] & mask)) + return "DMA0"; + else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) + return "DMA2"; + else + return "DMA0 or DMA2"; + case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0: + case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1: + if ((err_cause[0] & mask) && !(err_cause[1] & mask)) + return "DMA1"; + else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) + return "DMA3"; + else + return "DMA1 or DMA3"; + case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0: + case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1: + if ((err_cause[0] & mask) && !(err_cause[1] & mask)) + return "DMA4"; + else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) + return "DMA6"; + else + return "DMA4 or DMA6"; + case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0: + case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1: + if ((err_cause[0] & mask) && !(err_cause[1] & mask)) + return "DMA5"; + else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) + return "DMA7"; + else + return "DMA5 or DMA7"; + } + +unknown_initiator: + return "unknown initiator"; +} + +static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, + bool is_write) +{ + u32 val, x_y, axi_id; + + val = is_write ? 
RREG32(mmMMU_UP_RAZWI_WRITE_ID) : + RREG32(mmMMU_UP_RAZWI_READ_ID); + x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) | + (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT)); + axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK << + RAZWI_INITIATOR_AXI_ID_SHIFT); + + switch (x_y) { + case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0: + if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) + return "TPC0"; + if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) + return "NIC0"; + break; + case RAZWI_INITIATOR_ID_X_Y_TPC1: + return "TPC1"; + case RAZWI_INITIATOR_ID_X_Y_MME0_0: + case RAZWI_INITIATOR_ID_X_Y_MME0_1: + return "MME0"; + case RAZWI_INITIATOR_ID_X_Y_MME1_0: + case RAZWI_INITIATOR_ID_X_Y_MME1_1: + return "MME1"; + case RAZWI_INITIATOR_ID_X_Y_TPC2: + return "TPC2"; + case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC: + if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) + return "TPC3"; + if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI)) + return "PCI"; + if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU)) + return "CPU"; + if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC)) + return "PSOC"; + break; + case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0: + case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1: + case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0: + case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1: + case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0: + case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1: + case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0: + case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1: + return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write); + case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2: + if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) + return "TPC4"; + if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) + return "NIC1"; + if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) + return "NIC2"; + break; + case RAZWI_INITIATOR_ID_X_Y_TPC5: + return "TPC5"; + case RAZWI_INITIATOR_ID_X_Y_MME2_0: + case RAZWI_INITIATOR_ID_X_Y_MME2_1: + return "MME2"; + case RAZWI_INITIATOR_ID_X_Y_MME3_0: + case RAZWI_INITIATOR_ID_X_Y_MME3_1: + return "MME3"; + case RAZWI_INITIATOR_ID_X_Y_TPC6: + return "TPC6"; + case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5: + if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) + return "TPC7"; + if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) + return "NIC4"; + if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) + return "NIC5"; + break; + default: + break; + } + + dev_err(hdev->dev, + "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n", + val, + (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK, + (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK, + (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) & + RAZWI_INITIATOR_AXI_ID_MASK); + + return "unknown initiator"; +} + +static void gaudi_print_razwi_info(struct hl_device *hdev) +{ + if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) { + dev_err_ratelimited(hdev->dev, + "RAZWI event caused by illegal write from %s\n", + gaudi_get_razwi_initiator_name(hdev, true)); + WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0); + } + + if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) { + dev_err_ratelimited(hdev->dev, + "RAZWI event caused by illegal read from %s\n", + gaudi_get_razwi_initiator_name(hdev, false)); + WREG32(mmMMU_UP_RAZWI_READ_VLD, 0); + } +} + +static void gaudi_print_mmu_error_info(struct hl_device *hdev) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + u64 addr; + u32 val; + + if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) + return; + + val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE); + if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
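+ /* + * The faulting VA is captured in two pieces: bits 49:32 in the + * capture register and bits 31:0 in the dedicated VA register. + */ + addr = val & 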
MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK; + addr <<= 32; + addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA); + + dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", + addr); + + WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0); + } + + val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE); + if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) { + addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK; + addr <<= 32; + addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA); + + dev_err_ratelimited(hdev->dev, + "MMU access error on va 0x%llx\n", addr); + + WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0); + } +} + +/* + * +-------------------+------------------------------------------------------+ + * | Configuration Reg | Description | + * | Address | | + * +-------------------+------------------------------------------------------+ + * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)| + * | |0xF30 memory wrappers 31:0 (MSB to LSB) | + * | |0xF34 memory wrappers 63:32 | + * | |0xF38 memory wrappers 95:64 | + * | |0xF3C memory wrappers 127:96 | + * +-------------------+------------------------------------------------------+ + * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)| + * | |0xF40 memory wrappers 31:0 (MSB to LSB) | + * | |0xF44 memory wrappers 63:32 | + * | |0xF48 memory wrappers 95:64 | + * | |0xF4C memory wrappers 127:96 | + * +-------------------+------------------------------------------------------+ + */ +static void gaudi_print_ecc_info_generic(struct hl_device *hdev, + const char *block_name, + u64 block_address, int num_memories, + bool derr, bool disable_clock_gating) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + int num_mem_regs = num_memories / 32 + ((num_memories % 32) ? 1 : 0); + + if (block_address >= CFG_BASE) + block_address -= CFG_BASE; + + if (derr) + block_address += GAUDI_ECC_DERR0_OFFSET; + else + block_address += GAUDI_ECC_SERR0_OFFSET; + + if (disable_clock_gating) { + mutex_lock(&gaudi->clk_gate_mutex); + hdev->asic_funcs->disable_clock_gating(hdev); + } + + switch (num_mem_regs) { + case 1: + dev_err(hdev->dev, + "%s ECC indication: 0x%08x\n", + block_name, RREG32(block_address)); + break; + case 2: + dev_err(hdev->dev, + "%s ECC indication: 0x%08x 0x%08x\n", + block_name, + RREG32(block_address), RREG32(block_address + 4)); + break; + case 3: + dev_err(hdev->dev, + "%s ECC indication: 0x%08x 0x%08x 0x%08x\n", + block_name, + RREG32(block_address), RREG32(block_address + 4), + RREG32(block_address + 8)); + break; + case 4: + dev_err(hdev->dev, + "%s ECC indication: 0x%08x 0x%08x 0x%08x 0x%08x\n", + block_name, + RREG32(block_address), RREG32(block_address + 4), + RREG32(block_address + 8), RREG32(block_address + 0xc)); + break; + default: + break; + + } + + if (disable_clock_gating) { + hdev->asic_funcs->enable_clock_gating(hdev); + mutex_unlock(&gaudi->clk_gate_mutex); + } +} + +static void gaudi_handle_qman_err_generic(struct hl_device *hdev, + const char *qm_name, + u64 glbl_sts_addr, + u64 arb_err_addr) +{ + u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val; + char reg_desc[32]; + + /* Iterate through all stream GLBL_STS1 registers + Lower CP */ + for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) { + glbl_sts_clr_val = 0; + glbl_sts_val = RREG32(glbl_sts_addr + 4 * i); + + if (!glbl_sts_val) + continue; + + if (i == QMAN_STREAMS) + snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP"); + else + snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i); + + for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) { + if (glbl_sts_val 
& BIT(j)) { + dev_err_ratelimited(hdev->dev, + "%s %s. err cause: %s\n", + qm_name, reg_desc, + gaudi_qman_error_cause[j]); + glbl_sts_clr_val |= BIT(j); + } + } + + /* Write 1 clear errors */ + WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val); + } + + arb_err_val = RREG32(arb_err_addr); + + if (!arb_err_val) + return; + + for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) { + if (arb_err_val & BIT(j)) { + dev_err_ratelimited(hdev->dev, + "%s ARB_ERR. err cause: %s\n", + qm_name, + gaudi_qman_arb_error_cause[j]); + } + } +} + +static void gaudi_print_ecc_info(struct hl_device *hdev, u16 event_type) +{ + u64 block_address; + u8 index; + int num_memories; + char desc[32]; + bool derr; + bool disable_clock_gating; + + switch (event_type) { + case GAUDI_EVENT_PCIE_CORE_SERR: + snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_CORE"); + block_address = mmPCIE_CORE_BASE; + num_memories = 51; + derr = false; + disable_clock_gating = false; + break; + case GAUDI_EVENT_PCIE_CORE_DERR: + snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_CORE"); + block_address = mmPCIE_CORE_BASE; + num_memories = 51; + derr = true; + disable_clock_gating = false; + break; + case GAUDI_EVENT_PCIE_IF_SERR: + snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_WRAP"); + block_address = mmPCIE_WRAP_BASE; + num_memories = 11; + derr = false; + disable_clock_gating = false; + break; + case GAUDI_EVENT_PCIE_IF_DERR: + snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_WRAP"); + block_address = mmPCIE_WRAP_BASE; + num_memories = 11; + derr = true; + disable_clock_gating = false; + break; + case GAUDI_EVENT_PCIE_PHY_SERR: + snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_PHY"); + block_address = mmPCIE_PHY_BASE; + num_memories = 4; + derr = false; + disable_clock_gating = false; + break; + case GAUDI_EVENT_PCIE_PHY_DERR: + snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_PHY"); + block_address = mmPCIE_PHY_BASE; + num_memories = 4; + derr = true; + disable_clock_gating = false; + break; + case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR: + index = event_type - GAUDI_EVENT_TPC0_SERR; + block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET; + snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC", index); + num_memories = 90; + derr = false; + disable_clock_gating = true; + break; + case GAUDI_EVENT_TPC0_DERR ... 
GAUDI_EVENT_TPC7_DERR: + index = event_type - GAUDI_EVENT_TPC0_DERR; + block_address = + mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET; + snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC", index); + num_memories = 90; + derr = true; + disable_clock_gating = true; + break; + case GAUDI_EVENT_MME0_ACC_SERR: + case GAUDI_EVENT_MME1_ACC_SERR: + case GAUDI_EVENT_MME2_ACC_SERR: + case GAUDI_EVENT_MME3_ACC_SERR: + index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4; + block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET; + snprintf(desc, ARRAY_SIZE(desc), "MME%d_ACC", index); + num_memories = 128; + derr = false; + disable_clock_gating = true; + break; + case GAUDI_EVENT_MME0_ACC_DERR: + case GAUDI_EVENT_MME1_ACC_DERR: + case GAUDI_EVENT_MME2_ACC_DERR: + case GAUDI_EVENT_MME3_ACC_DERR: + index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4; + block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET; + snprintf(desc, ARRAY_SIZE(desc), "MME%d_ACC", index); + num_memories = 128; + derr = true; + disable_clock_gating = true; + break; + case GAUDI_EVENT_MME0_SBAB_SERR: + case GAUDI_EVENT_MME1_SBAB_SERR: + case GAUDI_EVENT_MME2_SBAB_SERR: + case GAUDI_EVENT_MME3_SBAB_SERR: + index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4; + block_address = mmMME0_SBAB_BASE + index * MME_ACC_OFFSET; + snprintf(desc, ARRAY_SIZE(desc), "MME%d_SBAB", index); + num_memories = 33; + derr = false; + disable_clock_gating = true; + break; + case GAUDI_EVENT_MME0_SBAB_DERR: + case GAUDI_EVENT_MME1_SBAB_DERR: + case GAUDI_EVENT_MME2_SBAB_DERR: + case GAUDI_EVENT_MME3_SBAB_DERR: + index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4; + block_address = mmMME0_SBAB_BASE + index * MME_ACC_OFFSET; + snprintf(desc, ARRAY_SIZE(desc), "MME%d_SBAB", index); + num_memories = 33; + derr = true; + disable_clock_gating = true; + break; + case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC: + index = event_type - GAUDI_EVENT_DMA0_SERR_ECC; + block_address = mmDMA0_CORE_BASE + index * DMA_CORE_OFFSET; + snprintf(desc, ARRAY_SIZE(desc), "DMA%d_CORE", index); + num_memories = 16; + derr = false; + disable_clock_gating = false; + break; + case GAUDI_EVENT_DMA0_DERR_ECC ... 
GAUDI_EVENT_DMA7_DERR_ECC: + index = event_type - GAUDI_EVENT_DMA0_DERR_ECC; + block_address = mmDMA0_CORE_BASE + index * DMA_CORE_OFFSET; + snprintf(desc, ARRAY_SIZE(desc), "DMA%d_CORE", index); + num_memories = 16; + derr = true; + disable_clock_gating = false; + break; + case GAUDI_EVENT_CPU_IF_ECC_SERR: + block_address = mmCPU_IF_BASE; + snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU"); + num_memories = 4; + derr = false; + disable_clock_gating = false; + break; + case GAUDI_EVENT_CPU_IF_ECC_DERR: + block_address = mmCPU_IF_BASE; + snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU"); + num_memories = 4; + derr = true; + disable_clock_gating = false; + break; + case GAUDI_EVENT_PSOC_MEM_SERR: + block_address = mmPSOC_GLOBAL_CONF_BASE; + snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU"); + num_memories = 4; + derr = false; + disable_clock_gating = false; + break; + case GAUDI_EVENT_PSOC_MEM_DERR: + block_address = mmPSOC_GLOBAL_CONF_BASE; + snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU"); + num_memories = 4; + derr = true; + disable_clock_gating = false; + break; + case GAUDI_EVENT_PSOC_CORESIGHT_SERR: + block_address = mmPSOC_CS_TRACE_BASE; + snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU"); + num_memories = 2; + derr = false; + disable_clock_gating = false; + break; + case GAUDI_EVENT_PSOC_CORESIGHT_DERR: + block_address = mmPSOC_CS_TRACE_BASE; + snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU"); + num_memories = 2; + derr = true; + disable_clock_gating = false; + break; + case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR: + index = event_type - GAUDI_EVENT_SRAM0_SERR; + block_address = + mmSRAM_Y0_X0_BANK_BASE + index * SRAM_BANK_OFFSET; + snprintf(desc, ARRAY_SIZE(desc), "SRAM%d", index); + num_memories = 2; + derr = false; + disable_clock_gating = false; + break; + case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR: + index = event_type - GAUDI_EVENT_SRAM0_DERR; + block_address = + mmSRAM_Y0_X0_BANK_BASE + index * SRAM_BANK_OFFSET; + snprintf(desc, ARRAY_SIZE(desc), "SRAM%d", index); + num_memories = 2; + derr = true; + disable_clock_gating = false; + break; + case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR: + index = event_type - GAUDI_EVENT_DMA_IF0_SERR; + block_address = mmDMA_IF_W_S_BASE + + index * (mmDMA_IF_E_S_BASE - mmDMA_IF_W_S_BASE); + snprintf(desc, ARRAY_SIZE(desc), "DMA_IF%d", index); + num_memories = 60; + derr = false; + disable_clock_gating = false; + break; + case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR: + index = event_type - GAUDI_EVENT_DMA_IF0_DERR; + block_address = mmDMA_IF_W_S_BASE + + index * (mmDMA_IF_E_S_BASE - mmDMA_IF_W_S_BASE); + snprintf(desc, ARRAY_SIZE(desc), "DMA_IF%d", index); + derr = true; + num_memories = 60; + disable_clock_gating = false; + break; + case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR: + index = event_type - GAUDI_EVENT_HBM_0_SERR; + /* HBM Registers are at different offsets */ + block_address = mmHBM0_BASE + 0x8000 + + index * (mmHBM1_BASE - mmHBM0_BASE); + snprintf(desc, ARRAY_SIZE(desc), "HBM%d", index); + derr = false; + num_memories = 64; + disable_clock_gating = false; + break; + case GAUDI_EVENT_HBM_0_DERR ... 
GAUDI_EVENT_HBM_3_DERR: + index = event_type - GAUDI_EVENT_HBM_0_DERR; + /* HBM Registers are at different offsets */ + block_address = mmHBM0_BASE + 0x8000 + + index * (mmHBM1_BASE - mmHBM0_BASE); + snprintf(desc, ARRAY_SIZE(desc), "HBM%d", index); + derr = true; + num_memories = 64; + disable_clock_gating = false; + break; + default: + return; + } + + gaudi_print_ecc_info_generic(hdev, desc, block_address, num_memories, + derr, disable_clock_gating); +} + +static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type) +{ + u64 glbl_sts_addr, arb_err_addr; + u8 index; + char desc[32]; + + switch (event_type) { + case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM: + index = event_type - GAUDI_EVENT_TPC0_QM; + glbl_sts_addr = + mmTPC0_QM_GLBL_STS1_0 + index * TPC_QMAN_OFFSET; + arb_err_addr = + mmTPC0_QM_ARB_ERR_CAUSE + index * TPC_QMAN_OFFSET; + snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index); + break; + case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM: + index = event_type - GAUDI_EVENT_MME0_QM; + glbl_sts_addr = + mmMME0_QM_GLBL_STS1_0 + index * MME_QMAN_OFFSET; + arb_err_addr = + mmMME0_QM_ARB_ERR_CAUSE + index * MME_QMAN_OFFSET; + snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index); + break; + case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM: + index = event_type - GAUDI_EVENT_DMA0_QM; + glbl_sts_addr = + mmDMA0_QM_GLBL_STS1_0 + index * DMA_QMAN_OFFSET; + arb_err_addr = + mmDMA0_QM_ARB_ERR_CAUSE + index * DMA_QMAN_OFFSET; + snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index); + break; + default: + return; + } + + gaudi_handle_qman_err_generic(hdev, desc, glbl_sts_addr, arb_err_addr); +} + +static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type, + bool razwi) +{ + char desc[20] = ""; + + gaudi_get_event_desc(event_type, desc, sizeof(desc)); + dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n", + event_type, desc); + + gaudi_print_ecc_info(hdev, event_type); + + if (razwi) { + gaudi_print_razwi_info(hdev); + gaudi_print_mmu_error_info(hdev); + } +} + +static int gaudi_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr, + size_t irq_arr_size) +{ + struct armcp_unmask_irq_arr_packet *pkt; + size_t total_pkt_size; + long result; + int rc; + + total_pkt_size = sizeof(struct armcp_unmask_irq_arr_packet) + + irq_arr_size; + + /* data should be aligned to 8 bytes in order for ArmCP to copy it */ + total_pkt_size = (total_pkt_size + 0x7) & ~0x7; + + /* total_pkt_size is cast to u16 later on */ + if (total_pkt_size > USHRT_MAX) { + dev_err(hdev->dev, "too many elements in IRQ array\n"); + return -EINVAL; + } + + pkt = kzalloc(total_pkt_size, GFP_KERNEL); + if (!pkt) + return -ENOMEM; + + pkt->length = cpu_to_le32(irq_arr_size / sizeof(irq_arr[0])); + memcpy(&pkt->irqs, irq_arr, irq_arr_size); + + pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY << + ARMCP_PKT_CTL_OPCODE_SHIFT); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt, + total_pkt_size, HL_DEVICE_TIMEOUT_USEC, &result); + + if (rc) + dev_err(hdev->dev, "failed to unmask IRQ array\n"); + + kfree(pkt); + + return rc; +} + +static int gaudi_soft_reset_late_init(struct hl_device *hdev) +{ + /* Unmask all IRQs since some could have been received + * during the soft reset + */ + return gaudi_unmask_irq_arr(hdev, gaudi_all_events, + sizeof(gaudi_all_events)); +} + +static int gaudi_unmask_irq(struct hl_device *hdev, u16 event_type) +{ + struct armcp_packet pkt; + long result; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl =
cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ << + ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.value = cpu_to_le64(event_type); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + HL_DEVICE_TIMEOUT_USEC, &result); + + if (rc) + dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type); + + return rc; +} + +static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device) +{ + int ch, err = 0; + u32 base, val, val2; + + base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET; + for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) { + val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF); + val = (val & 0xFF) | ((val >> 8) & 0xFF); + if (val) { + err = 1; + dev_err(hdev->dev, + "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n", + device, ch * 2, val & 0x1, (val >> 1) & 0x1, + (val >> 2) & 0x1, (val >> 3) & 0x1, + (val >> 4) & 0x1); + + val2 = RREG32(base + ch * 0x1000 + 0x060); + dev_err(hdev->dev, + "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n", + device, ch * 2, + RREG32(base + ch * 0x1000 + 0x064), + (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10, + (val2 & 0xFF0000) >> 16, + (val2 & 0xFF000000) >> 24); + } + + val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF); + val = (val & 0xFF) | ((val >> 8) & 0xFF); + if (val) { + err = 1; + dev_err(hdev->dev, + "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n", + device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1, + (val >> 2) & 0x1, (val >> 3) & 0x1, + (val >> 4) & 0x1); + + val2 = RREG32(base + ch * 0x1000 + 0x070); + dev_err(hdev->dev, + "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n", + device, ch * 2 + 1, + RREG32(base + ch * 0x1000 + 0x074), + (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10, + (val2 & 0xFF0000) >> 16, + (val2 & 0xFF000000) >> 24); + } + + /* Clear interrupts */ + RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF); + RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF); + WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F); + WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F); + RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF); + RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF); + } + + val = RREG32(base + 0x8F30); + val2 = RREG32(base + 0x8F34); + if (val | val2) { + err = 1; + dev_err(hdev->dev, + "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n", + device, val, val2); + } + val = RREG32(base + 0x8F40); + val2 = RREG32(base + 0x8F44); + if (val | val2) { + err = 1; + dev_err(hdev->dev, + "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n", + device, val, val2); + } + + return err; +} + +static int gaudi_hbm_event_to_dev(u16 hbm_event_type) +{ + switch (hbm_event_type) { + case GAUDI_EVENT_HBM0_SPI_0: + case GAUDI_EVENT_HBM0_SPI_1: + return 0; + case GAUDI_EVENT_HBM1_SPI_0: + case GAUDI_EVENT_HBM1_SPI_1: + return 1; + case GAUDI_EVENT_HBM2_SPI_0: + case GAUDI_EVENT_HBM2_SPI_1: + return 2; + case GAUDI_EVENT_HBM3_SPI_0: + case GAUDI_EVENT_HBM3_SPI_1: + return 3; + default: + break; + } + + /* Should never happen */ + return 0; +} + +static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id, + char *interrupt_name) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i; + bool soft_reset_required = false; + + /* Accessing the TPC_INTR_CAUSE registers requires disabling the clock + * gating, and thus cannot be done in ArmCP and should be done 
instead + * by the driver. + */ + + mutex_lock(&gaudi->clk_gate_mutex); + + hdev->asic_funcs->disable_clock_gating(hdev); + + tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) & + TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK; + + for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++) + if (tpc_interrupts_cause & BIT(i)) { + dev_err_ratelimited(hdev->dev, + "TPC%d_%s interrupt cause: %s\n", + tpc_id, interrupt_name, + gaudi_tpc_interrupts_cause[i]); + /* If this is QM error, we need to soft-reset */ + if (i == 15) + soft_reset_required = true; + } + + /* Clear interrupts */ + WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0); + + hdev->asic_funcs->enable_clock_gating(hdev); + + mutex_unlock(&gaudi->clk_gate_mutex); + + return soft_reset_required; +} + +static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type) +{ + return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1; +} + +static int tpc_krn_event_to_tpc_id(u16 tpc_krn_event_type) +{ + return (tpc_krn_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6; +} + +static void gaudi_print_clk_change_info(struct hl_device *hdev, + u16 event_type) +{ + switch (event_type) { + case GAUDI_EVENT_FIX_POWER_ENV_S: + dev_info_ratelimited(hdev->dev, + "Clock throttling due to power consumption\n"); + break; + + case GAUDI_EVENT_FIX_POWER_ENV_E: + dev_info_ratelimited(hdev->dev, + "Power envelope is safe, back to optimal clock\n"); + break; + + case GAUDI_EVENT_FIX_THERMAL_ENV_S: + dev_info_ratelimited(hdev->dev, + "Clock throttling due to overheating\n"); + break; + + case GAUDI_EVENT_FIX_THERMAL_ENV_E: + dev_info_ratelimited(hdev->dev, + "Thermal envelope is safe, back to optimal clock\n"); + break; + + default: + dev_err(hdev->dev, "Received invalid clock change event %d\n", + event_type); + break; + } +} + +static void gaudi_handle_eqe(struct hl_device *hdev, + struct hl_eq_entry *eq_entry) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + u32 ctl = le32_to_cpu(eq_entry->hdr.ctl); + u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) + >> EQ_CTL_EVENT_TYPE_SHIFT); + u8 cause; + bool soft_reset_required; + + gaudi->events_stat[event_type]++; + gaudi->events_stat_aggregate[event_type]++; + + switch (event_type) { + case GAUDI_EVENT_PCIE_CORE_DERR: + case GAUDI_EVENT_PCIE_IF_DERR: + case GAUDI_EVENT_PCIE_PHY_DERR: + case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR: + case GAUDI_EVENT_MME0_ACC_DERR: + case GAUDI_EVENT_MME0_SBAB_DERR: + case GAUDI_EVENT_MME1_ACC_DERR: + case GAUDI_EVENT_MME1_SBAB_DERR: + case GAUDI_EVENT_MME2_ACC_DERR: + case GAUDI_EVENT_MME2_SBAB_DERR: + case GAUDI_EVENT_MME3_ACC_DERR: + case GAUDI_EVENT_MME3_SBAB_DERR: + case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC: + fallthrough; + case GAUDI_EVENT_CPU_IF_ECC_DERR: + case GAUDI_EVENT_PSOC_MEM_DERR: + case GAUDI_EVENT_PSOC_CORESIGHT_DERR: + case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR: + case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR: + fallthrough; + case GAUDI_EVENT_GIC500: + case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR: + case GAUDI_EVENT_MMU_DERR: + case GAUDI_EVENT_AXI_ECC: + case GAUDI_EVENT_L2_RAM_ECC: + case GAUDI_EVENT_PLL0 ...
GAUDI_EVENT_PLL17: + gaudi_print_irq_info(hdev, event_type, false); + if (hdev->hard_reset_on_fw_events) + hl_device_reset(hdev, true, false); + break; + + case GAUDI_EVENT_HBM0_SPI_0: + case GAUDI_EVENT_HBM1_SPI_0: + case GAUDI_EVENT_HBM2_SPI_0: + case GAUDI_EVENT_HBM3_SPI_0: + gaudi_print_irq_info(hdev, event_type, false); + gaudi_hbm_read_interrupts(hdev, + gaudi_hbm_event_to_dev(event_type)); + if (hdev->hard_reset_on_fw_events) + hl_device_reset(hdev, true, false); + break; + + case GAUDI_EVENT_HBM0_SPI_1: + case GAUDI_EVENT_HBM1_SPI_1: + case GAUDI_EVENT_HBM2_SPI_1: + case GAUDI_EVENT_HBM3_SPI_1: + gaudi_print_irq_info(hdev, event_type, false); + gaudi_hbm_read_interrupts(hdev, + gaudi_hbm_event_to_dev(event_type)); + break; + + case GAUDI_EVENT_TPC0_DEC: + case GAUDI_EVENT_TPC1_DEC: + case GAUDI_EVENT_TPC2_DEC: + case GAUDI_EVENT_TPC3_DEC: + case GAUDI_EVENT_TPC4_DEC: + case GAUDI_EVENT_TPC5_DEC: + case GAUDI_EVENT_TPC6_DEC: + case GAUDI_EVENT_TPC7_DEC: + gaudi_print_irq_info(hdev, event_type, true); + soft_reset_required = gaudi_tpc_read_interrupts(hdev, + tpc_dec_event_to_tpc_id(event_type), + "AXI_SLV_DEC_Error"); + if (soft_reset_required) + hl_device_reset(hdev, false, false); + gaudi_unmask_irq(hdev, event_type); + break; + + case GAUDI_EVENT_TPC0_KRN_ERR: + case GAUDI_EVENT_TPC1_KRN_ERR: + case GAUDI_EVENT_TPC2_KRN_ERR: + case GAUDI_EVENT_TPC3_KRN_ERR: + case GAUDI_EVENT_TPC4_KRN_ERR: + case GAUDI_EVENT_TPC5_KRN_ERR: + case GAUDI_EVENT_TPC6_KRN_ERR: + case GAUDI_EVENT_TPC7_KRN_ERR: + gaudi_print_irq_info(hdev, event_type, true); + soft_reset_required = gaudi_tpc_read_interrupts(hdev, + tpc_krn_event_to_tpc_id(event_type), + "KRN_ERR"); + if (soft_reset_required) + hl_device_reset(hdev, false, false); + gaudi_unmask_irq(hdev, event_type); + break; + + case GAUDI_EVENT_PCIE_CORE_SERR: + case GAUDI_EVENT_PCIE_IF_SERR: + case GAUDI_EVENT_PCIE_PHY_SERR: + case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR: + case GAUDI_EVENT_MME0_ACC_SERR: + case GAUDI_EVENT_MME0_SBAB_SERR: + case GAUDI_EVENT_MME1_ACC_SERR: + case GAUDI_EVENT_MME1_SBAB_SERR: + case GAUDI_EVENT_MME2_ACC_SERR: + case GAUDI_EVENT_MME2_SBAB_SERR: + case GAUDI_EVENT_MME3_ACC_SERR: + case GAUDI_EVENT_MME3_SBAB_SERR: + case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC: + case GAUDI_EVENT_CPU_IF_ECC_SERR: + case GAUDI_EVENT_PSOC_MEM_SERR: + case GAUDI_EVENT_PSOC_CORESIGHT_SERR: + case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR: + case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR: + case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR: + fallthrough; + case GAUDI_EVENT_MMU_SERR: + case GAUDI_EVENT_PCIE_DEC: + case GAUDI_EVENT_MME0_WBC_RSP: + case GAUDI_EVENT_MME0_SBAB0_RSP: + case GAUDI_EVENT_MME1_WBC_RSP: + case GAUDI_EVENT_MME1_SBAB0_RSP: + case GAUDI_EVENT_MME2_WBC_RSP: + case GAUDI_EVENT_MME2_SBAB0_RSP: + case GAUDI_EVENT_MME3_WBC_RSP: + case GAUDI_EVENT_MME3_SBAB0_RSP: + case GAUDI_EVENT_CPU_AXI_SPLITTER: + case GAUDI_EVENT_PSOC_AXI_DEC: + case GAUDI_EVENT_PSOC_PRSTN_FALL: + case GAUDI_EVENT_MMU_PAGE_FAULT: + case GAUDI_EVENT_MMU_WR_PERM: + case GAUDI_EVENT_RAZWI_OR_ADC: + case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM: + case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM: + case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM: + fallthrough; + case GAUDI_EVENT_DMA0_CORE ... 
GAUDI_EVENT_DMA7_CORE: + gaudi_print_irq_info(hdev, event_type, true); + gaudi_handle_qman_err(hdev, event_type); + gaudi_unmask_irq(hdev, event_type); + break; + + case GAUDI_EVENT_RAZWI_OR_ADC_SW: + gaudi_print_irq_info(hdev, event_type, true); + hl_device_reset(hdev, false, false); + gaudi_unmask_irq(hdev, event_type); + break; + + case GAUDI_EVENT_TPC0_BMON_SPMU: + case GAUDI_EVENT_TPC1_BMON_SPMU: + case GAUDI_EVENT_TPC2_BMON_SPMU: + case GAUDI_EVENT_TPC3_BMON_SPMU: + case GAUDI_EVENT_TPC4_BMON_SPMU: + case GAUDI_EVENT_TPC5_BMON_SPMU: + case GAUDI_EVENT_TPC6_BMON_SPMU: + case GAUDI_EVENT_TPC7_BMON_SPMU: + case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7: + gaudi_print_irq_info(hdev, event_type, false); + gaudi_unmask_irq(hdev, event_type); + break; + + case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E: + gaudi_print_clk_change_info(hdev, event_type); + gaudi_unmask_irq(hdev, event_type); + break; + + case GAUDI_EVENT_PSOC_GPIO_U16_0: + cause = le64_to_cpu(eq_entry->data[0]) & 0xFF; + dev_err(hdev->dev, + "Received high temp H/W interrupt %d (cause %d)\n", + event_type, cause); + break; + + default: + dev_err(hdev->dev, "Received invalid H/W interrupt %d\n", + event_type); + break; + } +} + +static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, + u32 *size) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + + if (aggregate) { + *size = (u32) sizeof(gaudi->events_stat_aggregate); + return gaudi->events_stat_aggregate; + } + + *size = (u32) sizeof(gaudi->events_stat); + return gaudi->events_stat; +} + +static void gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, + u32 flags) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + u32 status, timeout_usec; + int rc; + + if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) || + hdev->hard_reset_pending) + return; + + mutex_lock(&hdev->mmu_cache_lock); + + if (hdev->pldm) + timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC; + else + timeout_usec = MMU_CONFIG_TIMEOUT_USEC; + + /* L0 & L1 invalidation */ + WREG32(mmSTLB_INV_ALL_START, 1); + + rc = hl_poll_timeout( + hdev, + mmSTLB_INV_ALL_START, + status, + !status, + 1000, + timeout_usec); + + if (rc) + dev_notice_ratelimited(hdev->dev, + "Timeout when waiting for MMU cache invalidation\n"); + + mutex_unlock(&hdev->mmu_cache_lock); +} + +static void gaudi_mmu_invalidate_cache_range(struct hl_device *hdev, + bool is_hard, u32 asid, u64 va, u64 size) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + u32 status, timeout_usec; + u32 inv_data; + u32 pi; + int rc; + + if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) || + hdev->hard_reset_pending) + return; + + mutex_lock(&hdev->mmu_cache_lock); + + if (hdev->pldm) + timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC; + else + timeout_usec = MMU_CONFIG_TIMEOUT_USEC; + + /* + * TODO: currently invalidate entire L0 & L1 as in regular hard + * invalidation. Need to apply invalidation of specific cache + * lines with mask of ASID & VA & size. + * Note that L1 will be flushed entirely in any case.
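+ * + * The invalidation below is a producer/consumer handshake: the driver + * samples the 8-bit producer index from STLB_CACHE_INV, advances it by + * one (e.g. 0xFF wraps around to 0), writes it back, and then polls + * STLB_INV_CONSUMER_INDEX until the MMU echoes the same value, meaning + * the invalidation request has been consumed.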
+ */ + + /* L0 & L1 invalidation */ + inv_data = RREG32(mmSTLB_CACHE_INV); + /* PI is 8 bit */ + pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF; + WREG32(mmSTLB_CACHE_INV, + (inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi); + + rc = hl_poll_timeout( + hdev, + mmSTLB_INV_CONSUMER_INDEX, + status, + status == pi, + 1000, + timeout_usec); + + if (rc) + dev_notice_ratelimited(hdev->dev, + "Timeout when waiting for MMU cache invalidation\n"); + + mutex_unlock(&hdev->mmu_cache_lock); +} + +static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, + u32 asid, u64 phys_addr) +{ + u32 status, timeout_usec; + int rc; + + if (hdev->pldm) + timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC; + else + timeout_usec = MMU_CONFIG_TIMEOUT_USEC; + + WREG32(MMU_ASID, asid); + WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT); + WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT); + WREG32(MMU_BUSY, 0x80000000); + + rc = hl_poll_timeout( + hdev, + MMU_BUSY, + status, + !(status & 0x80000000), + 1000, + timeout_usec); + + if (rc) { + dev_err(hdev->dev, + "Timeout during MMU hop0 config of asid %d\n", asid); + return rc; + } + + return 0; +} + +static int gaudi_send_heartbeat(struct hl_device *hdev) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + + if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) + return 0; + + return hl_fw_send_heartbeat(hdev); +} + +static int gaudi_armcp_info_get(struct hl_device *hdev) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + struct asic_fixed_properties *prop = &hdev->asic_prop; + int rc; + + if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) + return 0; + + rc = hl_fw_armcp_info_get(hdev); + if (rc) + return rc; + + if (!strlen(prop->armcp_info.card_name)) + strncpy(prop->armcp_info.card_name, GAUDI_DEFAULT_CARD_NAME, + CARD_NAME_MAX_LEN); + + return 0; +} + +static bool gaudi_is_device_idle(struct hl_device *hdev, u32 *mask, + struct seq_file *s) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n"; + const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n"; + u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts; + bool is_idle = true, is_eng_idle, is_slave; + u64 offset; + int i, dma_id; + + mutex_lock(&gaudi->clk_gate_mutex); + + hdev->asic_funcs->disable_clock_gating(hdev); + + if (s) + seq_puts(s, + "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n" + "--- ------- ------------ ---------- -------------\n"); + + for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) { + dma_id = gaudi_dma_assignment[i]; + offset = dma_id * DMA_QMAN_OFFSET; + + qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset); + qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset); + dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset); + is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && + IS_DMA_IDLE(dma_core_sts0); + is_idle &= is_eng_idle; + + if (mask) + *mask |= !is_eng_idle << + (GAUDI_ENGINE_ID_DMA_0 + dma_id); + if (s) + seq_printf(s, fmt, dma_id, + is_eng_idle ? 
"Y" : "N", qm_glbl_sts0, + qm_cgm_sts, dma_core_sts0); + } + + if (s) + seq_puts(s, + "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n" + "--- ------- ------------ ---------- ----------\n"); + + for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { + offset = i * TPC_QMAN_OFFSET; + qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset); + qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset); + tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset); + is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && + IS_TPC_IDLE(tpc_cfg_sts); + is_idle &= is_eng_idle; + + if (mask) + *mask |= !is_eng_idle << (GAUDI_ENGINE_ID_TPC_0 + i); + if (s) + seq_printf(s, fmt, i, + is_eng_idle ? "Y" : "N", + qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts); + } + + if (s) + seq_puts(s, + "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n" + "--- ------- ------------ ---------- -----------\n"); + + for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) { + offset = i * MME_QMAN_OFFSET; + mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset); + is_eng_idle = IS_MME_IDLE(mme_arch_sts); + + /* MME 1 & 3 are slaves, no need to check their QMANs */ + is_slave = i % 2; + if (!is_slave) { + qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset); + qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset); + is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts); + } + + is_idle &= is_eng_idle; + + if (mask) + *mask |= !is_eng_idle << (GAUDI_ENGINE_ID_MME_0 + i); + if (s) { + if (!is_slave) + seq_printf(s, fmt, i, + is_eng_idle ? "Y" : "N", + qm_glbl_sts0, qm_cgm_sts, mme_arch_sts); + else + seq_printf(s, mme_slave_fmt, i, + is_eng_idle ? "Y" : "N", "-", + "-", mme_arch_sts); + } + } + + if (s) + seq_puts(s, "\n"); + + hdev->asic_funcs->enable_clock_gating(hdev); + + mutex_unlock(&gaudi->clk_gate_mutex); + + return is_idle; +} + +static void gaudi_hw_queues_lock(struct hl_device *hdev) + __acquires(&gaudi->hw_queues_lock) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + + spin_lock(&gaudi->hw_queues_lock); +} + +static void gaudi_hw_queues_unlock(struct hl_device *hdev) + __releases(&gaudi->hw_queues_lock) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + + spin_unlock(&gaudi->hw_queues_lock); +} + +static u32 gaudi_get_pci_id(struct hl_device *hdev) +{ + return hdev->pdev->device; +} + +static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data, + size_t max_size) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + + if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) + return 0; + + return hl_fw_get_eeprom_data(hdev, data, max_size); +} + +/* + * this function should be used only during initialization and/or after reset, + * when there are no active users. 
+ */ +static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, + u32 tpc_id) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + u64 kernel_timeout; + u32 status, offset; + int rc; + + offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS); + + if (hdev->pldm) + kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC; + else + kernel_timeout = HL_DEVICE_TIMEOUT_USEC; + + mutex_lock(&gaudi->clk_gate_mutex); + + hdev->asic_funcs->disable_clock_gating(hdev); + + WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset, + lower_32_bits(tpc_kernel)); + WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset, + upper_32_bits(tpc_kernel)); + + WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset, + lower_32_bits(tpc_kernel)); + WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset, + upper_32_bits(tpc_kernel)); + /* set a valid LUT pointer, content is of no significance */ + WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset, + lower_32_bits(tpc_kernel)); + WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset, + upper_32_bits(tpc_kernel)); + + WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset, + lower_32_bits(CFG_BASE + + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0)); + + WREG32(mmTPC0_CFG_TPC_CMD + offset, + (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT | + 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT)); + /* wait a bit for the engine to start executing */ + usleep_range(1000, 1500); + + /* wait until engine has finished executing */ + rc = hl_poll_timeout( + hdev, + mmTPC0_CFG_STATUS + offset, + status, + (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) == + TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK, + 1000, + kernel_timeout); + + if (rc) { + dev_err(hdev->dev, + "Timeout while waiting for TPC%d icache prefetch\n", + tpc_id); + hdev->asic_funcs->enable_clock_gating(hdev); + mutex_unlock(&gaudi->clk_gate_mutex); + return -EIO; + } + + WREG32(mmTPC0_CFG_TPC_EXECUTE + offset, + 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT); + + /* wait a bit for the engine to start executing */ + usleep_range(1000, 1500); + + /* wait until engine has finished executing */ + rc = hl_poll_timeout( + hdev, + mmTPC0_CFG_STATUS + offset, + status, + (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) == + TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK, + 1000, + kernel_timeout); + + rc = hl_poll_timeout( + hdev, + mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset, + status, + (status == 0), + 1000, + kernel_timeout); + + hdev->asic_funcs->enable_clock_gating(hdev); + mutex_unlock(&gaudi->clk_gate_mutex); + + if (rc) { + dev_err(hdev->dev, + "Timeout while waiting for TPC%d kernel to execute\n", + tpc_id); + return -EIO; + } + + return 0; +} + +static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev) +{ + return RREG32(mmHW_STATE); +} + +static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx) +{ + return gaudi_cq_assignment[cq_idx]; +} + +static void gaudi_ext_queue_init(struct hl_device *hdev, u32 q_idx) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx]; + struct hl_hw_sob *hw_sob; + int sob, ext_idx = gaudi->ext_queue_idx++; + + /* + * The external queues might not sit sequentially, hence use the + * real external queue index for the SOB/MON base id. 
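+ * For example, the third external queue to be initialized gets + * ext_idx 2, so it owns SOB ids [2 * HL_RSVD_SOBS, 3 * HL_RSVD_SOBS) + * and monitor ids [2 * HL_RSVD_MONS, 3 * HL_RSVD_MONS), regardless of + * its GAUDI_QUEUE_ID_* value.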
+ */ + hw_queue->base_sob_id = ext_idx * HL_RSVD_SOBS; + hw_queue->base_mon_id = ext_idx * HL_RSVD_MONS; + hw_queue->next_sob_val = 1; + hw_queue->curr_sob_offset = 0; + + for (sob = 0 ; sob < HL_RSVD_SOBS ; sob++) { + hw_sob = &hw_queue->hw_sob[sob]; + hw_sob->hdev = hdev; + hw_sob->sob_id = hw_queue->base_sob_id + sob; + hw_sob->q_idx = q_idx; + kref_init(&hw_sob->kref); + } +} + +static void gaudi_ext_queue_reset(struct hl_device *hdev, u32 q_idx) +{ + struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx]; + + /* + * In case we got here due to a stuck CS, the refcnt might be bigger + * than 1 and therefore we reset it. + */ + kref_init(&hw_queue->hw_sob[hw_queue->curr_sob_offset].kref); + hw_queue->curr_sob_offset = 0; + hw_queue->next_sob_val = 1; +} + +static u32 gaudi_get_signal_cb_size(struct hl_device *hdev) +{ + return sizeof(struct packet_msg_short) + + sizeof(struct packet_msg_prot) * 2; +} + +static u32 gaudi_get_wait_cb_size(struct hl_device *hdev) +{ + return sizeof(struct packet_msg_short) * 4 + + sizeof(struct packet_fence) + + sizeof(struct packet_msg_prot) * 2; +} + +static void gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id) +{ + struct hl_cb *cb = (struct hl_cb *) data; + struct packet_msg_short *pkt; + u32 value, ctl; + + pkt = (struct packet_msg_short *) (uintptr_t) cb->kernel_address; + memset(pkt, 0, sizeof(*pkt)); + + value = 1 << GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_SHIFT; /* inc by 1 */ + value |= 1 << GAUDI_PKT_SHORT_VAL_SOB_MOD_SHIFT; /* add mode */ + + ctl = (sob_id * 4) << GAUDI_PKT_SHORT_CTL_ADDR_SHIFT; /* SOB id */ + ctl |= 0 << GAUDI_PKT_SHORT_CTL_OP_SHIFT; /* write the value */ + ctl |= 3 << GAUDI_PKT_SHORT_CTL_BASE_SHIFT; /* W_S SOB base */ + ctl |= PACKET_MSG_SHORT << GAUDI_PKT_SHORT_CTL_OPCODE_SHIFT; + ctl |= 1 << GAUDI_PKT_SHORT_CTL_EB_SHIFT; + ctl |= 1 << GAUDI_PKT_SHORT_CTL_RB_SHIFT; + ctl |= 1 << GAUDI_PKT_SHORT_CTL_MB_SHIFT; + + pkt->value = cpu_to_le32(value); + pkt->ctl = cpu_to_le32(ctl); +} + +static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value, + u16 addr) +{ + u32 ctl, pkt_size = sizeof(*pkt); + + memset(pkt, 0, pkt_size); + + ctl = addr << GAUDI_PKT_SHORT_CTL_ADDR_SHIFT; + ctl |= 2 << GAUDI_PKT_SHORT_CTL_BASE_SHIFT; /* W_S MON base */ + ctl |= PACKET_MSG_SHORT << GAUDI_PKT_SHORT_CTL_OPCODE_SHIFT; + ctl |= 0 << GAUDI_PKT_SHORT_CTL_EB_SHIFT; + ctl |= 1 << GAUDI_PKT_SHORT_CTL_RB_SHIFT; + ctl |= 0 << GAUDI_PKT_SHORT_CTL_MB_SHIFT; /* only last pkt needs MB */ + + pkt->value = cpu_to_le32(value); + pkt->ctl = cpu_to_le32(ctl); + + return pkt_size; +} + +static u32 gaudi_add_arm_monitor_pkt(struct packet_msg_short *pkt, u16 sob_id, + u16 sob_val, u16 addr) +{ + u32 ctl, value, pkt_size = sizeof(*pkt); + u8 mask = ~(1 << (sob_id & 0x7)); + + memset(pkt, 0, pkt_size); + + value = (sob_id / 8) << GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_SHIFT; + value |= sob_val << GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_SHIFT; + value |= 0 << GAUDI_PKT_SHORT_VAL_MON_MODE_SHIFT; /* GREATER_OR_EQUAL */ + value |= mask << GAUDI_PKT_SHORT_VAL_MON_MASK_SHIFT; + + ctl = addr << GAUDI_PKT_SHORT_CTL_ADDR_SHIFT; + ctl |= 0 << GAUDI_PKT_SHORT_CTL_OP_SHIFT; /* write the value */ + ctl |= 2 << GAUDI_PKT_SHORT_CTL_BASE_SHIFT; /* W_S MON base */ + ctl |= PACKET_MSG_SHORT << GAUDI_PKT_SHORT_CTL_OPCODE_SHIFT; + ctl |= 0 << GAUDI_PKT_SHORT_CTL_EB_SHIFT; + ctl |= 1 << GAUDI_PKT_SHORT_CTL_RB_SHIFT; + ctl |= 1 << GAUDI_PKT_SHORT_CTL_MB_SHIFT; + + pkt->value = cpu_to_le32(value); + pkt->ctl = cpu_to_le32(ctl); + + return pkt_size; +} + +static u32 
gaudi_add_fence_pkt(struct packet_fence *pkt) +{ + u32 ctl, cfg, pkt_size = sizeof(*pkt); + + memset(pkt, 0, pkt_size); + + cfg = 1 << GAUDI_PKT_FENCE_CFG_DEC_VAL_SHIFT; + cfg |= 1 << GAUDI_PKT_FENCE_CFG_TARGET_VAL_SHIFT; + cfg |= 2 << GAUDI_PKT_FENCE_CFG_ID_SHIFT; + + ctl = 0 << GAUDI_PKT_FENCE_CTL_PRED_SHIFT; + ctl |= PACKET_FENCE << GAUDI_PKT_FENCE_CTL_OPCODE_SHIFT; + ctl |= 0 << GAUDI_PKT_FENCE_CTL_EB_SHIFT; + ctl |= 1 << GAUDI_PKT_FENCE_CTL_RB_SHIFT; + ctl |= 1 << GAUDI_PKT_FENCE_CTL_MB_SHIFT; + + pkt->cfg = cpu_to_le32(cfg); + pkt->ctl = cpu_to_le32(ctl); + + return pkt_size; +} + +static void gaudi_gen_wait_cb(struct hl_device *hdev, void *data, u16 sob_id, + u16 sob_val, u16 mon_id, u32 q_idx) +{ + struct hl_cb *cb = (struct hl_cb *) data; + void *buf = (void *) (uintptr_t) cb->kernel_address; + u64 monitor_base, fence_addr = 0; + u32 size = 0; + u16 msg_addr_offset; + + switch (q_idx) { + case GAUDI_QUEUE_ID_DMA_0_0: + fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_0; + break; + case GAUDI_QUEUE_ID_DMA_0_1: + fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_1; + break; + case GAUDI_QUEUE_ID_DMA_0_2: + fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_2; + break; + case GAUDI_QUEUE_ID_DMA_0_3: + fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_3; + break; + case GAUDI_QUEUE_ID_DMA_1_0: + fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_0; + break; + case GAUDI_QUEUE_ID_DMA_1_1: + fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_1; + break; + case GAUDI_QUEUE_ID_DMA_1_2: + fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_2; + break; + case GAUDI_QUEUE_ID_DMA_1_3: + fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_3; + break; + case GAUDI_QUEUE_ID_DMA_5_0: + fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_0; + break; + case GAUDI_QUEUE_ID_DMA_5_1: + fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_1; + break; + case GAUDI_QUEUE_ID_DMA_5_2: + fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_2; + break; + case GAUDI_QUEUE_ID_DMA_5_3: + fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_3; + break; + default: + /* queue index should be valid here */ + dev_crit(hdev->dev, "wrong queue id %d for wait packet\n", + q_idx); + return; + } + + fence_addr += CFG_BASE; + + /* + * monitor_base should be the content of the base0 address registers, + * so it will be added to the msg short offsets + */ + monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0; + + /* First monitor config packet: low address of the sync */ + msg_addr_offset = + (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) - + monitor_base; + + size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr, + msg_addr_offset); + + /* Second monitor config packet: high address of the sync */ + msg_addr_offset = + (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) - + monitor_base; + + size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32), + msg_addr_offset); + + /* + * Third monitor config packet: the payload, i.e. 
what to write when the + * sync triggers + */ + msg_addr_offset = + (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) - + monitor_base; + + size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset); + + /* Fourth monitor config packet: bind the monitor to a sync object */ + msg_addr_offset = + (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) - + monitor_base; + size += gaudi_add_arm_monitor_pkt(buf + size, sob_id, sob_val, + msg_addr_offset); + + /* Fence packet */ + size += gaudi_add_fence_pkt(buf + size); +} + +static void gaudi_reset_sob(struct hl_device *hdev, void *data) +{ + struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data; + + dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx, + hw_sob->sob_id); + + WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4, + 0); + + kref_init(&hw_sob->kref); +} + +static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev) +{ + if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) == + HL_POWER9_HOST_MAGIC) { + hdev->power9_64bit_dma_enable = 1; + hdev->dma_mask = 64; + } else { + hdev->power9_64bit_dma_enable = 0; + hdev->dma_mask = 48; + } +} + +static u64 gaudi_get_device_time(struct hl_device *hdev) +{ + u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32; + + return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL); +} + +static const struct hl_asic_funcs gaudi_funcs = { + .early_init = gaudi_early_init, + .early_fini = gaudi_early_fini, + .late_init = gaudi_late_init, + .late_fini = gaudi_late_fini, + .sw_init = gaudi_sw_init, + .sw_fini = gaudi_sw_fini, + .hw_init = gaudi_hw_init, + .hw_fini = gaudi_hw_fini, + .halt_engines = gaudi_halt_engines, + .suspend = gaudi_suspend, + .resume = gaudi_resume, + .cb_mmap = gaudi_cb_mmap, + .ring_doorbell = gaudi_ring_doorbell, + .pqe_write = gaudi_pqe_write, + .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent, + .asic_dma_free_coherent = gaudi_dma_free_coherent, + .get_int_queue_base = gaudi_get_int_queue_base, + .test_queues = gaudi_test_queues, + .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc, + .asic_dma_pool_free = gaudi_dma_pool_free, + .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc, + .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free, + .hl_dma_unmap_sg = gaudi_dma_unmap_sg, + .cs_parser = gaudi_cs_parser, + .asic_dma_map_sg = gaudi_dma_map_sg, + .get_dma_desc_list_size = gaudi_get_dma_desc_list_size, + .add_end_of_cb_packets = gaudi_add_end_of_cb_packets, + .update_eq_ci = gaudi_update_eq_ci, + .context_switch = gaudi_context_switch, + .restore_phase_topology = gaudi_restore_phase_topology, + .debugfs_read32 = gaudi_debugfs_read32, + .debugfs_write32 = gaudi_debugfs_write32, + .debugfs_read64 = gaudi_debugfs_read64, + .debugfs_write64 = gaudi_debugfs_write64, + .add_device_attr = NULL, + .handle_eqe = gaudi_handle_eqe, + .set_pll_profile = NULL, + .get_events_stat = gaudi_get_events_stat, + .read_pte = gaudi_read_pte, + .write_pte = gaudi_write_pte, + .mmu_invalidate_cache = gaudi_mmu_invalidate_cache, + .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range, + .send_heartbeat = gaudi_send_heartbeat, + .enable_clock_gating = gaudi_enable_clock_gating, + .disable_clock_gating = gaudi_disable_clock_gating, + .debug_coresight = NULL, + .is_device_idle = gaudi_is_device_idle, + .soft_reset_late_init = gaudi_soft_reset_late_init, + .hw_queues_lock = gaudi_hw_queues_lock, + .hw_queues_unlock = gaudi_hw_queues_unlock, + .get_pci_id = gaudi_get_pci_id, + .get_eeprom_data = 
gaudi_get_eeprom_data, + .send_cpu_message = gaudi_send_cpu_message, + .get_hw_state = gaudi_get_hw_state, + .pci_bars_map = gaudi_pci_bars_map, + .set_dram_bar_base = gaudi_set_hbm_bar_base, + .init_iatu = gaudi_init_iatu, + .rreg = hl_rreg, + .wreg = hl_wreg, + .halt_coresight = NULL, + .get_clk_rate = NULL, + .get_queue_id_for_cq = gaudi_get_queue_id_for_cq, + .read_device_fw_version = gaudi_read_device_fw_version, + .load_firmware_to_device = gaudi_load_firmware_to_device, + .load_boot_fit_to_device = gaudi_load_boot_fit_to_device, + .ext_queue_init = gaudi_ext_queue_init, + .ext_queue_reset = gaudi_ext_queue_reset, + .get_signal_cb_size = gaudi_get_signal_cb_size, + .get_wait_cb_size = gaudi_get_wait_cb_size, + .gen_signal_cb = gaudi_gen_signal_cb, + .gen_wait_cb = gaudi_gen_wait_cb, + .reset_sob = gaudi_reset_sob, + .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw, + .get_device_time = gaudi_get_device_time +}; + +/** + * gaudi_set_asic_funcs - set GAUDI function pointers + * + * @hdev: pointer to hl_device structure + * + */ +void gaudi_set_asic_funcs(struct hl_device *hdev) +{ + hdev->asic_funcs = &gaudi_funcs; +} diff --git a/drivers/misc/habanalabs/gaudi/gaudiP.h b/drivers/misc/habanalabs/gaudi/gaudiP.h new file mode 100644 index 0000000000000000000000000000000000000000..4fe66b8e19685d8415b6cdebc6d81095a2044960 --- /dev/null +++ b/drivers/misc/habanalabs/gaudi/gaudiP.h @@ -0,0 +1,252 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright 2019-2020 HabanaLabs, Ltd. + * All Rights Reserved. + * + */ + +#ifndef GAUDIP_H_ +#define GAUDIP_H_ + +#include <uapi/misc/habanalabs.h> +#include "habanalabs.h" +#include "include/hl_boot_if.h" +#include "include/gaudi/gaudi_packets.h" +#include "include/gaudi/gaudi.h" +#include "include/gaudi/gaudi_async_events.h" + +#define NUMBER_OF_EXT_HW_QUEUES 12 +#define NUMBER_OF_CMPLT_QUEUES NUMBER_OF_EXT_HW_QUEUES +#define NUMBER_OF_CPU_HW_QUEUES 1 +#define NUMBER_OF_INT_HW_QUEUES 100 +#define NUMBER_OF_HW_QUEUES (NUMBER_OF_EXT_HW_QUEUES + \ + NUMBER_OF_CPU_HW_QUEUES + \ + NUMBER_OF_INT_HW_QUEUES) + +/* + * Number of MSI interrupt IDs: + * Each completion queue has 1 ID + * The event queue has 1 ID + */ +#define NUMBER_OF_INTERRUPTS (NUMBER_OF_CMPLT_QUEUES + \ + NUMBER_OF_CPU_HW_QUEUES) + +#if (NUMBER_OF_INTERRUPTS > GAUDI_MSI_ENTRIES) +#error "Number of MSI interrupts must be smaller than or equal to GAUDI_MSI_ENTRIES" +#endif + +#define QMAN_FENCE_TIMEOUT_USEC 10000 /* 10 ms */ + +#define CORESIGHT_TIMEOUT_USEC 100000 /* 100 ms */ + +#define GAUDI_MAX_CLK_FREQ 2200000000ull /* 2200 MHz */ + +#define MAX_POWER_DEFAULT 200000 /* 200W */ + +#define GAUDI_CPU_TIMEOUT_USEC 15000000 /* 15s */ + +#define TPC_ENABLED_MASK 0xFF + +#define GAUDI_HBM_SIZE_32GB 0x800000000ull +#define GAUDI_HBM_DEVICES 4 +#define GAUDI_HBM_CHANNELS 8 +#define GAUDI_HBM_CFG_BASE (mmHBM0_BASE - CFG_BASE) +#define GAUDI_HBM_CFG_OFFSET (mmHBM1_BASE - mmHBM0_BASE) + +#define DMA_MAX_TRANSFER_SIZE U32_MAX + +#define GAUDI_DEFAULT_CARD_NAME "HL2000" + +#define PCI_DMA_NUMBER_OF_CHNLS 3 +#define HBM_DMA_NUMBER_OF_CHNLS 5 +#define DMA_NUMBER_OF_CHNLS (PCI_DMA_NUMBER_OF_CHNLS + \ + HBM_DMA_NUMBER_OF_CHNLS) + +#define MME_NUMBER_OF_MASTER_ENGINES 2 +#define MME_NUMBER_OF_SLAVE_ENGINES 2 +#define MME_NUMBER_OF_ENGINES (MME_NUMBER_OF_MASTER_ENGINES + \ + MME_NUMBER_OF_SLAVE_ENGINES) +#define MME_NUMBER_OF_QMANS (MME_NUMBER_OF_MASTER_ENGINES * \ + QMAN_STREAMS) + +#define QMAN_STREAMS 4 + +#define DMA_QMAN_OFFSET (mmDMA1_QM_BASE - mmDMA0_QM_BASE) +#define TPC_QMAN_OFFSET (mmTPC1_QM_BASE - mmTPC0_QM_BASE) +#define MME_QMAN_OFFSET (mmMME1_QM_BASE -
mmMME0_QM_BASE) +#define NIC_MACRO_QMAN_OFFSET (mmNIC1_QM0_BASE - mmNIC0_QM0_BASE) + +#define TPC_CFG_OFFSET (mmTPC1_CFG_BASE - mmTPC0_CFG_BASE) + +#define DMA_CORE_OFFSET (mmDMA1_CORE_BASE - mmDMA0_CORE_BASE) + +#define SIF_RTR_CTRL_OFFSET (mmSIF_RTR_CTRL_1_BASE - mmSIF_RTR_CTRL_0_BASE) + +#define NIF_RTR_CTRL_OFFSET (mmNIF_RTR_CTRL_1_BASE - mmNIF_RTR_CTRL_0_BASE) + +#define MME_ACC_OFFSET (mmMME1_ACC_BASE - mmMME0_ACC_BASE) +#define SRAM_BANK_OFFSET (mmSRAM_Y0_X1_RTR_BASE - mmSRAM_Y0_X0_RTR_BASE) + +#define NUM_OF_SOB_IN_BLOCK \ + (((mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_2047 - \ + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0) + 4) >> 2) + +#define NUM_OF_MONITORS_IN_BLOCK \ + (((mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_511 - \ + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0) + 4) >> 2) + + +/* DRAM Memory Map */ + +#define CPU_FW_IMAGE_SIZE 0x10000000 /* 256MB */ +#define MMU_PAGE_TABLES_SIZE 0x0BF00000 /* 191MB */ +#define MMU_CACHE_MNG_SIZE 0x00100000 /* 1MB */ +#define RESERVED 0x04000000 /* 64MB */ + +#define CPU_FW_IMAGE_ADDR DRAM_PHYS_BASE +#define MMU_PAGE_TABLES_ADDR (CPU_FW_IMAGE_ADDR + CPU_FW_IMAGE_SIZE) +#define MMU_CACHE_MNG_ADDR (MMU_PAGE_TABLES_ADDR + MMU_PAGE_TABLES_SIZE) + +#define DRAM_DRIVER_END_ADDR (MMU_CACHE_MNG_ADDR + MMU_CACHE_MNG_SIZE +\ + RESERVED) + +#define DRAM_BASE_ADDR_USER 0x20000000 + +#if (DRAM_DRIVER_END_ADDR > DRAM_BASE_ADDR_USER) +#error "Driver must reserve no more than 512MB" +#endif + +/* Internal QMANs PQ sizes */ + +#define MME_QMAN_LENGTH 64 +#define MME_QMAN_SIZE_IN_BYTES (MME_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE) + +#define HBM_DMA_QMAN_LENGTH 64 +#define HBM_DMA_QMAN_SIZE_IN_BYTES \ + (HBM_DMA_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE) + +#define TPC_QMAN_LENGTH 64 +#define TPC_QMAN_SIZE_IN_BYTES (TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE) + +#define SRAM_USER_BASE_OFFSET GAUDI_DRIVER_SRAM_RESERVED_SIZE_FROM_START + +/* Virtual address space */ +#define VA_HOST_SPACE_START 0x1000000000000ull /* 256TB */ +#define VA_HOST_SPACE_END 0x3FF8000000000ull /* 1PB - 512GB */ +#define VA_HOST_SPACE_SIZE (VA_HOST_SPACE_END - \ + VA_HOST_SPACE_START) /* 767.5TB */ + +#define HW_CAP_PLL 0x00000001 +#define HW_CAP_HBM 0x00000002 +#define HW_CAP_MMU 0x00000004 +#define HW_CAP_MME 0x00000008 +#define HW_CAP_CPU 0x00000010 +#define HW_CAP_PCI_DMA 0x00000020 +#define HW_CAP_MSI 0x00000040 +#define HW_CAP_CPU_Q 0x00000080 +#define HW_CAP_HBM_DMA 0x00000100 +#define HW_CAP_CLK_GATE 0x00000200 +#define HW_CAP_SRAM_SCRAMBLER 0x00000400 +#define HW_CAP_HBM_SCRAMBLER 0x00000800 + +#define HW_CAP_TPC0 0x01000000 +#define HW_CAP_TPC1 0x02000000 +#define HW_CAP_TPC2 0x04000000 +#define HW_CAP_TPC3 0x08000000 +#define HW_CAP_TPC4 0x10000000 +#define HW_CAP_TPC5 0x20000000 +#define HW_CAP_TPC6 0x40000000 +#define HW_CAP_TPC7 0x80000000 +#define HW_CAP_TPC_MASK 0xFF000000 +#define HW_CAP_TPC_SHIFT 24 + +#define GAUDI_CPU_PCI_MSB_ADDR(addr) (((addr) & GENMASK_ULL(49, 39)) >> 39) +#define GAUDI_PCI_TO_CPU_ADDR(addr) \ + do { \ + (addr) &= ~GENMASK_ULL(49, 39); \ + (addr) |= BIT_ULL(39); \ + } while (0) +#define GAUDI_CPU_TO_PCI_ADDR(addr, extension) \ + do { \ + (addr) &= ~GENMASK_ULL(49, 39); \ + (addr) |= (u64) (extension) << 39; \ + } while (0) + +enum gaudi_dma_channels { + GAUDI_PCI_DMA_1, + GAUDI_PCI_DMA_2, + GAUDI_PCI_DMA_3, + GAUDI_HBM_DMA_1, + GAUDI_HBM_DMA_2, + GAUDI_HBM_DMA_3, + GAUDI_HBM_DMA_4, + GAUDI_HBM_DMA_5, + GAUDI_DMA_MAX +}; + +enum gaudi_tpc_mask { + GAUDI_TPC_MASK_TPC0 = 0x01, + GAUDI_TPC_MASK_TPC1 = 0x02, + GAUDI_TPC_MASK_TPC2 = 0x04, + GAUDI_TPC_MASK_TPC3 = 0x08, +
GAUDI_TPC_MASK_TPC4 = 0x10, + GAUDI_TPC_MASK_TPC5 = 0x20, + GAUDI_TPC_MASK_TPC6 = 0x40, + GAUDI_TPC_MASK_TPC7 = 0x80, + GAUDI_TPC_MASK_ALL = 0xFF +}; + +/** + * struct gaudi_internal_qman_info - Internal QMAN information. + * @pq_kernel_addr: Kernel address of the PQ memory area in the host. + * @pq_dma_addr: DMA address of the PQ memory area in the host. + * @pq_size: Size of allocated host memory for PQ. + */ +struct gaudi_internal_qman_info { + void *pq_kernel_addr; + dma_addr_t pq_dma_addr; + size_t pq_size; +}; + +/** + * struct gaudi_device - ASIC specific management structure. + * @armcp_info_get: get information on device from ArmCP + * @hw_queues_lock: protects the H/W queues from concurrent access. + * @clk_gate_mutex: protects code areas that require clock gating to be disabled + * temporarily + * @internal_qmans: Internal QMANs information. The array size is larger than + * the actual number of internal queues because they are not in + * consecutive order. + * @hbm_bar_cur_addr: current address of HBM PCI bar. + * @max_freq_value: current max clk frequency. + * @events_stat: array that holds histogram of all received events. + * @events_stat_aggregate: same as events_stat but doesn't get cleared on reset + * @hw_cap_initialized: This field contains a bit per H/W engine. When that + * engine is initialized, that bit is set by the driver to + * signal we can use this engine in later code paths. + * Each bit is cleared upon reset of its corresponding H/W + * engine. + * @multi_msi_mode: whether we are working in multi MSI or single MSI mode. + * Multi MSI is possible only with IOMMU enabled. + * @ext_queue_idx: helper index for external queues initialization. + */ +struct gaudi_device { + int (*armcp_info_get)(struct hl_device *hdev); + + /* TODO: remove hw_queues_lock after moving to scheduler code */ + spinlock_t hw_queues_lock; + struct mutex clk_gate_mutex; + + struct gaudi_internal_qman_info internal_qmans[GAUDI_QUEUE_ID_SIZE]; + + u64 hbm_bar_cur_addr; + u64 max_freq_value; + + u32 events_stat[GAUDI_EVENT_SIZE]; + u32 events_stat_aggregate[GAUDI_EVENT_SIZE]; + u32 hw_cap_initialized; + u8 multi_msi_mode; + u8 ext_queue_idx; +}; + +#endif /* GAUDIP_H_ */ diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index b1c3a89c7f3861dc9f49e436726fdb70e011a2dd..eb0b8594a210f16ca247878c462f517e1378d520 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -1210,6 +1216,16 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val); #define WREG32_AND(reg, and) WREG32_P(reg, 0, and) #define WREG32_OR(reg, or) WREG32_P(reg, or, ~(or)) +#define RMWREG32(reg, val, mask) \ + do { \ + u32 tmp_ = RREG32(reg); \ + tmp_ &= ~(mask); \ + tmp_ |= ((val) << __ffs(mask)); \ + WREG32(reg, tmp_); \ + } while (0) + +#define RREG32_MASK(reg, mask) ((RREG32(reg) & mask) >> __ffs(mask)) + #define REG_FIELD_SHIFT(reg, field) reg##_##field##_SHIFT #define REG_FIELD_MASK(reg, field) reg##_##field##_MASK #define WREG32_FIELD(reg, offset, field, val) \ @@ -1399,6 +1409,8 @@ struct hl_device_idle_busy_ts { * @idle_busy_ts_idx: index of current entry in idle_busy_ts_arr * @id: device minor. * @id_control: minor of the control device + * @cpu_pci_msb_addr: 50-bit extension bits for the device CPU's 40-bit + * addresses. * @disabled: is device disabled. * @late_init_done: is late init stage was done during initialization. * @hwmon_initialized: is H/W monitor sensors was initialized.
@@ -1412,6 +1424,7 @@ struct hl_device_idle_busy_ts { * huge pages. * @init_done: is the initialization of the device done. * @mmu_enable: is MMU enabled. + * @mmu_huge_page_opt: is MMU huge pages optimization enabled. * @clock_gating: is clock gating enabled. * @device_cpu_disabled: is the device CPU disabled (due to timeouts) * @dma_mask: the dma mask that was set for this device @@ -1489,6 +1502,7 @@ struct hl_device { u32 idle_busy_ts_idx; u16 id; u16 id_control; + u16 cpu_pci_msb_addr; u8 disabled; u8 late_init_done; u8 hwmon_initialized; @@ -1503,7 +1517,7 @@ struct hl_device { u8 device_cpu_disabled; u8 dma_mask; u8 in_debug; - u8 power9_64bit_dma_enable; + u8 power9_64bit_dma_enable; u8 cdev_sysfs_created; u8 stop_on_err; u8 supports_sync_stream; @@ -1511,11 +1525,18 @@ struct hl_device { /* Parameters for bring-up */ u8 mmu_enable; + u8 mmu_huge_page_opt; u8 cpu_enable; u8 reset_pcilink; u8 cpu_queues_enable; u8 fw_loading; u8 pldm; + u8 axi_drain; + u8 sram_scrambler_enable; + u8 dram_scrambler_enable; + u8 hard_reset_on_fw_events; + u8 bmc_enable; + u8 rl_enable; }; @@ -1685,6 +1706,7 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, void hl_sob_reset_error(struct kref *ref); void goya_set_asic_funcs(struct hl_device *hdev); +void gaudi_set_asic_funcs(struct hl_device *hdev); int hl_vm_ctx_init(struct hl_ctx *ctx); void hl_vm_ctx_fini(struct hl_ctx *ctx); diff --git a/drivers/misc/habanalabs/habanalabs_drv.c b/drivers/misc/habanalabs/habanalabs_drv.c index 822acb5a00bb8ca16e26003b83e56c9c29a42e2c..9d009b50aa0ddc66507667c57977273056af6aac 100644 --- a/drivers/misc/habanalabs/habanalabs_drv.c +++ b/drivers/misc/habanalabs/habanalabs_drv.c @@ -172,6 +172,7 @@ int hl_device_open(struct inode *inode, struct file *filp) put_pid(hpriv->taskpid); kfree(hpriv); + return rc; } @@ -234,6 +235,12 @@ static void set_driver_behavior_per_device(struct hl_device *hdev) hdev->clock_gating = 1; hdev->reset_pcilink = 0; + hdev->axi_drain = 0; + hdev->sram_scrambler_enable = 1; + hdev->dram_scrambler_enable = 1; + hdev->rl_enable = 1; + hdev->bmc_enable = 1; + hdev->hard_reset_on_fw_events = 1; } /* diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map.h b/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map.h new file mode 100644 index 0000000000000000000000000000000000000000..d2959506eeee1a1149271c9d83623dbc4fa376dd --- /dev/null +++ b/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map.h @@ -0,0 +1,687 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright 2018-2020 HabanaLabs, Ltd. + * All Rights Reserved. 
+ *
+ */
+
+/************************************
+ ** This is an auto-generated file **
+ **       DO NOT EDIT BELOW        **
+ ************************************/
+
+#ifndef __GAUDI_ASYNC_IDS_MAP_EVENTS_H_
+#define __GAUDI_ASYNC_IDS_MAP_EVENTS_H_
+
+struct gaudi_async_events_ids_map {
+	int fc_id;
+	int cpu_id;
+};
+
+static struct gaudi_async_events_ids_map gaudi_irq_map_table[] = {
+	{ .fc_id = 0, .cpu_id = 0 },
+	{ .fc_id = 1, .cpu_id = 1 },
+	{ .fc_id = 2, .cpu_id = 2 },
+	{ .fc_id = 3, .cpu_id = 3 },
+	{ .fc_id = 4, .cpu_id = 4 },
+	{ .fc_id = 5, .cpu_id = 5 },
+	{ .fc_id = 6, .cpu_id = 6 },
+	{ .fc_id = 7, .cpu_id = 7 },
+	{ .fc_id = 8, .cpu_id = 8 },
+	{ .fc_id = 9, .cpu_id = 9 },
+	{ .fc_id = 10, .cpu_id = 10 },
+	{ .fc_id = 11, .cpu_id = 11 },
+	{ .fc_id = 12, .cpu_id = 12 },
+	{ .fc_id = 13, .cpu_id = 13 },
+	{ .fc_id = 14, .cpu_id = 14 },
+	{ .fc_id = 15, .cpu_id = 15 },
+	{ .fc_id = 16, .cpu_id = 16 },
+	{ .fc_id = 17, .cpu_id = 17 },
+	{ .fc_id = 18, .cpu_id = 18 },
+	{ .fc_id = 19, .cpu_id = 19 },
+	{ .fc_id = 20, .cpu_id = 20 },
+	{ .fc_id = 21, .cpu_id = 21 },
+	{ .fc_id = 22, .cpu_id = 22 },
+	{ .fc_id = 23, .cpu_id = 23 },
+	{ .fc_id = 24, .cpu_id = 24 },
+	{ .fc_id = 25, .cpu_id = 25 },
+	{ .fc_id = 26, .cpu_id = 26 },
+	{ .fc_id = 27, .cpu_id = 27 },
+	{ .fc_id = 28, .cpu_id = 28 },
+	{ .fc_id = 29, .cpu_id = 29 },
+	{ .fc_id = 30, .cpu_id = 30 },
+	{ .fc_id = 31, .cpu_id = 31 },
+	{ .fc_id = 32, .cpu_id = 32 },
+	{ .fc_id = 33, .cpu_id = 33 },
+	{ .fc_id = 34, .cpu_id = 34 },
+	{ .fc_id = 35, .cpu_id = 35 },
+	{ .fc_id = 36, .cpu_id = 36 },
+	{ .fc_id = 37, .cpu_id = 37 },
+	{ .fc_id = 38, .cpu_id = 38 },
+	{ .fc_id = 39, .cpu_id = 38 },
+	{ .fc_id = 40, .cpu_id = 38 },
+	{ .fc_id = 41, .cpu_id = 38 },
+	{ .fc_id = 42, .cpu_id = 38 },
+	{ .fc_id = 43, .cpu_id = 38 },
+	{ .fc_id = 44, .cpu_id = 38 },
+	{ .fc_id = 45, .cpu_id = 38 },
+	{ .fc_id = 46, .cpu_id = 39 },
+	{ .fc_id = 47, .cpu_id = 39 },
+	{ .fc_id = 48, .cpu_id = 39 },
+	{ .fc_id = 49, .cpu_id = 39 },
+	{ .fc_id = 50, .cpu_id = 39 },
+	{ .fc_id = 51, .cpu_id = 39 },
+	{ .fc_id = 52, .cpu_id = 39 },
+	{ .fc_id = 53, .cpu_id = 39 },
+	{ .fc_id = 54, .cpu_id = 40 },
+	{ .fc_id = 55, .cpu_id = 41 },
+	{ .fc_id = 56, .cpu_id = 42 },
+	{ .fc_id = 57, .cpu_id = 43 },
+	{ .fc_id = 58, .cpu_id = 44 },
+	{ .fc_id = 59, .cpu_id = 45 },
+	{ .fc_id = 60, .cpu_id = 46 },
+	{ .fc_id = 61, .cpu_id = 47 },
+	{ .fc_id = 62, .cpu_id = 48 },
+	{ .fc_id = 63, .cpu_id = 49 },
+	{ .fc_id = 64, .cpu_id = 50 },
+	{ .fc_id = 65, .cpu_id = 51 },
+	{ .fc_id = 66, .cpu_id = 52 },
+	{ .fc_id = 67, .cpu_id = 53 },
+	{ .fc_id = 68, .cpu_id = 54 },
+	{ .fc_id = 69, .cpu_id = 55 },
+	{ .fc_id = 70, .cpu_id = 56 },
+	{ .fc_id = 71, .cpu_id = 56 },
+	{ .fc_id = 72, .cpu_id = 56 },
+	{ .fc_id = 73, .cpu_id = 56 },
+	{ .fc_id = 74, .cpu_id = 56 },
+	{ .fc_id = 75, .cpu_id = 56 },
+	{ .fc_id = 76, .cpu_id = 56 },
+	{ .fc_id = 77, .cpu_id = 56 },
+	{ .fc_id = 78, .cpu_id = 57 },
+	{ .fc_id = 79, .cpu_id = 57 },
+	{ .fc_id = 80, .cpu_id = 57 },
+	{ .fc_id = 81, .cpu_id = 57 },
+	{ .fc_id = 82, .cpu_id = 57 },
+	{ .fc_id = 83, .cpu_id = 57 },
+	{ .fc_id = 84, .cpu_id = 57 },
+	{ .fc_id = 85, .cpu_id = 57 },
+	{ .fc_id = 86, .cpu_id = 58 },
+	{ .fc_id = 87, .cpu_id = 59 },
+	{ .fc_id = 88, .cpu_id = 60 },
+	{ .fc_id = 89, .cpu_id = 61 },
+	{ .fc_id = 90, .cpu_id = 62 },
+	{ .fc_id = 91, .cpu_id = 63 },
+	{ .fc_id = 92, .cpu_id = 64 },
+	{ .fc_id = 93, .cpu_id = 64 },
+	{ .fc_id = 94, .cpu_id = 64 },
+	{ .fc_id = 95, .cpu_id = 64 },
+	{ .fc_id = 96, .cpu_id = 64 },
+	{ .fc_id = 97, .cpu_id = 64 },
+	{ .fc_id = 98, .cpu_id = 64 },
+	{ .fc_id = 99, .cpu_id = 64 },
+	{ .fc_id = 100, .cpu_id = 64 },
+	{ .fc_id = 101, .cpu_id = 64 },
+	{ .fc_id = 102, .cpu_id = 64 },
+	{ .fc_id = 103, .cpu_id = 64 },
+	{ .fc_id = 104, .cpu_id = 64 },
+	{ .fc_id = 105, .cpu_id = 64 },
+	{ .fc_id = 106, .cpu_id = 64 },
+	{ .fc_id = 107, .cpu_id = 64 },
+	{ .fc_id = 108, .cpu_id = 64 },
+	{ .fc_id = 109, .cpu_id = 64 },
+	{ .fc_id = 110, .cpu_id = 64 },
+	{ .fc_id = 111, .cpu_id = 64 },
+	{ .fc_id = 112, .cpu_id = 64 },
+	{ .fc_id = 113, .cpu_id = 64 },
+	{ .fc_id = 114, .cpu_id = 64 },
+	{ .fc_id = 115, .cpu_id = 64 },
+	{ .fc_id = 116, .cpu_id = 64 },
+	{ .fc_id = 117, .cpu_id = 64 },
+	{ .fc_id = 118, .cpu_id = 64 },
+	{ .fc_id = 119, .cpu_id = 64 },
+	{ .fc_id = 120, .cpu_id = 64 },
+	{ .fc_id = 121, .cpu_id = 64 },
+	{ .fc_id = 122, .cpu_id = 64 },
+	{ .fc_id = 123, .cpu_id = 64 },
+	{ .fc_id = 124, .cpu_id = 65 },
+	{ .fc_id = 125, .cpu_id = 65 },
+	{ .fc_id = 126, .cpu_id = 65 },
+	{ .fc_id = 127, .cpu_id = 65 },
+	{ .fc_id = 128, .cpu_id = 65 },
+	{ .fc_id = 129, .cpu_id = 65 },
+	{ .fc_id = 130, .cpu_id = 65 },
+	{ .fc_id = 131, .cpu_id = 65 },
+	{ .fc_id = 132, .cpu_id = 65 },
+	{ .fc_id = 133, .cpu_id = 65 },
+	{ .fc_id = 134, .cpu_id = 65 },
+	{ .fc_id = 135, .cpu_id = 65 },
+	{ .fc_id = 136, .cpu_id = 65 },
+	{ .fc_id = 137, .cpu_id = 65 },
+	{ .fc_id = 138, .cpu_id = 65 },
+	{ .fc_id = 139, .cpu_id = 65 },
+	{ .fc_id = 140, .cpu_id = 65 },
+	{ .fc_id = 141, .cpu_id = 65 },
+	{ .fc_id = 142, .cpu_id = 65 },
+	{ .fc_id = 143, .cpu_id = 65 },
+	{ .fc_id = 144, .cpu_id = 65 },
+	{ .fc_id = 145, .cpu_id = 65 },
+	{ .fc_id = 146, .cpu_id = 65 },
+	{ .fc_id = 147, .cpu_id = 65 },
+	{ .fc_id = 148, .cpu_id = 65 },
+	{ .fc_id = 149, .cpu_id = 65 },
+	{ .fc_id = 150, .cpu_id = 65 },
+	{ .fc_id = 151, .cpu_id = 65 },
+	{ .fc_id = 152, .cpu_id = 65 },
+	{ .fc_id = 153, .cpu_id = 65 },
+	{ .fc_id = 154, .cpu_id = 65 },
+	{ .fc_id = 155, .cpu_id = 65 },
+	{ .fc_id = 156, .cpu_id = 66 },
+	{ .fc_id = 157, .cpu_id = 66 },
+	{ .fc_id = 158, .cpu_id = 66 },
+	{ .fc_id = 159, .cpu_id = 66 },
+	{ .fc_id = 160, .cpu_id = 66 },
+	{ .fc_id = 161, .cpu_id = 66 },
+	{ .fc_id = 162, .cpu_id = 66 },
+	{ .fc_id = 163, .cpu_id = 66 },
+	{ .fc_id = 164, .cpu_id = 66 },
+	{ .fc_id = 165, .cpu_id = 66 },
+	{ .fc_id = 166, .cpu_id = 67 },
+	{ .fc_id = 167, .cpu_id = 67 },
+	{ .fc_id = 168, .cpu_id = 67 },
+	{ .fc_id = 169, .cpu_id = 67 },
+	{ .fc_id = 170, .cpu_id = 67 },
+	{ .fc_id = 171, .cpu_id = 67 },
+	{ .fc_id = 172, .cpu_id = 67 },
+	{ .fc_id = 173, .cpu_id = 67 },
+	{ .fc_id = 174, .cpu_id = 67 },
+	{ .fc_id = 175, .cpu_id = 67 },
+	{ .fc_id = 176, .cpu_id = 68 },
+	{ .fc_id = 177, .cpu_id = 68 },
+	{ .fc_id = 178, .cpu_id = 68 },
+	{ .fc_id = 179, .cpu_id = 68 },
+	{ .fc_id = 180, .cpu_id = 69 },
+	{ .fc_id = 181, .cpu_id = 69 },
+	{ .fc_id = 182, .cpu_id = 69 },
+	{ .fc_id = 183, .cpu_id = 69 },
+	{ .fc_id = 184, .cpu_id = 70 },
+	{ .fc_id = 185, .cpu_id = 71 },
+	{ .fc_id = 186, .cpu_id = 71 },
+	{ .fc_id = 187, .cpu_id = 71 },
+	{ .fc_id = 188, .cpu_id = 71 },
+	{ .fc_id = 189, .cpu_id = 72 },
+	{ .fc_id = 190, .cpu_id = 72 },
+	{ .fc_id = 191, .cpu_id = 72 },
+	{ .fc_id = 192, .cpu_id = 72 },
+	{ .fc_id = 193, .cpu_id = 73 },
+	{ .fc_id = 194, .cpu_id = 74 },
+	{ .fc_id = 195, .cpu_id = 75 },
+	{ .fc_id = 196, .cpu_id = 76 },
+	{ .fc_id = 197, .cpu_id = 77 },
+	{ .fc_id = 198, .cpu_id = 78 },
+	{ .fc_id = 199, .cpu_id = 79 },
+	{ .fc_id = 200, .cpu_id = 80 },
+	{ .fc_id = 201, .cpu_id = 81 },
+	{ .fc_id = 202, .cpu_id = 82 },
+	{ .fc_id = 203, .cpu_id = 83 },
+	{ .fc_id = 204, .cpu_id = 84 },
+	{ .fc_id = 205, .cpu_id = 85 },
+	{ .fc_id = 206, .cpu_id = 86 },
+	{ .fc_id = 207, .cpu_id = 87 },
+	{ .fc_id = 208, .cpu_id = 88 },
+	{ .fc_id = 209, .cpu_id = 89 },
+	{ .fc_id = 210, .cpu_id = 90 },
+	{ .fc_id = 211, .cpu_id = 91 },
+	{ .fc_id = 212, .cpu_id = 92 },
+	{ .fc_id = 213, .cpu_id = 93 },
+	{ .fc_id = 214, .cpu_id = 94 },
+	{ .fc_id = 215, .cpu_id = 95 },
+	{ .fc_id = 216, .cpu_id = 96 },
+	{ .fc_id = 217, .cpu_id = 97 },
+	{ .fc_id = 218, .cpu_id = 98 },
+	{ .fc_id = 219, .cpu_id = 99 },
+	{ .fc_id = 220, .cpu_id = 100 },
+	{ .fc_id = 221, .cpu_id = 101 },
+	{ .fc_id = 222, .cpu_id = 102 },
+	{ .fc_id = 223, .cpu_id = 103 },
+	{ .fc_id = 224, .cpu_id = 104 },
+	{ .fc_id = 225, .cpu_id = 105 },
+	{ .fc_id = 226, .cpu_id = 106 },
+	{ .fc_id = 227, .cpu_id = 107 },
+	{ .fc_id = 228, .cpu_id = 108 },
+	{ .fc_id = 229, .cpu_id = 109 },
+	{ .fc_id = 230, .cpu_id = 110 },
+	{ .fc_id = 231, .cpu_id = 111 },
+	{ .fc_id = 232, .cpu_id = 112 },
+	{ .fc_id = 233, .cpu_id = 113 },
+	{ .fc_id = 234, .cpu_id = 114 },
+	{ .fc_id = 235, .cpu_id = 115 },
+	{ .fc_id = 236, .cpu_id = 116 },
+	{ .fc_id = 237, .cpu_id = 117 },
+	{ .fc_id = 238, .cpu_id = 118 },
+	{ .fc_id = 239, .cpu_id = 119 },
+	{ .fc_id = 240, .cpu_id = 119 },
+	{ .fc_id = 241, .cpu_id = 119 },
+	{ .fc_id = 242, .cpu_id = 119 },
+	{ .fc_id = 243, .cpu_id = 119 },
+	{ .fc_id = 244, .cpu_id = 119 },
+	{ .fc_id = 245, .cpu_id = 119 },
+	{ .fc_id = 246, .cpu_id = 119 },
+	{ .fc_id = 247, .cpu_id = 119 },
+	{ .fc_id = 248, .cpu_id = 119 },
+	{ .fc_id = 249, .cpu_id = 119 },
+	{ .fc_id = 250, .cpu_id = 119 },
+	{ .fc_id = 251, .cpu_id = 119 },
+	{ .fc_id = 252, .cpu_id = 119 },
+	{ .fc_id = 253, .cpu_id = 119 },
+	{ .fc_id = 254, .cpu_id = 119 },
+	{ .fc_id = 255, .cpu_id = 119 },
+	{ .fc_id = 256, .cpu_id = 119 },
+	{ .fc_id = 257, .cpu_id = 120 },
+	{ .fc_id = 258, .cpu_id = 121 },
+	{ .fc_id = 259, .cpu_id = 122 },
+	{ .fc_id = 260, .cpu_id = 123 },
+	{ .fc_id = 261, .cpu_id = 124 },
+	{ .fc_id = 262, .cpu_id = 125 },
+	{ .fc_id = 263, .cpu_id = 126 },
+	{ .fc_id = 264, .cpu_id = 127 },
+	{ .fc_id = 265, .cpu_id = 127 },
+	{ .fc_id = 266, .cpu_id = 127 },
+	{ .fc_id = 267, .cpu_id = 127 },
+	{ .fc_id = 268, .cpu_id = 127 },
+	{ .fc_id = 269, .cpu_id = 128 },
+	{ .fc_id = 270, .cpu_id = 128 },
+	{ .fc_id = 271, .cpu_id = 128 },
+	{ .fc_id = 272, .cpu_id = 128 },
+	{ .fc_id = 273, .cpu_id = 128 },
+	{ .fc_id = 274, .cpu_id = 128 },
+	{ .fc_id = 275, .cpu_id = 128 },
+	{ .fc_id = 276, .cpu_id = 128 },
+	{ .fc_id = 277, .cpu_id = 129 },
+	{ .fc_id = 278, .cpu_id = 129 },
+	{ .fc_id = 279, .cpu_id = 129 },
+	{ .fc_id = 280, .cpu_id = 129 },
+	{ .fc_id = 281, .cpu_id = 130 },
+	{ .fc_id = 282, .cpu_id = 131 },
+	{ .fc_id = 283, .cpu_id = 132 },
+	{ .fc_id = 284, .cpu_id = 133 },
+	{ .fc_id = 285, .cpu_id = 134 },
+	{ .fc_id = 286, .cpu_id = 135 },
+	{ .fc_id = 287, .cpu_id = 136 },
+	{ .fc_id = 288, .cpu_id = 137 },
+	{ .fc_id = 289, .cpu_id = 138 },
+	{ .fc_id = 290, .cpu_id = 139 },
+	{ .fc_id = 291, .cpu_id = 140 },
+	{ .fc_id = 292, .cpu_id = 141 },
+	{ .fc_id = 293, .cpu_id = 142 },
+	{ .fc_id = 294, .cpu_id = 143 },
+	{ .fc_id = 295, .cpu_id = 144 },
+	{ .fc_id = 296, .cpu_id = 145 },
+	{ .fc_id = 297, .cpu_id = 146 },
+	{ .fc_id = 298, .cpu_id = 147 },
+	{ .fc_id = 299, .cpu_id = 148 },
+	{ .fc_id = 300, .cpu_id = 149 },
+	{ .fc_id = 301, .cpu_id = 150 },
+	{ .fc_id = 302, .cpu_id = 151 },
+	{ .fc_id = 303, .cpu_id = 152 },
+	{ .fc_id = 304, .cpu_id = 153 },
+	{ .fc_id = 305, .cpu_id = 154 },
+	{ .fc_id = 306, .cpu_id = 155 },
+	{ .fc_id = 307, .cpu_id = 156 },
+	{ .fc_id = 308, .cpu_id = 157 },
+	{ .fc_id = 309, .cpu_id = 158 },
+	{ .fc_id = 310, .cpu_id = 159 },
+	{ .fc_id = 311, .cpu_id = 160 },
+	{ .fc_id = 312, .cpu_id = 161 },
+	{ .fc_id = 313, .cpu_id = 162 },
+	{ .fc_id = 314, .cpu_id = 163 },
+	{ .fc_id = 315, .cpu_id = 164 },
+	{ .fc_id = 316, .cpu_id = 165 },
+	{ .fc_id = 317, .cpu_id = 166 },
+	{ .fc_id = 318, .cpu_id = 167 },
+	{ .fc_id = 319, .cpu_id = 168 },
+	{ .fc_id = 320, .cpu_id = 169 },
+	{ .fc_id = 321, .cpu_id = 170 },
+	{ .fc_id = 322, .cpu_id = 171 },
+	{ .fc_id = 323, .cpu_id = 172 },
+	{ .fc_id = 324, .cpu_id = 173 },
+	{ .fc_id = 325, .cpu_id = 174 },
+	{ .fc_id = 326, .cpu_id = 175 },
+	{ .fc_id = 327, .cpu_id = 176 },
+	{ .fc_id = 328, .cpu_id = 177 },
+	{ .fc_id = 329, .cpu_id = 178 },
+	{ .fc_id = 330, .cpu_id = 179 },
+	{ .fc_id = 331, .cpu_id = 180 },
+	{ .fc_id = 332, .cpu_id = 181 },
+	{ .fc_id = 333, .cpu_id = 182 },
+	{ .fc_id = 334, .cpu_id = 183 },
+	{ .fc_id = 335, .cpu_id = 184 },
+	{ .fc_id = 336, .cpu_id = 185 },
+	{ .fc_id = 337, .cpu_id = 186 },
+	{ .fc_id = 338, .cpu_id = 187 },
+	{ .fc_id = 339, .cpu_id = 188 },
+	{ .fc_id = 340, .cpu_id = 189 },
+	{ .fc_id = 341, .cpu_id = 190 },
+	{ .fc_id = 342, .cpu_id = 191 },
+	{ .fc_id = 343, .cpu_id = 192 },
+	{ .fc_id = 344, .cpu_id = 193 },
+	{ .fc_id = 345, .cpu_id = 194 },
+	{ .fc_id = 346, .cpu_id = 195 },
+	{ .fc_id = 347, .cpu_id = 196 },
+	{ .fc_id = 348, .cpu_id = 197 },
+	{ .fc_id = 349, .cpu_id = 198 },
+	{ .fc_id = 350, .cpu_id = 199 },
+	{ .fc_id = 351, .cpu_id = 200 },
+	{ .fc_id = 352, .cpu_id = 201 },
+	{ .fc_id = 353, .cpu_id = 202 },
+	{ .fc_id = 354, .cpu_id = 203 },
+	{ .fc_id = 355, .cpu_id = 204 },
+	{ .fc_id = 356, .cpu_id = 205 },
+	{ .fc_id = 357, .cpu_id = 206 },
+	{ .fc_id = 358, .cpu_id = 207 },
+	{ .fc_id = 359, .cpu_id = 208 },
+	{ .fc_id = 360, .cpu_id = 209 },
+	{ .fc_id = 361, .cpu_id = 210 },
+	{ .fc_id = 362, .cpu_id = 211 },
+	{ .fc_id = 363, .cpu_id = 212 },
+	{ .fc_id = 364, .cpu_id = 213 },
+	{ .fc_id = 365, .cpu_id = 214 },
+	{ .fc_id = 366, .cpu_id = 215 },
+	{ .fc_id = 367, .cpu_id = 216 },
+	{ .fc_id = 368, .cpu_id = 217 },
+	{ .fc_id = 369, .cpu_id = 218 },
+	{ .fc_id = 370, .cpu_id = 219 },
+	{ .fc_id = 371, .cpu_id = 220 },
+	{ .fc_id = 372, .cpu_id = 221 },
+	{ .fc_id = 373, .cpu_id = 222 },
+	{ .fc_id = 374, .cpu_id = 223 },
+	{ .fc_id = 375, .cpu_id = 224 },
+	{ .fc_id = 376, .cpu_id = 225 },
+	{ .fc_id = 377, .cpu_id = 226 },
+	{ .fc_id = 378, .cpu_id = 227 },
+	{ .fc_id = 379, .cpu_id = 228 },
+	{ .fc_id = 380, .cpu_id = 229 },
+	{ .fc_id = 381, .cpu_id = 230 },
+	{ .fc_id = 382, .cpu_id = 231 },
+	{ .fc_id = 383, .cpu_id = 232 },
+	{ .fc_id = 384, .cpu_id = 233 },
+	{ .fc_id = 385, .cpu_id = 234 },
+	{ .fc_id = 386, .cpu_id = 235 },
+	{ .fc_id = 387, .cpu_id = 236 },
+	{ .fc_id = 388, .cpu_id = 237 },
+	{ .fc_id = 389, .cpu_id = 238 },
+	{ .fc_id = 390, .cpu_id = 239 },
+	{ .fc_id = 391, .cpu_id = 240 },
+	{ .fc_id = 392, .cpu_id = 241 },
+	{ .fc_id = 393, .cpu_id = 242 },
+	{ .fc_id = 394, .cpu_id = 243 },
+	{ .fc_id = 395, .cpu_id = 244 },
+	{ .fc_id = 396, .cpu_id = 245 },
+	{ .fc_id = 397, .cpu_id = 246 },
+	{ .fc_id = 398, .cpu_id = 247 },
+	{ .fc_id = 399, .cpu_id = 248 },
+	{ .fc_id = 400, .cpu_id = 249 },
+	{ .fc_id = 401, .cpu_id = 250 },
+	{ .fc_id = 402, .cpu_id = 251 },
+	{ .fc_id = 403, .cpu_id = 252 },
+	{ .fc_id = 404, .cpu_id = 253 },
+	{ .fc_id = 405, .cpu_id = 254 },
+	{ .fc_id = 406, .cpu_id = 255 },
+	{ .fc_id = 407, .cpu_id = 256 },
+	{ .fc_id = 408, .cpu_id = 257 },
+	{ .fc_id = 409, .cpu_id = 258 },
+	{ .fc_id = 410, .cpu_id = 259 },
+	{ .fc_id = 411, .cpu_id = 260 },
+	{ .fc_id = 412, .cpu_id = 261 },
+	{ .fc_id = 413, .cpu_id = 262 },
+	{ .fc_id = 414, .cpu_id = 263 },
+	{ .fc_id = 415, .cpu_id = 264 },
+	{ .fc_id = 416, .cpu_id = 265 },
+	{ .fc_id = 417, .cpu_id = 266 },
+	{ .fc_id = 418, .cpu_id = 267 },
+	{ .fc_id = 419, .cpu_id = 268 },
+	{ .fc_id = 420, .cpu_id = 269 },
+	{ .fc_id = 421, .cpu_id = 270 },
+	{ .fc_id = 422, .cpu_id = 271 },
+	{ .fc_id = 423, .cpu_id = 272 },
+	{ .fc_id = 424, .cpu_id = 273 },
+	{ .fc_id = 425, .cpu_id = 274 },
+	{ .fc_id = 426, .cpu_id = 275 },
+	{ .fc_id = 427, .cpu_id = 276 },
+	{ .fc_id = 428, .cpu_id = 277 },
+	{ .fc_id = 429, .cpu_id = 278 },
+	{ .fc_id = 430, .cpu_id = 279 },
+	{ .fc_id = 431, .cpu_id = 280 },
+	{ .fc_id = 432, .cpu_id = 281 },
+	{ .fc_id = 433, .cpu_id = 282 },
+	{ .fc_id = 434, .cpu_id = 283 },
+	{ .fc_id = 435, .cpu_id = 284 },
+	{ .fc_id = 436, .cpu_id = 285 },
+	{ .fc_id = 437, .cpu_id = 286 },
+	{ .fc_id = 438, .cpu_id = 287 },
+	{ .fc_id = 439, .cpu_id = 288 },
+	{ .fc_id = 440, .cpu_id = 289 },
+	{ .fc_id = 441, .cpu_id = 290 },
+	{ .fc_id = 442, .cpu_id = 291 },
+	{ .fc_id = 443, .cpu_id = 292 },
+	{ .fc_id = 444, .cpu_id = 293 },
+	{ .fc_id = 445, .cpu_id = 294 },
+	{ .fc_id = 446, .cpu_id = 295 },
+	{ .fc_id = 447, .cpu_id = 296 },
+	{ .fc_id = 448, .cpu_id = 297 },
+	{ .fc_id = 449, .cpu_id = 298 },
+	{ .fc_id = 450, .cpu_id = 299 },
+	{ .fc_id = 451, .cpu_id = 300 },
+	{ .fc_id = 452, .cpu_id = 301 },
+	{ .fc_id = 453, .cpu_id = 302 },
+	{ .fc_id = 454, .cpu_id = 303 },
+	{ .fc_id = 455, .cpu_id = 304 },
+	{ .fc_id = 456, .cpu_id = 305 },
+	{ .fc_id = 457, .cpu_id = 306 },
+	{ .fc_id = 458, .cpu_id = 307 },
+	{ .fc_id = 459, .cpu_id = 308 },
+	{ .fc_id = 460, .cpu_id = 309 },
+	{ .fc_id = 461, .cpu_id = 310 },
+	{ .fc_id = 462, .cpu_id = 311 },
+	{ .fc_id = 463, .cpu_id = 312 },
+	{ .fc_id = 464, .cpu_id = 313 },
+	{ .fc_id = 465, .cpu_id = 314 },
+	{ .fc_id = 466, .cpu_id = 315 },
+	{ .fc_id = 467, .cpu_id = 316 },
+	{ .fc_id = 468, .cpu_id = 317 },
+	{ .fc_id = 469, .cpu_id = 318 },
+	{ .fc_id = 470, .cpu_id = 319 },
+	{ .fc_id = 471, .cpu_id = 320 },
+	{ .fc_id = 472, .cpu_id = 321 },
+	{ .fc_id = 473, .cpu_id = 322 },
+	{ .fc_id = 474, .cpu_id = 323 },
+	{ .fc_id = 475, .cpu_id = 324 },
+	{ .fc_id = 476, .cpu_id = 325 },
+	{ .fc_id = 477, .cpu_id = 326 },
+	{ .fc_id = 478, .cpu_id = 327 },
+	{ .fc_id = 479, .cpu_id = 328 },
+	{ .fc_id = 480, .cpu_id = 329 },
+	{ .fc_id = 481, .cpu_id = 330 },
+	{ .fc_id = 482, .cpu_id = 331 },
+	{ .fc_id = 483, .cpu_id = 332 },
+	{ .fc_id = 484, .cpu_id = 333 },
+	{ .fc_id = 485, .cpu_id = 334 },
+	{ .fc_id = 486, .cpu_id = 335 },
+	{ .fc_id = 487, .cpu_id = 336 },
+	{ .fc_id = 488, .cpu_id = 337 },
+	{ .fc_id = 489, .cpu_id = 338 },
+	{ .fc_id = 490, .cpu_id = 339 },
+	{ .fc_id = 491, .cpu_id = 340 },
+	{ .fc_id = 492, .cpu_id = 341 },
+	{ .fc_id = 493, .cpu_id = 342 },
+	{ .fc_id = 494, .cpu_id = 343 },
+	{ .fc_id = 495, .cpu_id = 344 },
+	{ .fc_id = 496, .cpu_id = 345 },
+	{ .fc_id = 497, .cpu_id = 346 },
+	{ .fc_id = 498, .cpu_id = 347 },
+	{ .fc_id = 499, .cpu_id = 348 },
+	{ .fc_id = 500, .cpu_id = 349 },
+	{ .fc_id = 501, .cpu_id = 350 },
+	{ .fc_id = 502, .cpu_id = 351 },
+	{ .fc_id = 503, .cpu_id = 352 },
+	{ .fc_id = 504, .cpu_id = 353 },
+	{ .fc_id = 505, .cpu_id = 354 },
+	{ .fc_id = 506, .cpu_id = 355 },
+	{ .fc_id = 507, .cpu_id = 356 },
+	{ .fc_id = 508, .cpu_id = 357 },
+	{ .fc_id = 509, .cpu_id = 358 },
+	{ .fc_id = 510, .cpu_id = 359 },
+	{ .fc_id = 511, .cpu_id = 360 },
+	{ .fc_id = 512, .cpu_id = 361 },
+	{ .fc_id = 513, .cpu_id = 362 },
+	{ .fc_id = 514, .cpu_id = 363 },
+	{ .fc_id = 515, .cpu_id = 364 },
+	{ .fc_id = 516, .cpu_id = 365 },
+	{ .fc_id = 517, .cpu_id = 366 },
+	{ .fc_id = 518, .cpu_id = 367 },
+	{ .fc_id = 519, .cpu_id = 368 },
+	{ .fc_id = 520, .cpu_id = 369 },
+	{ .fc_id = 521, .cpu_id = 370 },
+	{ .fc_id = 522, .cpu_id = 371 },
+	{ .fc_id = 523, .cpu_id = 372 },
+	{ .fc_id = 524, .cpu_id = 373 },
+	{ .fc_id = 525, .cpu_id = 374 },
+	{ .fc_id = 526, .cpu_id = 375 },
+	{ .fc_id = 527, .cpu_id = 376 },
+	{ .fc_id = 528, .cpu_id = 377 },
+	{ .fc_id = 529, .cpu_id = 378 },
+	{ .fc_id = 530, .cpu_id = 379 },
+	{ .fc_id = 531, .cpu_id = 380 },
+	{ .fc_id = 532, .cpu_id = 381 },
+	{ .fc_id = 533, .cpu_id = 382 },
+	{ .fc_id = 534, .cpu_id = 383 },
+	{ .fc_id = 535, .cpu_id = 384 },
+	{ .fc_id = 536, .cpu_id = 385 },
+	{ .fc_id = 537, .cpu_id = 386 },
+	{ .fc_id = 538, .cpu_id = 387 },
+	{ .fc_id = 539, .cpu_id = 388 },
+	{ .fc_id = 540, .cpu_id = 389 },
+	{ .fc_id = 541, .cpu_id = 390 },
+	{ .fc_id = 542, .cpu_id = 391 },
+	{ .fc_id = 543, .cpu_id = 392 },
+	{ .fc_id = 544, .cpu_id = 393 },
+	{ .fc_id = 545, .cpu_id = 394 },
+	{ .fc_id = 546, .cpu_id = 395 },
+	{ .fc_id = 547, .cpu_id = 396 },
+	{ .fc_id = 548, .cpu_id = 397 },
+	{ .fc_id = 549, .cpu_id = 398 },
+	{ .fc_id = 550, .cpu_id = 399 },
+	{ .fc_id = 551, .cpu_id = 400 },
+	{ .fc_id = 552, .cpu_id = 401 },
+	{ .fc_id = 553, .cpu_id = 402 },
+	{ .fc_id = 554, .cpu_id = 403 },
+	{ .fc_id = 555, .cpu_id = 404 },
+	{ .fc_id = 556, .cpu_id = 405 },
+	{ .fc_id = 557, .cpu_id = 406 },
+	{ .fc_id = 558, .cpu_id = 407 },
+	{ .fc_id = 559, .cpu_id = 408 },
+	{ .fc_id = 560, .cpu_id = 409 },
+	{ .fc_id = 561, .cpu_id = 410 },
+	{ .fc_id = 562, .cpu_id = 411 },
+	{ .fc_id = 563, .cpu_id = 412 },
+	{ .fc_id = 564, .cpu_id = 413 },
+	{ .fc_id = 565, .cpu_id = 414 },
+	{ .fc_id = 566, .cpu_id = 415 },
+	{ .fc_id = 567, .cpu_id = 416 },
+	{ .fc_id = 568, .cpu_id = 417 },
+	{ .fc_id = 569, .cpu_id = 418 },
+	{ .fc_id = 570, .cpu_id = 419 },
+	{ .fc_id = 571, .cpu_id = 420 },
+	{ .fc_id = 572, .cpu_id = 421 },
+	{ .fc_id = 573, .cpu_id = 422 },
+	{ .fc_id = 574, .cpu_id = 423 },
+	{ .fc_id = 575, .cpu_id = 424 },
+	{ .fc_id = 576, .cpu_id = 425 },
+	{ .fc_id = 577, .cpu_id = 426 },
+	{ .fc_id = 578, .cpu_id = 427 },
+	{ .fc_id = 579, .cpu_id = 428 },
+	{ .fc_id = 580, .cpu_id = 429 },
+	{ .fc_id = 581, .cpu_id = 430 },
+	{ .fc_id = 582, .cpu_id = 431 },
+	{ .fc_id = 583, .cpu_id = 432 },
+	{ .fc_id = 584, .cpu_id = 433 },
+	{ .fc_id = 585, .cpu_id = 434 },
+	{ .fc_id = 586, .cpu_id = 435 },
+	{ .fc_id = 587, .cpu_id = 436 },
+	{ .fc_id = 588, .cpu_id = 437 },
+	{ .fc_id = 589, .cpu_id = 438 },
+	{ .fc_id = 590, .cpu_id = 439 },
+	{ .fc_id = 591, .cpu_id = 440 },
+	{ .fc_id = 592, .cpu_id = 441 },
+	{ .fc_id = 593, .cpu_id = 442 },
+	{ .fc_id = 594, .cpu_id = 443 },
+	{ .fc_id = 595, .cpu_id = 444 },
+	{ .fc_id = 596, .cpu_id = 445 },
+	{ .fc_id = 597, .cpu_id = 446 },
+	{ .fc_id = 598, .cpu_id = 447 },
+	{ .fc_id = 599, .cpu_id = 448 },
+	{ .fc_id = 600, .cpu_id = 449 },
+	{ .fc_id = 601, .cpu_id = 450 },
+	{ .fc_id = 602, .cpu_id = 451 },
+	{ .fc_id = 603, .cpu_id = 452 },
+	{ .fc_id = 604, .cpu_id = 453 },
+	{ .fc_id = 605, .cpu_id = 454 },
+	{ .fc_id = 606, .cpu_id = 455 },
+	{ .fc_id = 607, .cpu_id = 456 },
+	{ .fc_id = 608, .cpu_id = 457 },
+	{ .fc_id = 609, .cpu_id = 458 },
+	{ .fc_id = 610, .cpu_id = 459 },
+	{ .fc_id = 611, .cpu_id = 460 },
+	{ .fc_id = 612, .cpu_id = 461 },
+	{ .fc_id = 613, .cpu_id = 462 },
+	{ .fc_id = 614, .cpu_id = 463 },
+	{ .fc_id = 615, .cpu_id = 464 },
+	{ .fc_id = 616, .cpu_id = 465 },
+	{ .fc_id = 617, .cpu_id = 466 },
+	{ .fc_id = 618, .cpu_id = 467 },
+	{ .fc_id = 619, .cpu_id = 468 },
+	{ .fc_id = 620, .cpu_id = 469 },
+	{ .fc_id = 621, .cpu_id = 470 },
+	{ .fc_id = 622, .cpu_id = 471 },
+	{ .fc_id = 623, .cpu_id = 472 },
+	{ .fc_id = 624, .cpu_id = 473 },
+	{ .fc_id = 625, .cpu_id = 474 },
+	{ .fc_id = 626, .cpu_id = 475 },
+	{ .fc_id = 627, .cpu_id = 476 },
+	{ .fc_id = 628, .cpu_id = 477 },
+	{ .fc_id = 629, .cpu_id = 478 },
+	{ .fc_id = 630, .cpu_id = 479 },
+	{ .fc_id = 631, .cpu_id = 480 },
+	{ .fc_id = 632, .cpu_id = 481 },
+	{ .fc_id = 633, .cpu_id = 482 },
+	{ .fc_id = 634, .cpu_id = 483 },
+	{ .fc_id = 635, .cpu_id = 484 },
+	{ .fc_id = 636, .cpu_id = 485 },
+	{ .fc_id = 637, .cpu_id = 486 },
+	{ .fc_id = 638, .cpu_id = 487 },
+	{ .fc_id = 639, .cpu_id = 488 },
+	{ .fc_id = 640, .cpu_id = 489 },
+	{ .fc_id = 641, .cpu_id = 490 },
+	{ .fc_id = 642, .cpu_id = 491 },
+	{ .fc_id = 643, .cpu_id = 492 },
+	{ .fc_id = 644, .cpu_id = 493 },
+	{ .fc_id = 645, .cpu_id = 494 },
+	{ .fc_id = 646, .cpu_id = 495 },
+	{ .fc_id = 647, .cpu_id = 496 },
+	{ .fc_id = 648, .cpu_id = 497 },
+	{ .fc_id = 649, .cpu_id = 498 },
+	{ .fc_id = 650, .cpu_id = 499 },
+	{ .fc_id = 651, .cpu_id = 500 },
+	{ .fc_id = 652, .cpu_id = 501 },
+	{ .fc_id = 653, .cpu_id = 502 },
+	{ .fc_id = 654, .cpu_id = 503 },
+	{ .fc_id = 655, .cpu_id = 504 },
+	{ .fc_id = 656, .cpu_id = 505 },
+	{ .fc_id = 657, .cpu_id = 506 },
+	{ .fc_id = 658, .cpu_id = 507 },
+	{ .fc_id = 659, .cpu_id = 508 },
+	{ .fc_id = 660, .cpu_id = 509 },
+	{ .fc_id = 661, .cpu_id = 510 },
+	{ .fc_id = 662, .cpu_id = 511 },
+};
+
+#endif /* __GAUDI_ASYNC_IDS_MAP_EVENTS_H_ */
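For illustration only (not part of the patch): a minimal sketch of how a consumer of this generated table could translate an event's fc_id into the firmware cpu_id. The helper name gaudi_fc_to_cpu_id is hypothetical and assumes normal kernel context (ARRAY_SIZE and EINVAL from the usual kernel headers); since fc_id values in the table are dense and equal to the array index, a bounds-checked direct index would also work, and the linear scan below merely avoids relying on that invariant.

	/*
	 * Hypothetical helper: resolve an event's fc_id to its
	 * firmware cpu_id via the generated map above.
	 */
	static int gaudi_fc_to_cpu_id(int fc_id)
	{
		int i;

		for (i = 0; i < ARRAY_SIZE(gaudi_irq_map_table); i++)
			if (gaudi_irq_map_table[i].fc_id == fc_id)
				return gaudi_irq_map_table[i].cpu_id;

		return -EINVAL; /* unknown fc_id */
	}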