/* SPDX-License-Identifier: GPL-2.0
 *
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 *
 */

#ifndef HABANALABSP_H_
#define HABANALABSP_H_

#include "include/armcp_if.h"
#include "include/qman_if.h"

/*
 * Prefix for the driver's pr_*() messages. Defined ahead of the <linux/...>
 * includes below so that it is in place before they are processed.
 */
#define pr_fmt(fmt)			"habanalabs: " fmt

#include <linux/cdev.h>
#include <linux/iopoll.h>
#include <linux/irqreturn.h>

/* Driver name string. */
#define HL_NAME				"habanalabs"

/*
 * Top bit of a 64-bit value, shifted right by PAGE_SHIFT.
 * NOTE(review): presumably flags mmap page offsets that refer to command
 * buffers - confirm against the mmap handler.
 */
#define HL_MMAP_CB_MASK			(0x8000000000000000ull >> PAGE_SHIFT)

#define HL_DEVICE_TIMEOUT_USEC		1000000 /* 1 s */

#define HL_PLL_LOW_JOB_FREQ_USEC	5000000 /* 5 s */

/* Number of entries in asic_fixed_properties.hw_queues_props[]. */
#define HL_MAX_QUEUES			128

/* Forward declarations; both structures are defined later in this header. */
struct hl_device;
struct hl_fpriv;

/**
 * enum hl_queue_type - Supported QUEUE types.
 * @QUEUE_TYPE_NA: queue is not available.
 * @QUEUE_TYPE_EXT: external queue which is a DMA channel that may access the
 *                  host.
 * @QUEUE_TYPE_INT: internal queue that performs DMA inside the device's
 *                  memories and/or operates the compute engines.
 * @QUEUE_TYPE_CPU: S/W queue for communication with the device's CPU.
 */
enum hl_queue_type {
	QUEUE_TYPE_NA,
	QUEUE_TYPE_EXT,
	QUEUE_TYPE_INT,
	QUEUE_TYPE_CPU
};

/**
 * struct hw_queue_properties - queue information.
 * @type: queue type.
 * @kmd_only: true if only KMD is allowed to send a job to this queue, false
 *            otherwise.
 */
struct hw_queue_properties {
	enum hl_queue_type	type;
	u8			kmd_only;
};

O
Oded Gabbay 已提交
60 61
/**
 * struct asic_fixed_properties - ASIC specific immutable properties.
O
Oded Gabbay 已提交
62
 * @hw_queues_props: H/W queues properties.
63 64
 * @armcp_info: received various information from ArmCP regarding the H/W. e.g.
 *		available sensors.
65 66
 * @uboot_ver: F/W U-boot version.
 * @preboot_ver: F/W Preboot version.
O
Oded Gabbay 已提交
67 68 69 70 71 72 73 74 75 76
 * @sram_base_address: SRAM physical start address.
 * @sram_end_address: SRAM physical end address.
 * @sram_user_base_address - SRAM physical start address for user access.
 * @dram_base_address: DRAM physical start address.
 * @dram_end_address: DRAM physical end address.
 * @dram_user_base_address: DRAM physical start address for user access.
 * @dram_size: DRAM total size.
 * @dram_pci_bar_size: size of PCI bar towards DRAM.
 * @host_phys_base_address: base physical address of host memory for
 *				transactions that the device generates.
77
 * @max_power_default: max power of the device after reset
O
Oded Gabbay 已提交
78 79 80 81 82 83 84 85 86 87 88
 * @va_space_host_start_address: base address of virtual memory range for
 *                               mapping host memory.
 * @va_space_host_end_address: end address of virtual memory range for
 *                             mapping host memory.
 * @va_space_dram_start_address: base address of virtual memory range for
 *                               mapping DRAM memory.
 * @va_space_dram_end_address: end address of virtual memory range for
 *                             mapping DRAM memory.
 * @cfg_size: configuration space size on SRAM.
 * @sram_size: total size of SRAM.
 * @max_asid: maximum number of open contexts (ASIDs).
89
 * @num_of_events: number of possible internal H/W IRQs.
90 91 92 93
 * @psoc_pci_pll_nr: PCI PLL NR value.
 * @psoc_pci_pll_nf: PCI PLL NF value.
 * @psoc_pci_pll_od: PCI PLL OD value.
 * @psoc_pci_pll_div_factor: PCI PLL DIV FACTOR 1 value.
O
Oded Gabbay 已提交
94 95
 * @completion_queues_count: number of completion queues.
 * @high_pll: high PLL frequency used by the device.
96 97
 * @cb_pool_cb_cnt: number of CBs in the CB pool.
 * @cb_pool_cb_size: size of each CB in the CB pool.
O
Oded Gabbay 已提交
98 99 100
 * @tpc_enabled_mask: which TPCs are enabled.
 */
struct asic_fixed_properties {
O
Oded Gabbay 已提交
101
	struct hw_queue_properties	hw_queues_props[HL_MAX_QUEUES];
102
	struct armcp_info	armcp_info;
103 104
	char			uboot_ver[VERSION_MAX_LEN];
	char			preboot_ver[VERSION_MAX_LEN];
O
Oded Gabbay 已提交
105 106 107 108 109 110 111 112 113
	u64			sram_base_address;
	u64			sram_end_address;
	u64			sram_user_base_address;
	u64			dram_base_address;
	u64			dram_end_address;
	u64			dram_user_base_address;
	u64			dram_size;
	u64			dram_pci_bar_size;
	u64			host_phys_base_address;
114
	u64			max_power_default;
O
Oded Gabbay 已提交
115 116 117 118 119 120 121
	u64			va_space_host_start_address;
	u64			va_space_host_end_address;
	u64			va_space_dram_start_address;
	u64			va_space_dram_end_address;
	u32			cfg_size;
	u32			sram_size;
	u32			max_asid;
122
	u32			num_of_events;
123 124 125 126
	u32			psoc_pci_pll_nr;
	u32			psoc_pci_pll_nf;
	u32			psoc_pci_pll_od;
	u32			psoc_pci_pll_div_factor;
O
Oded Gabbay 已提交
127
	u32			high_pll;
128 129
	u32			cb_pool_cb_cnt;
	u32			cb_pool_cb_size;
O
Oded Gabbay 已提交
130 131 132 133 134
	u8			completion_queues_count;
	u8			tpc_enabled_mask;
};


/*
 * Command Buffers
 */

#define HL_MAX_CB_SIZE		(2 * 1024 * 1024)	/* 2MB */

/**
 * struct hl_cb_mgr - describes a Command Buffer Manager.
 * @cb_lock: protects cb_handles.
 * @cb_handles: an idr to hold all command buffer handles.
 *
 * One instance is embedded in struct hl_fpriv (cb_mgr) and one in
 * struct hl_device (kernel_cb_mgr).
 */
struct hl_cb_mgr {
	spinlock_t		cb_lock;
	struct idr		cb_handles; /* protected by cb_lock */
};

/**
 * struct hl_cb - describes a Command Buffer.
 * @refcount: reference counter for usage of the CB.
 * @hdev: pointer to device this CB belongs to.
 * @lock: spinlock to protect mmap/cs flows.
 * @pool_list: node in pool list of command buffers.
 * @kernel_address: holds the CB's kernel virtual address (stored as a u64,
 *                  not as a pointer).
 * @bus_address: holds the CB's DMA address.
 * @mmap_size: holds the CB's size that was mmaped.
 * @size: holds the CB's size.
 * @id: the CB's ID.
 * @ctx_id: holds the ID of the owner's context.
 * @mmap: true if the CB is currently mmaped to user (u8 used as a flag).
 * @is_pool: true if CB was acquired from the pool, false otherwise (u8 used
 *           as a flag).
 */
struct hl_cb {
	struct kref		refcount;
	struct hl_device	*hdev;
	spinlock_t		lock;
	struct list_head	pool_list;
	u64			kernel_address;
	dma_addr_t		bus_address;
	u32			mmap_size;
	u32			size;
	u32			id;
	u32			ctx_id;
	u8			mmap;
	u8			is_pool;
};


/*
 * QUEUES
 */

struct hl_cs_job;

/*
 * Currently, there are two limitations on the maximum length of a queue:
 *
 * 1. The memory footprint of the queue. The current allocated space for the
 *    queue is PAGE_SIZE. Because each entry in the queue is HL_BD_SIZE,
 *    the maximum length of the queue can be PAGE_SIZE / HL_BD_SIZE,
 *    which currently is 4096/16 = 256 entries.
 *
 *    To increase that, we need either to decrease the size of the
 *    BD (difficult), or allocate more than a single page (easier).
 *
 * 2. Because the size of the JOB handle field in the BD CTL / completion queue
 *    is 10-bit, we can have up to 1024 open jobs per hardware queue.
 *    Therefore, each queue can hold up to 1024 entries.
 *
 * HL_QUEUE_LENGTH is in units of struct hl_bd.
 * HL_QUEUE_LENGTH * sizeof(struct hl_bd) should be <= HL_PAGE_SIZE
 */

#define HL_PAGE_SIZE			4096 /* minimum page size */
/*
 * Must be power of 2 (HL_PAGE_SIZE / HL_BD_SIZE): hl_pi_2_offset() wraps an
 * index by masking it with (HL_QUEUE_LENGTH - 1).
 */
#define HL_QUEUE_LENGTH			256
#define HL_QUEUE_SIZE_IN_BYTES		(HL_QUEUE_LENGTH * HL_BD_SIZE)

/*
 * HL_CQ_LENGTH is in units of struct hl_cq_entry.
 * HL_CQ_LENGTH should be <= HL_PAGE_SIZE
 * NOTE(review): the line above compares an entry count with a byte size;
 * presumably HL_CQ_SIZE_IN_BYTES is what must fit in HL_PAGE_SIZE - confirm.
 */
#define HL_CQ_LENGTH			HL_QUEUE_LENGTH
#define HL_CQ_SIZE_IN_BYTES		(HL_CQ_LENGTH * HL_CQ_ENTRY_SIZE)

/* Must be power of 2 (HL_PAGE_SIZE / HL_EQ_ENTRY_SIZE) */
#define HL_EQ_LENGTH			64
#define HL_EQ_SIZE_IN_BYTES		(HL_EQ_LENGTH * HL_EQ_ENTRY_SIZE)


/**
 * struct hl_hw_queue - describes a H/W transport queue.
 * @shadow_queue: pointer to a shadow queue that holds pointers to jobs.
 * @queue_type: type of queue.
 * @kernel_address: holds the queue's kernel virtual address.
 * @bus_address: holds the queue's DMA address.
 * @pi: holds the queue's pi value.
 * @ci: holds the queue's ci value, AS CALCULATED BY THE DRIVER (not real ci).
 * @hw_queue_id: the id of the H/W queue.
 * @int_queue_len: length of internal queue (number of entries).
 * @valid: is the queue valid (queues are kept in an array and not all of
 *		its entries exist).
 */
struct hl_hw_queue {
	struct hl_cs_job	**shadow_queue;
	enum hl_queue_type	queue_type;
	u64			kernel_address;
	dma_addr_t		bus_address;
	u32			pi;
	u32			ci;
	u32			hw_queue_id;
	u16			int_queue_len;
	u8			valid;
};

/**
 * struct hl_cq - describes a completion queue
 * @hdev: pointer to the device structure
 * @kernel_address: holds the queue's kernel virtual address
 * @bus_address: holds the queue's DMA address
 * @hw_queue_id: the id of the matching H/W queue
 * @ci: ci inside the queue
 * @pi: pi inside the queue
 * @free_slots_cnt: counter of free slots in queue
 */
struct hl_cq {
	struct hl_device	*hdev;
	u64			kernel_address;
	dma_addr_t		bus_address;
	u32			hw_queue_id;
	u32			ci;
	u32			pi;
	atomic_t		free_slots_cnt;
};

269 270 271 272 273 274 275 276 277 278 279 280 281 282
/**
 * struct hl_eq - describes the event queue (single one per device)
 * @hdev: pointer to the device structure
 * @kernel_address: holds the queue's kernel virtual address
 * @bus_address: holds the queue's DMA address
 * @ci: ci inside the queue
 */
struct hl_eq {
	struct hl_device	*hdev;
	u64			kernel_address;
	dma_addr_t		bus_address;
	u32			ci;
};

/*
 * ASICs
 */

/**
 * enum hl_asic_type - ASIC types known to the driver.
 * @ASIC_AUTO_DETECT: the ASIC type is set automatically.
 * @ASIC_GOYA: a Goya device.
 * @ASIC_INVALID: not a valid ASIC type.
 */
enum hl_asic_type {
	ASIC_AUTO_DETECT = 0,
	ASIC_GOYA = 1,
	ASIC_INVALID = 2,
};

/**
 * enum hl_pm_mng_profile - power management profile.
 * @PM_AUTO: internal clock is set by KMD.
 * @PM_MANUAL: internal clock is set by the user.
 * @PM_LAST: last power management type.
 */
enum hl_pm_mng_profile {
	PM_AUTO = 1,
	PM_MANUAL,
	PM_LAST
};

/**
 * enum hl_pll_frequency - PLL frequency selection.
 * @PLL_HIGH: the high frequency.
 * @PLL_LOW: the low frequency.
 * @PLL_LAST: the last frequency values that the user configured.
 */
enum hl_pll_frequency {
	PLL_HIGH = 1,
	PLL_LOW = 2,
	PLL_LAST = 3,
};

O
Oded Gabbay 已提交
324 325 326 327 328
/**
 * struct hl_asic_funcs - ASIC specific functions that are can be called from
 *                        common code.
 * @early_init: sets up early driver state (pre sw_init), doesn't configure H/W.
 * @early_fini: tears down what was done in early_init.
329 330
 * @late_init: sets up late driver/hw state (post hw_init) - Optional.
 * @late_fini: tears down what was done in late_init (pre hw_fini) - Optional.
O
Oded Gabbay 已提交
331 332
 * @sw_init: sets up driver state, does not configure H/W.
 * @sw_fini: tears down driver state, does not configure H/W.
333 334
 * @hw_init: sets up the H/W state.
 * @hw_fini: tears down the H/W state.
335 336 337
 * @halt_engines: halt engines, needed for reset sequence. This also disables
 *                interrupts from the device. Should be called before
 *                hw_fini and before CS rollback.
O
Oded Gabbay 已提交
338 339
 * @suspend: handles IP specific H/W or SW changes for suspend.
 * @resume: handles IP specific H/W or SW changes for resume.
340 341
 * @mmap: mmap function, does nothing.
 * @cb_mmap: maps a CB.
O
Oded Gabbay 已提交
342 343
 * @ring_doorbell: increment PI on a given QMAN.
 * @flush_pq_write: flush PQ entry write if necessary, WARN if flushing failed.
O
Oded Gabbay 已提交
344 345 346 347 348 349 350 351
 * @dma_alloc_coherent: Allocate coherent DMA memory by calling
 *                      dma_alloc_coherent(). This is ASIC function because its
 *                      implementation is not trivial when the driver is loaded
 *                      in simulation mode (not upstreamed).
 * @dma_free_coherent: Free coherent DMA memory by calling dma_free_coherent().
 *                     This is ASIC function because its implementation is not
 *                     trivial when the driver is loaded in simulation mode
 *                     (not upstreamed).
O
Oded Gabbay 已提交
352 353 354 355 356 357 358
 * @get_int_queue_base: get the internal queue base address.
 * @test_queues: run simple test on all queues for sanity check.
 * @dma_pool_zalloc: small DMA allocation of coherent memory from DMA pool.
 *                   size of allocation is HL_DMA_POOL_BLK_SIZE.
 * @dma_pool_free: free small DMA allocation from pool.
 * @cpu_accessible_dma_pool_alloc: allocate CPU PQ packet from DMA pool.
 * @cpu_accessible_dma_pool_free: free CPU PQ packet from DMA pool.
359
 * @update_eq_ci: update event queue CI.
360
 * @add_device_attr: add ASIC specific device attributes.
361
 * @handle_eqe: handle event queue entry (IRQ) from ArmCP.
362
 * @set_pll_profile: change PLL profile (manual/automatic).
363
 * @get_events_stat: retrieve event queue entries histogram.
364 365
 * @enable_clock_gating: enable clock gating for reducing power consumption.
 * @disable_clock_gating: disable clock for accessing registers on HBW.
O
Oded Gabbay 已提交
366 367
 * @hw_queues_lock: acquire H/W queues lock.
 * @hw_queues_unlock: release H/W queues lock.
368
 * @get_eeprom_data: retrieve EEPROM data from F/W.
O
Oded Gabbay 已提交
369
 * @send_cpu_message: send buffer to ArmCP.
O
Oded Gabbay 已提交
370 371 372 373
 */
struct hl_asic_funcs {
	int (*early_init)(struct hl_device *hdev);
	int (*early_fini)(struct hl_device *hdev);
374 375
	int (*late_init)(struct hl_device *hdev);
	void (*late_fini)(struct hl_device *hdev);
O
Oded Gabbay 已提交
376 377
	int (*sw_init)(struct hl_device *hdev);
	int (*sw_fini)(struct hl_device *hdev);
378 379
	int (*hw_init)(struct hl_device *hdev);
	void (*hw_fini)(struct hl_device *hdev, bool hard_reset);
380
	void (*halt_engines)(struct hl_device *hdev, bool hard_reset);
O
Oded Gabbay 已提交
381 382
	int (*suspend)(struct hl_device *hdev);
	int (*resume)(struct hl_device *hdev);
383 384 385
	int (*mmap)(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
	int (*cb_mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
			u64 kaddress, phys_addr_t paddress, u32 size);
O
Oded Gabbay 已提交
386 387
	void (*ring_doorbell)(struct hl_device *hdev, u32 hw_queue_id, u32 pi);
	void (*flush_pq_write)(struct hl_device *hdev, u64 *pq, u64 exp_val);
O
Oded Gabbay 已提交
388 389 390 391
	void* (*dma_alloc_coherent)(struct hl_device *hdev, size_t size,
					dma_addr_t *dma_handle, gfp_t flag);
	void (*dma_free_coherent)(struct hl_device *hdev, size_t size,
					void *cpu_addr, dma_addr_t dma_handle);
O
Oded Gabbay 已提交
392 393 394 395 396 397 398 399 400 401 402
	void* (*get_int_queue_base)(struct hl_device *hdev, u32 queue_id,
				dma_addr_t *dma_handle, u16 *queue_len);
	int (*test_queues)(struct hl_device *hdev);
	void* (*dma_pool_zalloc)(struct hl_device *hdev, size_t size,
				gfp_t mem_flags, dma_addr_t *dma_handle);
	void (*dma_pool_free)(struct hl_device *hdev, void *vaddr,
				dma_addr_t dma_addr);
	void* (*cpu_accessible_dma_pool_alloc)(struct hl_device *hdev,
				size_t size, dma_addr_t *dma_handle);
	void (*cpu_accessible_dma_pool_free)(struct hl_device *hdev,
				size_t size, void *vaddr);
403
	void (*update_eq_ci)(struct hl_device *hdev, u32 val);
404 405
	void (*add_device_attr)(struct hl_device *hdev,
				struct attribute_group *dev_attr_grp);
406 407
	void (*handle_eqe)(struct hl_device *hdev,
				struct hl_eq_entry *eq_entry);
408 409
	void (*set_pll_profile)(struct hl_device *hdev,
			enum hl_pll_frequency freq);
410
	void* (*get_events_stat)(struct hl_device *hdev, u32 *size);
411 412
	void (*enable_clock_gating)(struct hl_device *hdev);
	void (*disable_clock_gating)(struct hl_device *hdev);
O
Oded Gabbay 已提交
413 414
	void (*hw_queues_lock)(struct hl_device *hdev);
	void (*hw_queues_unlock)(struct hl_device *hdev);
415 416
	int (*get_eeprom_data)(struct hl_device *hdev, void *data,
				size_t max_size);
O
Oded Gabbay 已提交
417 418
	int (*send_cpu_message)(struct hl_device *hdev, u32 *msg,
				u16 len, u32 timeout, long *result);
O
Oded Gabbay 已提交
419
};
O
Oded Gabbay 已提交
420

/*
 * CONTEXTS
 */

/* NOTE(review): presumably the ASID reserved for the kernel context - confirm. */
#define HL_KERNEL_ASID_ID	0

/**
 * struct hl_ctx - user/kernel context.
 * @hpriv: pointer to the private (KMD) data of the process (fd).
 * @hdev: pointer to the device structure.
 * @refcount: reference counter for the context. Context is released only when
 *		this hits 0. It is incremented on CS and CS_WAIT.
 * @asid: context's unique address space ID in the device's MMU.
 */
struct hl_ctx {
	struct hl_fpriv		*hpriv;
	struct hl_device	*hdev;
	struct kref		refcount;
	u32			asid;
};

/**
 * struct hl_ctx_mgr - for handling multiple contexts.
 * @ctx_lock: protects ctx_handles.
 * @ctx_handles: idr to hold all ctx handles.
 *
 * One instance is embedded in struct hl_fpriv (ctx_mgr).
 */
struct hl_ctx_mgr {
	struct mutex		ctx_lock;
	struct idr		ctx_handles;
};


O
Oded Gabbay 已提交
454 455 456 457 458 459 460 461 462
/**
 * struct hl_cs_job - command submission job.
 * @finish_work: workqueue object to run when job is completed.
 * @id: the id of this job inside a CS.
 */
struct hl_cs_job {
	struct work_struct	finish_work;
	u32			id;
};
463 464


/*
 * FILE PRIVATE STRUCTURE
 */

/**
 * struct hl_fpriv - process information stored in FD private data.
 * @hdev: habanalabs device structure.
 * @filp: pointer to the given file structure.
 * @taskpid: current process ID.
474 475
 * @ctx: current executing context.
 * @ctx_mgr: context manager to handle multiple context for this FD.
476
 * @cb_mgr: command buffer manager to handle multiple buffers for this FD.
O
Oded Gabbay 已提交
477 478 479 480 481 482
 * @refcount: number of related contexts.
 */
struct hl_fpriv {
	struct hl_device	*hdev;
	struct file		*filp;
	struct pid		*taskpid;
483 484
	struct hl_ctx		*ctx; /* TODO: remove for multiple ctx */
	struct hl_ctx_mgr	ctx_mgr;
485
	struct hl_cb_mgr	cb_mgr;
O
Oded Gabbay 已提交
486 487 488 489 490 491 492 493 494 495 496 497 498
	struct kref		refcount;
};


/*
 * DEVICES
 */

/* Theoretical limit only. A single host can only contain up to 4 or 8 PCIe
 * x16 cards. In extreme cases, there are hosts that can accommodate 16 cards.
 */
#define HL_MAX_MINORS	256

/*
 * Registers read & write functions.
 */

u32 hl_rreg(struct hl_device *hdev, u32 reg);
void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);

/*
 * Poll the register at offset @addr from hdev->rmmio through
 * readl_poll_timeout(); the remaining arguments are forwarded unchanged.
 * @hdev and @addr are parenthesized so that compound expressions passed by
 * the caller bind as a single operand.
 */
#define hl_poll_timeout(hdev, addr, val, cond, sleep_us, timeout_us) \
	readl_poll_timeout((hdev)->rmmio + (addr), val, cond, sleep_us, \
				timeout_us)

/*
 * Register access shorthands. They expand to calls that use a variable named
 * 'hdev', which therefore must be in scope at the call site.
 */
#define RREG32(reg) hl_rreg(hdev, (reg))
#define WREG32(reg, v) hl_wreg(hdev, (reg), (v))
#define DREG32(reg) pr_info("REGISTER: " #reg " : 0x%08X\n",	\
				hl_rreg(hdev, (reg)))

/*
 * Read-modify-write of a register: bits that are set in @mask keep their
 * current value, bits that are clear in @mask are taken from @val.
 */
#define WREG32_P(reg, val, mask)				\
	do {							\
		u32 tmp_ = RREG32(reg);				\
		tmp_ &= (mask);					\
		tmp_ |= ((val) & ~(mask));			\
		WREG32(reg, tmp_);				\
	} while (0)
/* AND the register with @and (clears the bits that are clear in @and). */
#define WREG32_AND(reg, and) WREG32_P(reg, 0, and)
/* OR the register with @or (sets the bits that are set in @or). */
#define WREG32_OR(reg, or) WREG32_P(reg, or, ~(or))

#define REG_FIELD_SHIFT(reg, field) reg##_##field##_SHIFT
#define REG_FIELD_MASK(reg, field) reg##_##field##_MASK
/*
 * Replace one field of register mm<reg>: the bits of <reg>_<field>_MASK are
 * cleared and @val, shifted left by <reg>_<field>_SHIFT, is ORed in.
 * @val is not masked here, so it must fit within the field.
 */
#define WREG32_FIELD(reg, field, val)	\
	WREG32(mm##reg, (RREG32(mm##reg) & ~REG_FIELD_MASK(reg, field)) | \
			(val) << REG_FIELD_SHIFT(reg, field))

/* Forward declaration; struct hl_device only holds a pointer to this type. */
struct hwmon_chip_info;

O
Oded Gabbay 已提交
532 533 534
/**
 * struct hl_device - habanalabs device structure.
 * @pdev: pointer to PCI device, can be NULL in case of simulator device.
O
Oded Gabbay 已提交
535 536
 * @pcie_bar: array of available PCIe bars.
 * @rmmio: configuration area address on SRAM.
O
Oded Gabbay 已提交
537 538
 * @cdev: related char device.
 * @dev: realted kernel basic device structure.
539
 * @work_freq: delayed work to lower device frequency if possible.
O
Oded Gabbay 已提交
540 541
 * @asic_name: ASIC specific nmae.
 * @asic_type: ASIC specific type.
O
Oded Gabbay 已提交
542 543 544
 * @completion_queue: array of hl_cq.
 * @cq_wq: work queue of completion queues for executing work in process context
 * @eq_wq: work queue of event queue for executing work in process context.
545
 * @kernel_ctx: KMD context structure.
O
Oded Gabbay 已提交
546
 * @kernel_queues: array of hl_hw_queue.
547
 * @kernel_cb_mgr: command buffer manager for creating/destroying/handling CGs.
548
 * @event_queue: event queue for IRQ from ArmCP.
O
Oded Gabbay 已提交
549 550 551 552
 * @dma_pool: DMA pool for small allocations.
 * @cpu_accessible_dma_mem: KMD <-> ArmCP shared memory CPU address.
 * @cpu_accessible_dma_address: KMD <-> ArmCP shared memory DMA address.
 * @cpu_accessible_dma_pool: KMD <-> ArmCP shared memory pool.
553 554 555 556 557 558 559 560
 * @asid_bitmap: holds used/available ASIDs.
 * @asid_mutex: protects asid_bitmap.
 * @fd_open_cnt_lock: lock for updating fd_open_cnt in hl_device_open. Although
 *                    fd_open_cnt is atomic, we need this lock to serialize
 *                    the open function because the driver currently supports
 *                    only a single process at a time. In addition, we need a
 *                    lock here so we can flush user processes which are opening
 *                    the device while we are trying to hard reset it
O
Oded Gabbay 已提交
561
 * @send_cpu_message_lock: enforces only one message in KMD <-> ArmCP queue.
O
Oded Gabbay 已提交
562 563 564
 * @asic_prop: ASIC specific immutable properties.
 * @asic_funcs: ASIC specific functions.
 * @asic_specific: ASIC specific information to use only from ASIC files.
565 566 567
 * @hwmon_dev: H/W monitor device.
 * @pm_mng_profile: current power management profile.
 * @hl_chip_info: ASIC's sensors information.
568 569
 * @cb_pool: list of preallocated CBs.
 * @cb_pool_lock: protects the CB pool.
570
 * @user_ctx: current user context executing.
571
 * @curr_pll_profile: current PLL profile.
572
 * @fd_open_cnt: number of open user processes.
573 574 575
 * @max_power: the max power of the device, as configured by the sysadmin. This
 *             value is saved so in case of hard-reset, KMD will restore this
 *             value and update the F/W after the re-initialization
O
Oded Gabbay 已提交
576
 * @major: habanalabs KMD major.
577
 * @high_pll: high PLL profile frequency.
O
Oded Gabbay 已提交
578 579
 * @id: device minor.
 * @disabled: is device disabled.
580 581
 * @late_init_done: is late init stage was done during initialization.
 * @hwmon_initialized: is H/W monitor sensors was initialized.
O
Oded Gabbay 已提交
582 583 584
 */
struct hl_device {
	struct pci_dev			*pdev;
O
Oded Gabbay 已提交
585 586
	void __iomem			*pcie_bar[6];
	void __iomem			*rmmio;
O
Oded Gabbay 已提交
587 588
	struct cdev			cdev;
	struct device			*dev;
589
	struct delayed_work		work_freq;
O
Oded Gabbay 已提交
590 591
	char				asic_name[16];
	enum hl_asic_type		asic_type;
O
Oded Gabbay 已提交
592 593
	struct hl_cq			*completion_queue;
	struct workqueue_struct		*cq_wq;
594
	struct workqueue_struct		*eq_wq;
595
	struct hl_ctx			*kernel_ctx;
O
Oded Gabbay 已提交
596
	struct hl_hw_queue		*kernel_queues;
597
	struct hl_cb_mgr		kernel_cb_mgr;
598
	struct hl_eq			event_queue;
O
Oded Gabbay 已提交
599 600 601 602
	struct dma_pool			*dma_pool;
	void				*cpu_accessible_dma_mem;
	dma_addr_t			cpu_accessible_dma_address;
	struct gen_pool			*cpu_accessible_dma_pool;
603 604 605 606
	unsigned long			*asid_bitmap;
	struct mutex			asid_mutex;
	/* TODO: remove fd_open_cnt_lock for multiple process support */
	struct mutex			fd_open_cnt_lock;
O
Oded Gabbay 已提交
607
	struct mutex			send_cpu_message_lock;
O
Oded Gabbay 已提交
608 609 610
	struct asic_fixed_properties	asic_prop;
	const struct hl_asic_funcs	*asic_funcs;
	void				*asic_specific;
611 612 613
	struct device			*hwmon_dev;
	enum hl_pm_mng_profile		pm_mng_profile;
	struct hwmon_chip_info		*hl_chip_info;
614 615 616 617

	struct list_head		cb_pool;
	spinlock_t			cb_pool_lock;

618 619
	/* TODO: remove user_ctx for multiple process support */
	struct hl_ctx			*user_ctx;
620 621

	atomic_t			curr_pll_profile;
622
	atomic_t			fd_open_cnt;
623
	u64				max_power;
O
Oded Gabbay 已提交
624
	u32				major;
625
	u32				high_pll;
O
Oded Gabbay 已提交
626 627
	u16				id;
	u8				disabled;
628 629
	u8				late_init_done;
	u8				hwmon_initialized;
O
Oded Gabbay 已提交
630 631

	/* Parameters for bring-up */
632
	u8				cpu_enable;
O
Oded Gabbay 已提交
633
	u8				reset_pcilink;
O
Oded Gabbay 已提交
634
	u8				cpu_queues_enable;
635 636
	u8				fw_loading;
	u8				pldm;
O
Oded Gabbay 已提交
637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676
};


/*
 * IOCTLs
 */

/**
 * typedef hl_ioctl_t - typedef for ioctl function in the driver
 * @hpriv: pointer to the FD's private data, which contains state of
 *		user process
 * @data: pointer to the input/output arguments structure of the IOCTL
 *
 * This names a function type, not a pointer type; struct hl_ioctl_desc
 * stores a pointer to it.
 *
 * Return: 0 for success, negative value for error
 */
typedef int hl_ioctl_t(struct hl_fpriv *hpriv, void *data);

/**
 * struct hl_ioctl_desc - describes an IOCTL entry of the driver.
 * @cmd: the IOCTL code as created by the kernel macros.
 * @func: pointer to the driver's function (of type hl_ioctl_t) that should be
 *        called for this IOCTL.
 */
struct hl_ioctl_desc {
	unsigned int cmd;
	hl_ioctl_t *func;
};


/*
 * Kernel module functions that can be accessed by entire module
 */

int hl_device_open(struct inode *inode, struct file *filp);
int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
		enum hl_asic_type asic_type, int minor);
void destroy_hdev(struct hl_device *hdev);
int hl_poll_timeout_memory(struct hl_device *hdev, u64 addr, u32 timeout_us,
				u32 *val);
int hl_poll_timeout_device_memory(struct hl_device *hdev, void __iomem *addr,
				u32 timeout_us, u32 *val);

/* H/W queues */
int hl_hw_queues_create(struct hl_device *hdev);
void hl_hw_queues_destroy(struct hl_device *hdev);
int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
				u32 cb_size, u64 cb_ptr);
u32 hl_hw_queue_add_ptr(u32 ptr, u16 val);
void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id);

/* Advance a queue pointer by one entry via hl_hw_queue_add_ptr(). */
#define hl_queue_inc_ptr(p)		hl_hw_queue_add_ptr((p), 1)
/* Reduce a PI value to an index below HL_QUEUE_LENGTH (a power of 2). */
#define hl_pi_2_offset(pi)		((pi) & (HL_QUEUE_LENGTH - 1))

/* Completion and event queues */
int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id);
void hl_cq_fini(struct hl_device *hdev, struct hl_cq *q);
int hl_eq_init(struct hl_device *hdev, struct hl_eq *q);
void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q);
irqreturn_t hl_irq_handler_cq(int irq, void *arg);
irqreturn_t hl_irq_handler_eq(int irq, void *arg);

/* ASIDs */
int hl_asid_init(struct hl_device *hdev);
void hl_asid_fini(struct hl_device *hdev);
unsigned long hl_asid_alloc(struct hl_device *hdev);
void hl_asid_free(struct hl_device *hdev, unsigned long asid);

/* Contexts */
int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv);
void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx);
int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx);
int hl_ctx_put(struct hl_ctx *ctx);
void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr);
void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr);

/* Device */
int hl_device_init(struct hl_device *hdev, struct class *hclass);
void hl_device_fini(struct hl_device *hdev);
int hl_device_suspend(struct hl_device *hdev);
int hl_device_resume(struct hl_device *hdev);
void hl_hpriv_get(struct hl_fpriv *hpriv);
void hl_hpriv_put(struct hl_fpriv *hpriv);
int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq);
int hl_build_hwmon_channel_info(struct hl_device *hdev,
		struct armcp_sensor *sensors_arr);

int hl_sysfs_init(struct hl_device *hdev);
void hl_sysfs_fini(struct hl_device *hdev);

int hl_hwmon_init(struct hl_device *hdev);
void hl_hwmon_fini(struct hl_device *hdev);

/* Command buffers */
int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, u32 cb_size,
		u64 *handle, int ctx_id);
int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle);
int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
struct hl_cb *hl_cb_get(struct hl_device *hdev,	struct hl_cb_mgr *mgr,
			u32 handle);
void hl_cb_put(struct hl_cb *cb);
void hl_cb_mgr_init(struct hl_cb_mgr *mgr);
void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr);
struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size);
int hl_cb_pool_init(struct hl_device *hdev);
int hl_cb_pool_fini(struct hl_device *hdev);

void goya_set_asic_funcs(struct hl_device *hdev);

/* Sensors, frequency and power */
long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr);
void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq);
long hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr);
long hl_get_voltage(struct hl_device *hdev, int sensor_index, u32 attr);
long hl_get_current(struct hl_device *hdev, int sensor_index, u32 attr);
long hl_get_fan_speed(struct hl_device *hdev, int sensor_index, u32 attr);
long hl_get_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr);
void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
			long value);
u64 hl_get_max_power(struct hl_device *hdev);
void hl_set_max_power(struct hl_device *hdev, u64 value);

/* IOCTLs */
long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data);

#endif /* HABANALABSP_H_ */