提交 cb2d47a4 编写于 作者: D Dave Airlie

Merge tag 'drm-amdkfd-next-2015-06-03' of git://people.freedesktop.org/~gabbayo/linux into drm-next

drm-amdkfd-next-2015-06-03:

- Add the H/W debugger support module, including new IOCTLs to:
  - register/unregister a process as a debugged process
  - Set address watch-point in the debugged process's GPU kernel
  - Do a wave control operation in the debugged process's waves
  See the commit messages for more details on the available operations.

  The debugged process can only perform debug operation on itself. It is
  blocked by the amdkfd+H/W from performing operations on other processes's
  waves or GPU kernels. The blocking is done by setting the VMID and PASID of
  the debugged process in the packets that are sent to the CP with the debug
  instructions.

- Add support for static user-mode queues. These queues are regular queues,
  but because they belong to the debugged process, we need to make sure the CP
  doesn't preempt them during a debug operation. Therefore, we mark them as
  static for the CP ignore them during preemption.

- Support killing all the waves when a process is terminated. This is needed
  in case a process is terminated but we can't UNMAP its queues (can occur due
  to several reasons). In that case, the CP could be stuck unless we kill all
  its waves. This function is *very* important as it provides the kernel a high
  level of control over the GPU. The reason we didn't upstream this function
  so far, is because it is implemented using the H/W debugger module functions,
  so we had to wait until we can upstream the H/W debugger module.

- Replace declaration of bitmap from unsigned long to standard DECLARE_BITMAP

* tag 'drm-amdkfd-next-2015-06-03' of git://people.freedesktop.org/~gabbayo/linux:
  drm/amdkfd: Enforce kill all waves on process termination
  drm/radeon: Add ATC VMID<-->PASID functions to kfd->kgd
  drm/amdkfd: Implement address watch debugger IOCTL
  drm/amdkfd: Implement wave control debugger IOCTL
  drm/amdkfd: Implement (un)register debugger IOCTLs
  drm/amdkfd: Add address watch operation to debugger
  drm/amdkfd: Add wave control operation to debugger
  drm/amdkfd: Add skeleton H/W debugger module support
  drm/amdkfd: Add static user-mode queues support
  drm/amdkfd: add H/W debugger IOCTL set definitions
  drm/radeon: Add H/W debugger kfd->kgd functions
  drm/amdkfd: Use DECLARE_BITMAP
......@@ -12,6 +12,7 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \
kfd_kernel_queue_vi.o kfd_packet_manager.o \
kfd_process_queue_manager.o kfd_device_queue_manager.o \
kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \
kfd_interrupt.o kfd_events.o cik_event_interrupt.o
kfd_interrupt.o kfd_events.o cik_event_interrupt.o \
kfd_dbgdev.o kfd_dbgmgr.o
obj-$(CONFIG_HSA_AMD) += amdkfd.o
......@@ -35,6 +35,7 @@
#include <asm/processor.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_dbgmgr.h"
static long kfd_ioctl(struct file *, unsigned int, unsigned long);
static int kfd_open(struct inode *, struct file *);
......@@ -432,6 +433,301 @@ static int kfd_ioctl_set_memory_policy(struct file *filep,
return err;
}
static int kfd_ioctl_dbg_register(struct file *filep,
struct kfd_process *p, void *data)
{
struct kfd_ioctl_dbg_register_args *args = data;
struct kfd_dev *dev;
struct kfd_dbgmgr *dbgmgr_ptr;
struct kfd_process_device *pdd;
bool create_ok;
long status = 0;
dev = kfd_device_by_id(args->gpu_id);
if (dev == NULL)
return -EINVAL;
if (dev->device_info->asic_family == CHIP_CARRIZO) {
pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
return -EINVAL;
}
mutex_lock(kfd_get_dbgmgr_mutex());
mutex_lock(&p->mutex);
/*
* make sure that we have pdd, if this the first queue created for
* this process
*/
pdd = kfd_bind_process_to_device(dev, p);
if (IS_ERR(pdd)) {
mutex_unlock(&p->mutex);
mutex_unlock(kfd_get_dbgmgr_mutex());
return PTR_ERR(pdd);
}
if (dev->dbgmgr == NULL) {
/* In case of a legal call, we have no dbgmgr yet */
create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev);
if (create_ok) {
status = kfd_dbgmgr_register(dbgmgr_ptr, p);
if (status != 0)
kfd_dbgmgr_destroy(dbgmgr_ptr);
else
dev->dbgmgr = dbgmgr_ptr;
}
} else {
pr_debug("debugger already registered\n");
status = -EINVAL;
}
mutex_unlock(&p->mutex);
mutex_unlock(kfd_get_dbgmgr_mutex());
return status;
}
static int kfd_ioctl_dbg_unrgesiter(struct file *filep,
struct kfd_process *p, void *data)
{
struct kfd_ioctl_dbg_unregister_args *args = data;
struct kfd_dev *dev;
long status;
dev = kfd_device_by_id(args->gpu_id);
if (dev == NULL)
return -EINVAL;
if (dev->device_info->asic_family == CHIP_CARRIZO) {
pr_debug("kfd_ioctl_dbg_unrgesiter not supported on CZ\n");
return -EINVAL;
}
mutex_lock(kfd_get_dbgmgr_mutex());
status = kfd_dbgmgr_unregister(dev->dbgmgr, p);
if (status == 0) {
kfd_dbgmgr_destroy(dev->dbgmgr);
dev->dbgmgr = NULL;
}
mutex_unlock(kfd_get_dbgmgr_mutex());
return status;
}
/*
* Parse and generate variable size data structure for address watch.
* Total size of the buffer and # watch points is limited in order
* to prevent kernel abuse. (no bearing to the much smaller HW limitation
* which is enforced by dbgdev module)
* please also note that the watch address itself are not "copied from user",
* since it be set into the HW in user mode values.
*
*/
static int kfd_ioctl_dbg_address_watch(struct file *filep,
struct kfd_process *p, void *data)
{
struct kfd_ioctl_dbg_address_watch_args *args = data;
struct kfd_dev *dev;
struct dbg_address_watch_info aw_info;
unsigned char *args_buff;
long status;
void __user *cmd_from_user;
uint64_t watch_mask_value = 0;
unsigned int args_idx = 0;
memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info));
dev = kfd_device_by_id(args->gpu_id);
if (dev == NULL)
return -EINVAL;
if (dev->device_info->asic_family == CHIP_CARRIZO) {
pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
return -EINVAL;
}
cmd_from_user = (void __user *) args->content_ptr;
/* Validate arguments */
if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) ||
(args->buf_size_in_bytes <= sizeof(*args)) ||
(cmd_from_user == NULL))
return -EINVAL;
/* this is the actual buffer to work with */
args_buff = kmalloc(args->buf_size_in_bytes -
sizeof(*args), GFP_KERNEL);
if (args_buff == NULL)
return -ENOMEM;
status = copy_from_user(args_buff, cmd_from_user,
args->buf_size_in_bytes - sizeof(*args));
if (status != 0) {
pr_debug("Failed to copy address watch user data\n");
kfree(args_buff);
return -EINVAL;
}
aw_info.process = p;
aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx]));
args_idx += sizeof(aw_info.num_watch_points);
aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx];
args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points;
/*
* set watch address base pointer to point on the array base
* within args_buff
*/
aw_info.watch_address = (uint64_t *) &args_buff[args_idx];
/* skip over the addresses buffer */
args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points;
if (args_idx >= args->buf_size_in_bytes) {
kfree(args_buff);
return -EINVAL;
}
watch_mask_value = (uint64_t) args_buff[args_idx];
if (watch_mask_value > 0) {
/*
* There is an array of masks.
* set watch mask base pointer to point on the array base
* within args_buff
*/
aw_info.watch_mask = (uint64_t *) &args_buff[args_idx];
/* skip over the masks buffer */
args_idx += sizeof(aw_info.watch_mask) *
aw_info.num_watch_points;
} else {
/* just the NULL mask, set to NULL and skip over it */
aw_info.watch_mask = NULL;
args_idx += sizeof(aw_info.watch_mask);
}
if (args_idx > args->buf_size_in_bytes) {
kfree(args_buff);
return -EINVAL;
}
/* Currently HSA Event is not supported for DBG */
aw_info.watch_event = NULL;
mutex_lock(kfd_get_dbgmgr_mutex());
status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info);
mutex_unlock(kfd_get_dbgmgr_mutex());
kfree(args_buff);
return status;
}
/* Parse and generate fixed size data structure for wave control */
static int kfd_ioctl_dbg_wave_control(struct file *filep,
struct kfd_process *p, void *data)
{
struct kfd_ioctl_dbg_wave_control_args *args = data;
struct kfd_dev *dev;
struct dbg_wave_control_info wac_info;
unsigned char *args_buff;
uint32_t computed_buff_size;
long status;
void __user *cmd_from_user;
unsigned int args_idx = 0;
memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info));
/* we use compact form, independent of the packing attribute value */
computed_buff_size = sizeof(*args) +
sizeof(wac_info.mode) +
sizeof(wac_info.operand) +
sizeof(wac_info.dbgWave_msg.DbgWaveMsg) +
sizeof(wac_info.dbgWave_msg.MemoryVA) +
sizeof(wac_info.trapId);
dev = kfd_device_by_id(args->gpu_id);
if (dev == NULL)
return -EINVAL;
if (dev->device_info->asic_family == CHIP_CARRIZO) {
pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
return -EINVAL;
}
/* input size must match the computed "compact" size */
if (args->buf_size_in_bytes != computed_buff_size) {
pr_debug("size mismatch, computed : actual %u : %u\n",
args->buf_size_in_bytes, computed_buff_size);
return -EINVAL;
}
cmd_from_user = (void __user *) args->content_ptr;
if (cmd_from_user == NULL)
return -EINVAL;
/* this is the actual buffer to work with */
args_buff = kmalloc(args->buf_size_in_bytes - sizeof(*args),
GFP_KERNEL);
if (args_buff == NULL)
return -ENOMEM;
/* Now copy the entire buffer from user */
status = copy_from_user(args_buff, cmd_from_user,
args->buf_size_in_bytes - sizeof(*args));
if (status != 0) {
pr_debug("Failed to copy wave control user data\n");
kfree(args_buff);
return -EINVAL;
}
/* move ptr to the start of the "pay-load" area */
wac_info.process = p;
wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx]));
args_idx += sizeof(wac_info.operand);
wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx]));
args_idx += sizeof(wac_info.mode);
wac_info.trapId = *((uint32_t *)(&args_buff[args_idx]));
args_idx += sizeof(wac_info.trapId);
wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value =
*((uint32_t *)(&args_buff[args_idx]));
wac_info.dbgWave_msg.MemoryVA = NULL;
mutex_lock(kfd_get_dbgmgr_mutex());
pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n",
wac_info.process, wac_info.operand,
wac_info.mode, wac_info.trapId,
wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info);
pr_debug("Returned status of dbg manager is %ld\n", status);
mutex_unlock(kfd_get_dbgmgr_mutex());
kfree(args_buff);
return status;
}
static int kfd_ioctl_get_clock_counters(struct file *filep,
struct kfd_process *p, void *data)
{
......@@ -612,6 +908,18 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
kfd_ioctl_wait_events, 0),
AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER,
kfd_ioctl_dbg_register, 0),
AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER,
kfd_ioctl_dbg_unrgesiter, 0),
AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH,
kfd_ioctl_dbg_address_watch, 0),
AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL,
kfd_ioctl_dbg_wave_control, 0),
};
#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
......
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/device.h>
#include "kfd_pm4_headers.h"
#include "kfd_pm4_headers_diq.h"
#include "kfd_kernel_queue.h"
#include "kfd_priv.h"
#include "kfd_pm4_opcodes.h"
#include "cik_regs.h"
#include "kfd_dbgmgr.h"
#include "kfd_dbgdev.h"
#include "kfd_device_queue_manager.h"
#include "../../radeon/cik_reg.h"
static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev)
{
BUG_ON(!dev || !dev->kfd2kgd);
dev->kfd2kgd->address_watch_disable(dev->kgd);
}
static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
unsigned int pasid, uint64_t vmid0_address,
uint32_t *packet_buff, size_t size_in_bytes)
{
struct pm4__release_mem *rm_packet;
struct pm4__indirect_buffer_pasid *ib_packet;
struct kfd_mem_obj *mem_obj;
size_t pq_packets_size_in_bytes;
union ULARGE_INTEGER *largep;
union ULARGE_INTEGER addr;
struct kernel_queue *kq;
uint64_t *rm_state;
unsigned int *ib_packet_buff;
int status;
BUG_ON(!dbgdev || !dbgdev->kq || !packet_buff || !size_in_bytes);
kq = dbgdev->kq;
pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) +
sizeof(struct pm4__indirect_buffer_pasid);
/*
* We acquire a buffer from DIQ
* The receive packet buff will be sitting on the Indirect Buffer
* and in the PQ we put the IB packet + sync packet(s).
*/
status = kq->ops.acquire_packet_buffer(kq,
pq_packets_size_in_bytes / sizeof(uint32_t),
&ib_packet_buff);
if (status != 0) {
pr_err("amdkfd: acquire_packet_buffer failed\n");
return status;
}
memset(ib_packet_buff, 0, pq_packets_size_in_bytes);
ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff);
ib_packet->header.count = 3;
ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID;
ib_packet->header.type = PM4_TYPE_3;
largep = (union ULARGE_INTEGER *) &vmid0_address;
ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2;
ib_packet->bitfields3.ib_base_hi = largep->u.high_part;
ib_packet->control = (1 << 23) | (1 << 31) |
((size_in_bytes / sizeof(uint32_t)) & 0xfffff);
ib_packet->bitfields5.pasid = pasid;
/*
* for now we use release mem for GPU-CPU synchronization
* Consider WaitRegMem + WriteData as a better alternative
* we get a GART allocations ( gpu/cpu mapping),
* for the sync variable, and wait until:
* (a) Sync with HW
* (b) Sync var is written by CP to mem.
*/
rm_packet = (struct pm4__release_mem *) (ib_packet_buff +
(sizeof(struct pm4__indirect_buffer_pasid) /
sizeof(unsigned int)));
status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t),
&mem_obj);
if (status != 0) {
pr_err("amdkfd: Failed to allocate GART memory\n");
kq->ops.rollback_packet(kq);
return status;
}
rm_state = (uint64_t *) mem_obj->cpu_ptr;
*rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING;
rm_packet->header.opcode = IT_RELEASE_MEM;
rm_packet->header.type = PM4_TYPE_3;
rm_packet->header.count = sizeof(struct pm4__release_mem) /
sizeof(unsigned int) - 2;
rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
rm_packet->bitfields2.event_index =
event_index___release_mem__end_of_pipe;
rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
rm_packet->bitfields2.atc = 0;
rm_packet->bitfields2.tc_wb_action_ena = 1;
addr.quad_part = mem_obj->gpu_addr;
rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2;
rm_packet->address_hi = addr.u.high_part;
rm_packet->bitfields3.data_sel =
data_sel___release_mem__send_64_bit_data;
rm_packet->bitfields3.int_sel =
int_sel___release_mem__send_data_after_write_confirm;
rm_packet->bitfields3.dst_sel =
dst_sel___release_mem__memory_controller;
rm_packet->data_lo = QUEUESTATE__ACTIVE;
kq->ops.submit_packet(kq);
/* Wait till CP writes sync code: */
status = amdkfd_fence_wait_timeout(
(unsigned int *) rm_state,
QUEUESTATE__ACTIVE, 1500);
kfd_gtt_sa_free(dbgdev->dev, mem_obj);
return status;
}
static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev)
{
BUG_ON(!dbgdev);
/*
* no action is needed in this case,
* just make sure diq will not be used
*/
dbgdev->kq = NULL;
return 0;
}
static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
{
struct queue_properties properties;
unsigned int qid;
struct kernel_queue *kq = NULL;
int status;
BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->dev);
status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
&properties, 0, KFD_QUEUE_TYPE_DIQ,
&qid);
if (status) {
pr_err("amdkfd: Failed to create DIQ\n");
return status;
}
pr_debug("DIQ Created with queue id: %d\n", qid);
kq = pqm_get_kernel_queue(dbgdev->pqm, qid);
if (kq == NULL) {
pr_err("amdkfd: Error getting DIQ\n");
pqm_destroy_queue(dbgdev->pqm, qid);
return -EFAULT;
}
dbgdev->kq = kq;
return status;
}
static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev)
{
BUG_ON(!dbgdev || !dbgdev->dev);
/* disable watch address */
dbgdev_address_watch_disable_nodiq(dbgdev->dev);
return 0;
}
static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev)
{
/* todo - disable address watch */
int status;
BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->kq);
status = pqm_destroy_queue(dbgdev->pqm,
dbgdev->kq->queue->properties.queue_id);
dbgdev->kq = NULL;
return status;
}
static void dbgdev_address_watch_set_registers(
const struct dbg_address_watch_info *adw_info,
union TCP_WATCH_ADDR_H_BITS *addrHi,
union TCP_WATCH_ADDR_L_BITS *addrLo,
union TCP_WATCH_CNTL_BITS *cntl,
unsigned int index, unsigned int vmid)
{
union ULARGE_INTEGER addr;
BUG_ON(!adw_info || !addrHi || !addrLo || !cntl);
addr.quad_part = 0;
addrHi->u32All = 0;
addrLo->u32All = 0;
cntl->u32All = 0;
if (adw_info->watch_mask != NULL)
cntl->bitfields.mask =
(uint32_t) (adw_info->watch_mask[index] &
ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK);
else
cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
addr.quad_part = (unsigned long long) adw_info->watch_address[index];
addrHi->bitfields.addr = addr.u.high_part &
ADDRESS_WATCH_REG_ADDHIGH_MASK;
addrLo->bitfields.addr =
(addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT);
cntl->bitfields.mode = adw_info->watch_mode[index];
cntl->bitfields.vmid = (uint32_t) vmid;
/* for now assume it is an ATC address */
cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT;
pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask);
pr_debug("\t\t%20s %08x\n", "set reg add high :",
addrHi->bitfields.addr);
pr_debug("\t\t%20s %08x\n", "set reg add low :",
addrLo->bitfields.addr);
}
static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
struct dbg_address_watch_info *adw_info)
{
union TCP_WATCH_ADDR_H_BITS addrHi;
union TCP_WATCH_ADDR_L_BITS addrLo;
union TCP_WATCH_CNTL_BITS cntl;
struct kfd_process_device *pdd;
unsigned int i;
BUG_ON(!dbgdev || !dbgdev->dev || !adw_info);
/* taking the vmid for that process on the safe way using pdd */
pdd = kfd_get_process_device_data(dbgdev->dev,
adw_info->process);
if (!pdd) {
pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n");
return -EFAULT;
}
addrHi.u32All = 0;
addrLo.u32All = 0;
cntl.u32All = 0;
if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
(adw_info->num_watch_points == 0)) {
pr_err("amdkfd: num_watch_points is invalid\n");
return -EINVAL;
}
if ((adw_info->watch_mode == NULL) ||
(adw_info->watch_address == NULL)) {
pr_err("amdkfd: adw_info fields are not valid\n");
return -EINVAL;
}
for (i = 0 ; i < adw_info->num_watch_points ; i++) {
dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo,
&cntl, i, pdd->qpd.vmid);
pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
pr_debug("\t\t%20s %08x\n", "register index :", i);
pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid);
pr_debug("\t\t%20s %08x\n", "Address Low is :",
addrLo.bitfields.addr);
pr_debug("\t\t%20s %08x\n", "Address high is :",
addrHi.bitfields.addr);
pr_debug("\t\t%20s %08x\n", "Address high is :",
addrHi.bitfields.addr);
pr_debug("\t\t%20s %08x\n", "Control Mask is :",
cntl.bitfields.mask);
pr_debug("\t\t%20s %08x\n", "Control Mode is :",
cntl.bitfields.mode);
pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
cntl.bitfields.vmid);
pr_debug("\t\t%20s %08x\n", "Control atc is :",
cntl.bitfields.atc);
pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
pdd->dev->kfd2kgd->address_watch_execute(
dbgdev->dev->kgd,
i,
cntl.u32All,
addrHi.u32All,
addrLo.u32All);
}
return 0;
}
static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
struct dbg_address_watch_info *adw_info)
{
struct pm4__set_config_reg *packets_vec;
union TCP_WATCH_ADDR_H_BITS addrHi;
union TCP_WATCH_ADDR_L_BITS addrLo;
union TCP_WATCH_CNTL_BITS cntl;
struct kfd_mem_obj *mem_obj;
unsigned int aw_reg_add_dword;
uint32_t *packet_buff_uint;
unsigned int i;
int status;
size_t ib_size = sizeof(struct pm4__set_config_reg) * 4;
/* we do not control the vmid in DIQ mode, just a place holder */
unsigned int vmid = 0;
BUG_ON(!dbgdev || !dbgdev->dev || !adw_info);
addrHi.u32All = 0;
addrLo.u32All = 0;
cntl.u32All = 0;
if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
(adw_info->num_watch_points == 0)) {
pr_err("amdkfd: num_watch_points is invalid\n");
return -EINVAL;
}
if ((NULL == adw_info->watch_mode) ||
(NULL == adw_info->watch_address)) {
pr_err("amdkfd: adw_info fields are not valid\n");
return -EINVAL;
}
status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
if (status != 0) {
pr_err("amdkfd: Failed to allocate GART memory\n");
return status;
}
packet_buff_uint = mem_obj->cpu_ptr;
memset(packet_buff_uint, 0, ib_size);
packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint);
packets_vec[0].header.count = 1;
packets_vec[0].header.opcode = IT_SET_CONFIG_REG;
packets_vec[0].header.type = PM4_TYPE_3;
packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
packets_vec[0].bitfields2.insert_vmid = 1;
packets_vec[1].ordinal1 = packets_vec[0].ordinal1;
packets_vec[1].bitfields2.insert_vmid = 0;
packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
packets_vec[2].bitfields2.insert_vmid = 0;
packets_vec[3].ordinal1 = packets_vec[0].ordinal1;
packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
packets_vec[3].bitfields2.insert_vmid = 1;
for (i = 0; i < adw_info->num_watch_points; i++) {
dbgdev_address_watch_set_registers(adw_info,
&addrHi,
&addrLo,
&cntl,
i,
vmid);
pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
pr_debug("\t\t%20s %08x\n", "register index :", i);
pr_debug("\t\t%20s %08x\n", "vmid is :", vmid);
pr_debug("\t\t%20s %p\n", "Add ptr is :",
adw_info->watch_address);
pr_debug("\t\t%20s %08llx\n", "Add is :",
adw_info->watch_address[i]);
pr_debug("\t\t%20s %08x\n", "Address Low is :",
addrLo.bitfields.addr);
pr_debug("\t\t%20s %08x\n", "Address high is :",
addrHi.bitfields.addr);
pr_debug("\t\t%20s %08x\n", "Control Mask is :",
cntl.bitfields.mask);
pr_debug("\t\t%20s %08x\n", "Control Mode is :",
cntl.bitfields.mode);
pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
cntl.bitfields.vmid);
pr_debug("\t\t%20s %08x\n", "Control atc is :",
cntl.bitfields.atc);
pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
aw_reg_add_dword =
dbgdev->dev->kfd2kgd->address_watch_get_offset(
dbgdev->dev->kgd,
i,
ADDRESS_WATCH_REG_CNTL);
aw_reg_add_dword /= sizeof(uint32_t);
packets_vec[0].bitfields2.reg_offset =
aw_reg_add_dword - CONFIG_REG_BASE;
packets_vec[0].reg_data[0] = cntl.u32All;
aw_reg_add_dword =
dbgdev->dev->kfd2kgd->address_watch_get_offset(
dbgdev->dev->kgd,
i,
ADDRESS_WATCH_REG_ADDR_HI);
aw_reg_add_dword /= sizeof(uint32_t);
packets_vec[1].bitfields2.reg_offset =
aw_reg_add_dword - CONFIG_REG_BASE;
packets_vec[1].reg_data[0] = addrHi.u32All;
aw_reg_add_dword =
dbgdev->dev->kfd2kgd->address_watch_get_offset(
dbgdev->dev->kgd,
i,
ADDRESS_WATCH_REG_ADDR_LO);
aw_reg_add_dword /= sizeof(uint32_t);
packets_vec[2].bitfields2.reg_offset =
aw_reg_add_dword - CONFIG_REG_BASE;
packets_vec[2].reg_data[0] = addrLo.u32All;
/* enable watch flag if address is not zero*/
if (adw_info->watch_address[i] > 0)
cntl.bitfields.valid = 1;
else
cntl.bitfields.valid = 0;
aw_reg_add_dword =
dbgdev->dev->kfd2kgd->address_watch_get_offset(
dbgdev->dev->kgd,
i,
ADDRESS_WATCH_REG_CNTL);
aw_reg_add_dword /= sizeof(uint32_t);
packets_vec[3].bitfields2.reg_offset =
aw_reg_add_dword - CONFIG_REG_BASE;
packets_vec[3].reg_data[0] = cntl.u32All;
status = dbgdev_diq_submit_ib(
dbgdev,
adw_info->process->pasid,
mem_obj->gpu_addr,
packet_buff_uint,
ib_size);
if (status != 0) {
pr_err("amdkfd: Failed to submit IB to DIQ\n");
break;
}
}
kfd_gtt_sa_free(dbgdev->dev, mem_obj);
return status;
}
static int dbgdev_wave_control_set_registers(
struct dbg_wave_control_info *wac_info,
union SQ_CMD_BITS *in_reg_sq_cmd,
union GRBM_GFX_INDEX_BITS *in_reg_gfx_index)
{
int status;
union SQ_CMD_BITS reg_sq_cmd;
union GRBM_GFX_INDEX_BITS reg_gfx_index;
struct HsaDbgWaveMsgAMDGen2 *pMsg;
BUG_ON(!wac_info || !in_reg_sq_cmd || !in_reg_gfx_index);
reg_sq_cmd.u32All = 0;
reg_gfx_index.u32All = 0;
pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2;
switch (wac_info->mode) {
/* Send command to single wave */
case HSA_DBG_WAVEMODE_SINGLE:
/*
* Limit access to the process waves only,
* by setting vmid check
*/
reg_sq_cmd.bits.check_vmid = 1;
reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD;
reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId;
reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE;
reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
break;
/* Send command to all waves with matching VMID */
case HSA_DBG_WAVEMODE_BROADCAST_PROCESS:
reg_gfx_index.bits.sh_broadcast_writes = 1;
reg_gfx_index.bits.se_broadcast_writes = 1;
reg_gfx_index.bits.instance_broadcast_writes = 1;
reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
break;
/* Send command to all CU waves with matching VMID */
case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU:
reg_sq_cmd.bits.check_vmid = 1;
reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
break;
default:
return -EINVAL;
}
switch (wac_info->operand) {
case HSA_DBG_WAVEOP_HALT:
reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT;
break;
case HSA_DBG_WAVEOP_RESUME:
reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME;
break;
case HSA_DBG_WAVEOP_KILL:
reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
break;
case HSA_DBG_WAVEOP_DEBUG:
reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG;
break;
case HSA_DBG_WAVEOP_TRAP:
if (wac_info->trapId < MAX_TRAPID) {
reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP;
reg_sq_cmd.bits.trap_id = wac_info->trapId;
} else {
status = -EINVAL;
}
break;
default:
status = -EINVAL;
break;
}
if (status == 0) {
*in_reg_sq_cmd = reg_sq_cmd;
*in_reg_gfx_index = reg_gfx_index;
}
return status;
}
static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
struct dbg_wave_control_info *wac_info)
{
int status;
union SQ_CMD_BITS reg_sq_cmd;
union GRBM_GFX_INDEX_BITS reg_gfx_index;
struct kfd_mem_obj *mem_obj;
uint32_t *packet_buff_uint;
struct pm4__set_config_reg *packets_vec;
size_t ib_size = sizeof(struct pm4__set_config_reg) * 3;
BUG_ON(!dbgdev || !wac_info);
reg_sq_cmd.u32All = 0;
status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
&reg_gfx_index);
if (status) {
pr_err("amdkfd: Failed to set wave control registers\n");
return status;
}
/* we do not control the VMID in DIQ,so reset it to a known value */
reg_sq_cmd.bits.vm_id = 0;
pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
pr_debug("\t\t mode is: %u\n", wac_info->mode);
pr_debug("\t\t operand is: %u\n", wac_info->operand);
pr_debug("\t\t trap id is: %u\n", wac_info->trapId);
pr_debug("\t\t msg value is: %u\n",
wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
pr_debug("\t\t vmid is: N/A\n");
pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid);
pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd);
pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id);
pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id);
pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode);
pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id);
pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id);
pr_debug("\t\t ibw is : %u\n",
reg_gfx_index.bitfields.instance_broadcast_writes);
pr_debug("\t\t ii is : %u\n",
reg_gfx_index.bitfields.instance_index);
pr_debug("\t\t sebw is : %u\n",
reg_gfx_index.bitfields.se_broadcast_writes);
pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index);
pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index);
pr_debug("\t\t sbw is : %u\n",
reg_gfx_index.bitfields.sh_broadcast_writes);
pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
if (status != 0) {
pr_err("amdkfd: Failed to allocate GART memory\n");
return status;
}
packet_buff_uint = mem_obj->cpu_ptr;
memset(packet_buff_uint, 0, ib_size);
packets_vec = (struct pm4__set_config_reg *) packet_buff_uint;
packets_vec[0].header.count = 1;
packets_vec[0].header.opcode = IT_SET_UCONFIG_REG;
packets_vec[0].header.type = PM4_TYPE_3;
packets_vec[0].bitfields2.reg_offset =
GRBM_GFX_INDEX / (sizeof(uint32_t)) -
USERCONFIG_REG_BASE;
packets_vec[0].bitfields2.insert_vmid = 0;
packets_vec[0].reg_data[0] = reg_gfx_index.u32All;
packets_vec[1].header.count = 1;
packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
packets_vec[1].header.type = PM4_TYPE_3;
packets_vec[1].bitfields2.reg_offset = SQ_CMD / (sizeof(uint32_t)) -
CONFIG_REG_BASE;
packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
packets_vec[1].bitfields2.insert_vmid = 1;
packets_vec[1].reg_data[0] = reg_sq_cmd.u32All;
/* Restore the GRBM_GFX_INDEX register */
reg_gfx_index.u32All = 0;
reg_gfx_index.bits.sh_broadcast_writes = 1;
reg_gfx_index.bits.instance_broadcast_writes = 1;
reg_gfx_index.bits.se_broadcast_writes = 1;
packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
packets_vec[2].bitfields2.reg_offset =
GRBM_GFX_INDEX / (sizeof(uint32_t)) -
USERCONFIG_REG_BASE;
packets_vec[2].bitfields2.insert_vmid = 0;
packets_vec[2].reg_data[0] = reg_gfx_index.u32All;
status = dbgdev_diq_submit_ib(
dbgdev,
wac_info->process->pasid,
mem_obj->gpu_addr,
packet_buff_uint,
ib_size);
if (status != 0)
pr_err("amdkfd: Failed to submit IB to DIQ\n");
kfd_gtt_sa_free(dbgdev->dev, mem_obj);
return status;
}
static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev,
struct dbg_wave_control_info *wac_info)
{
int status;
union SQ_CMD_BITS reg_sq_cmd;
union GRBM_GFX_INDEX_BITS reg_gfx_index;
struct kfd_process_device *pdd;
BUG_ON(!dbgdev || !dbgdev->dev || !wac_info);
reg_sq_cmd.u32All = 0;
/* taking the VMID for that process on the safe way using PDD */
pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process);
if (!pdd) {
pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n");
return -EFAULT;
}
status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
&reg_gfx_index);
if (status) {
pr_err("amdkfd: Failed to set wave control registers\n");
return status;
}
/* for non DIQ we need to patch the VMID: */
reg_sq_cmd.bits.vm_id = pdd->qpd.vmid;
pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
pr_debug("\t\t mode is: %u\n", wac_info->mode);
pr_debug("\t\t operand is: %u\n", wac_info->operand);
pr_debug("\t\t trap id is: %u\n", wac_info->trapId);
pr_debug("\t\t msg value is: %u\n",
wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
pr_debug("\t\t vmid is: %u\n", pdd->qpd.vmid);
pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid);
pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd);
pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id);
pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id);
pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode);
pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id);
pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id);
pr_debug("\t\t ibw is : %u\n",
reg_gfx_index.bitfields.instance_broadcast_writes);
pr_debug("\t\t ii is : %u\n",
reg_gfx_index.bitfields.instance_index);
pr_debug("\t\t sebw is : %u\n",
reg_gfx_index.bitfields.se_broadcast_writes);
pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index);
pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index);
pr_debug("\t\t sbw is : %u\n",
reg_gfx_index.bitfields.sh_broadcast_writes);
pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd,
reg_gfx_index.u32All,
reg_sq_cmd.u32All);
}
int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
{
int status = 0;
unsigned int vmid;
union SQ_CMD_BITS reg_sq_cmd;
union GRBM_GFX_INDEX_BITS reg_gfx_index;
struct kfd_process_device *pdd;
struct dbg_wave_control_info wac_info;
int temp;
int first_vmid_to_scan = 8;
int last_vmid_to_scan = 15;
first_vmid_to_scan = ffs(dev->shared_resources.compute_vmid_bitmap) - 1;
temp = dev->shared_resources.compute_vmid_bitmap >> first_vmid_to_scan;
last_vmid_to_scan = first_vmid_to_scan + ffz(temp);
reg_sq_cmd.u32All = 0;
status = 0;
wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS;
wac_info.operand = HSA_DBG_WAVEOP_KILL;
pr_debug("Killing all process wavefronts\n");
/* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
* ATC_VMID15_PASID_MAPPING
* to check which VMID the current process is mapped to. */
for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid
(dev->kgd, vmid)) {
if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid
(dev->kgd, vmid) == p->pasid) {
pr_debug("Killing wave fronts of vmid %d and pasid %d\n",
vmid, p->pasid);
break;
}
}
}
if (vmid > last_vmid_to_scan) {
pr_err("amdkfd: didn't found vmid for pasid (%d)\n", p->pasid);
return -EFAULT;
}
/* taking the VMID for that process on the safe way using PDD */
pdd = kfd_get_process_device_data(dev, p);
if (!pdd)
return -EFAULT;
status = dbgdev_wave_control_set_registers(&wac_info, &reg_sq_cmd,
&reg_gfx_index);
if (status != 0)
return -EINVAL;
/* for non DIQ we need to patch the VMID: */
reg_sq_cmd.bits.vm_id = vmid;
dev->kfd2kgd->wave_control_execute(dev->kgd,
reg_gfx_index.u32All,
reg_sq_cmd.u32All);
return 0;
}
void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
enum DBGDEV_TYPE type)
{
BUG_ON(!pdbgdev || !pdev);
pdbgdev->dev = pdev;
pdbgdev->kq = NULL;
pdbgdev->type = type;
pdbgdev->pqm = NULL;
switch (type) {
case DBGDEV_TYPE_NODIQ:
pdbgdev->dbgdev_register = dbgdev_register_nodiq;
pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq;
pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq;
pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq;
break;
case DBGDEV_TYPE_DIQ:
default:
pdbgdev->dbgdev_register = dbgdev_register_diq;
pdbgdev->dbgdev_unregister = dbgdev_unregister_diq;
pdbgdev->dbgdev_wave_control = dbgdev_wave_control_diq;
pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq;
break;
}
}
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef KFD_DBGDEV_H_
#define KFD_DBGDEV_H_
enum {
SQ_CMD_VMID_OFFSET = 28,
ADDRESS_WATCH_CNTL_OFFSET = 24
};
enum {
PRIV_QUEUE_SYNC_TIME_MS = 200
};
/* CONTEXT reg space definition */
enum {
CONTEXT_REG_BASE = 0xA000,
CONTEXT_REG_END = 0xA400,
CONTEXT_REG_SIZE = CONTEXT_REG_END - CONTEXT_REG_BASE
};
/* USER CONFIG reg space definition */
enum {
USERCONFIG_REG_BASE = 0xC000,
USERCONFIG_REG_END = 0x10000,
USERCONFIG_REG_SIZE = USERCONFIG_REG_END - USERCONFIG_REG_BASE
};
/* CONFIG reg space definition */
enum {
CONFIG_REG_BASE = 0x2000, /* in dwords */
CONFIG_REG_END = 0x2B00,
CONFIG_REG_SIZE = CONFIG_REG_END - CONFIG_REG_BASE
};
/* SH reg space definition */
enum {
SH_REG_BASE = 0x2C00,
SH_REG_END = 0x3000,
SH_REG_SIZE = SH_REG_END - SH_REG_BASE
};
enum SQ_IND_CMD_CMD {
SQ_IND_CMD_CMD_NULL = 0x00000000,
SQ_IND_CMD_CMD_HALT = 0x00000001,
SQ_IND_CMD_CMD_RESUME = 0x00000002,
SQ_IND_CMD_CMD_KILL = 0x00000003,
SQ_IND_CMD_CMD_DEBUG = 0x00000004,
SQ_IND_CMD_CMD_TRAP = 0x00000005,
};
enum SQ_IND_CMD_MODE {
SQ_IND_CMD_MODE_SINGLE = 0x00000000,
SQ_IND_CMD_MODE_BROADCAST = 0x00000001,
SQ_IND_CMD_MODE_BROADCAST_QUEUE = 0x00000002,
SQ_IND_CMD_MODE_BROADCAST_PIPE = 0x00000003,
SQ_IND_CMD_MODE_BROADCAST_ME = 0x00000004,
};
union SQ_IND_INDEX_BITS {
struct {
uint32_t wave_id:4;
uint32_t simd_id:2;
uint32_t thread_id:6;
uint32_t:1;
uint32_t force_read:1;
uint32_t read_timeout:1;
uint32_t unindexed:1;
uint32_t index:16;
} bitfields, bits;
uint32_t u32All;
signed int i32All;
float f32All;
};
union SQ_IND_CMD_BITS {
struct {
uint32_t data:32;
} bitfields, bits;
uint32_t u32All;
signed int i32All;
float f32All;
};
union SQ_CMD_BITS {
struct {
uint32_t cmd:3;
uint32_t:1;
uint32_t mode:3;
uint32_t check_vmid:1;
uint32_t trap_id:3;
uint32_t:5;
uint32_t wave_id:4;
uint32_t simd_id:2;
uint32_t:2;
uint32_t queue_id:3;
uint32_t:1;
uint32_t vm_id:4;
} bitfields, bits;
uint32_t u32All;
signed int i32All;
float f32All;
};
union SQ_IND_DATA_BITS {
struct {
uint32_t data:32;
} bitfields, bits;
uint32_t u32All;
signed int i32All;
float f32All;
};
union GRBM_GFX_INDEX_BITS {
struct {
uint32_t instance_index:8;
uint32_t sh_index:8;
uint32_t se_index:8;
uint32_t:5;
uint32_t sh_broadcast_writes:1;
uint32_t instance_broadcast_writes:1;
uint32_t se_broadcast_writes:1;
} bitfields, bits;
uint32_t u32All;
signed int i32All;
float f32All;
};
union TCP_WATCH_ADDR_H_BITS {
struct {
uint32_t addr:16;
uint32_t:16;
} bitfields, bits;
uint32_t u32All;
signed int i32All;
float f32All;
};
union TCP_WATCH_ADDR_L_BITS {
struct {
uint32_t:6;
uint32_t addr:26;
} bitfields, bits;
uint32_t u32All;
signed int i32All;
float f32All;
};
enum {
QUEUESTATE__INVALID = 0, /* so by default we'll get invalid state */
QUEUESTATE__ACTIVE_COMPLETION_PENDING,
QUEUESTATE__ACTIVE
};
union ULARGE_INTEGER {
struct {
uint32_t low_part;
uint32_t high_part;
} u;
unsigned long long quad_part;
};
#define KFD_CIK_VMID_START_OFFSET (8)
#define KFD_CIK_VMID_END_OFFSET (KFD_CIK_VMID_START_OFFSET + (8))
void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
enum DBGDEV_TYPE type);
#endif /* KFD_DBGDEV_H_ */
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/device.h>
#include "kfd_priv.h"
#include "cik_regs.h"
#include "kfd_pm4_headers.h"
#include "kfd_pm4_headers_diq.h"
#include "kfd_dbgmgr.h"
#include "kfd_dbgdev.h"
static DEFINE_MUTEX(kfd_dbgmgr_mutex);
struct mutex *kfd_get_dbgmgr_mutex(void)
{
return &kfd_dbgmgr_mutex;
}
static void kfd_dbgmgr_uninitialize(struct kfd_dbgmgr *pmgr)
{
BUG_ON(!pmgr);
kfree(pmgr->dbgdev);
pmgr->dbgdev = NULL;
pmgr->pasid = 0;
pmgr->dev = NULL;
}
void kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr)
{
if (pmgr != NULL) {
kfd_dbgmgr_uninitialize(pmgr);
kfree(pmgr);
}
}
bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev)
{
enum DBGDEV_TYPE type = DBGDEV_TYPE_DIQ;
struct kfd_dbgmgr *new_buff;
BUG_ON(pdev == NULL);
BUG_ON(!pdev->init_complete);
new_buff = kfd_alloc_struct(new_buff);
if (!new_buff) {
pr_err("amdkfd: Failed to allocate dbgmgr instance\n");
return false;
}
new_buff->pasid = 0;
new_buff->dev = pdev;
new_buff->dbgdev = kfd_alloc_struct(new_buff->dbgdev);
if (!new_buff->dbgdev) {
pr_err("amdkfd: Failed to allocate dbgdev instance\n");
kfree(new_buff);
return false;
}
/* get actual type of DBGDevice cpsch or not */
if (sched_policy == KFD_SCHED_POLICY_NO_HWS)
type = DBGDEV_TYPE_NODIQ;
kfd_dbgdev_init(new_buff->dbgdev, pdev, type);
*ppmgr = new_buff;
return true;
}
long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p)
{
BUG_ON(!p || !pmgr || !pmgr->dbgdev);
if (pmgr->pasid != 0) {
pr_debug("H/W debugger is already active using pasid %d\n",
pmgr->pasid);
return -EBUSY;
}
/* remember pasid */
pmgr->pasid = p->pasid;
/* provide the pqm for diq generation */
pmgr->dbgdev->pqm = &p->pqm;
/* activate the actual registering */
pmgr->dbgdev->dbgdev_register(pmgr->dbgdev);
return 0;
}
long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p)
{
BUG_ON(!p || !pmgr || !pmgr->dbgdev);
/* Is the requests coming from the already registered process? */
if (pmgr->pasid != p->pasid) {
pr_debug("H/W debugger is not registered by calling pasid %d\n",
p->pasid);
return -EINVAL;
}
pmgr->dbgdev->dbgdev_unregister(pmgr->dbgdev);
pmgr->pasid = 0;
return 0;
}
long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr,
struct dbg_wave_control_info *wac_info)
{
BUG_ON(!pmgr || !pmgr->dbgdev || !wac_info);
/* Is the requests coming from the already registered process? */
if (pmgr->pasid != wac_info->process->pasid) {
pr_debug("H/W debugger support was not registered for requester pasid %d\n",
wac_info->process->pasid);
return -EINVAL;
}
return (long) pmgr->dbgdev->dbgdev_wave_control(pmgr->dbgdev, wac_info);
}
long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr,
struct dbg_address_watch_info *adw_info)
{
BUG_ON(!pmgr || !pmgr->dbgdev || !adw_info);
/* Is the requests coming from the already registered process? */
if (pmgr->pasid != adw_info->process->pasid) {
pr_debug("H/W debugger support was not registered for requester pasid %d\n",
adw_info->process->pasid);
return -EINVAL;
}
return (long) pmgr->dbgdev->dbgdev_address_watch(pmgr->dbgdev,
adw_info);
}
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef KFD_DBGMGR_H_
#define KFD_DBGMGR_H_
#include "kfd_priv.h"
/* must align with hsakmttypes definition */
#pragma pack(push, 4)
enum HSA_DBG_WAVEOP {
HSA_DBG_WAVEOP_HALT = 1, /* Halts a wavefront */
HSA_DBG_WAVEOP_RESUME = 2, /* Resumes a wavefront */
HSA_DBG_WAVEOP_KILL = 3, /* Kills a wavefront */
HSA_DBG_WAVEOP_DEBUG = 4, /* Causes wavefront to enter
debug mode */
HSA_DBG_WAVEOP_TRAP = 5, /* Causes wavefront to take
a trap */
HSA_DBG_NUM_WAVEOP = 5,
HSA_DBG_MAX_WAVEOP = 0xFFFFFFFF
};
enum HSA_DBG_WAVEMODE {
/* send command to a single wave */
HSA_DBG_WAVEMODE_SINGLE = 0,
/*
* Broadcast to all wavefronts of all processes is not
* supported for HSA user mode
*/
/* send to waves within current process */
HSA_DBG_WAVEMODE_BROADCAST_PROCESS = 2,
/* send to waves within current process on CU */
HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU = 3,
HSA_DBG_NUM_WAVEMODE = 3,
HSA_DBG_MAX_WAVEMODE = 0xFFFFFFFF
};
enum HSA_DBG_WAVEMSG_TYPE {
HSA_DBG_WAVEMSG_AUTO = 0,
HSA_DBG_WAVEMSG_USER = 1,
HSA_DBG_WAVEMSG_ERROR = 2,
HSA_DBG_NUM_WAVEMSG,
HSA_DBG_MAX_WAVEMSG = 0xFFFFFFFF
};
enum HSA_DBG_WATCH_MODE {
HSA_DBG_WATCH_READ = 0, /* Read operations only */
HSA_DBG_WATCH_NONREAD = 1, /* Write or Atomic operations only */
HSA_DBG_WATCH_ATOMIC = 2, /* Atomic Operations only */
HSA_DBG_WATCH_ALL = 3, /* Read, Write or Atomic operations */
HSA_DBG_WATCH_NUM,
HSA_DBG_WATCH_SIZE = 0xFFFFFFFF
};
/* This structure is hardware specific and may change in the future */
struct HsaDbgWaveMsgAMDGen2 {
union {
struct ui32 {
uint32_t UserData:8; /* user data */
uint32_t ShaderArray:1; /* Shader array */
uint32_t Priv:1; /* Privileged */
uint32_t Reserved0:4; /* This field is reserved,
should be 0 */
uint32_t WaveId:4; /* wave id */
uint32_t SIMD:2; /* SIMD id */
uint32_t HSACU:4; /* Compute unit */
uint32_t ShaderEngine:2;/* Shader engine */
uint32_t MessageType:2; /* see HSA_DBG_WAVEMSG_TYPE */
uint32_t Reserved1:4; /* This field is reserved,
should be 0 */
} ui32;
uint32_t Value;
};
uint32_t Reserved2;
};
union HsaDbgWaveMessageAMD {
struct HsaDbgWaveMsgAMDGen2 WaveMsgInfoGen2;
/* for future HsaDbgWaveMsgAMDGen3; */
};
struct HsaDbgWaveMessage {
void *MemoryVA; /* ptr to associated host-accessible data */
union HsaDbgWaveMessageAMD DbgWaveMsg;
};
/*
* TODO: This definitions to be MOVED to kfd_event, once it is implemented.
*
* HSA sync primitive, Event and HW Exception notification API definitions.
* The API functions allow the runtime to define a so-called sync-primitive,
* a SW object combining a user-mode provided "syncvar" and a scheduler event
* that can be signaled through a defined GPU interrupt. A syncvar is
* a process virtual memory location of a certain size that can be accessed
* by CPU and GPU shader code within the process to set and query the content
* within that memory. The definition of the content is determined by the HSA
* runtime and potentially GPU shader code interfacing with the HSA runtime.
* The syncvar values may be commonly written through an PM4 WRITE_DATA packet
* in the user mode instruction stream. The OS scheduler event is typically
* associated and signaled by an interrupt issued by the GPU, but other HSA
* system interrupt conditions from other HW (e.g. IOMMUv2) may be surfaced
* by the KFD by this mechanism, too. */
/* these are the new definitions for events */
enum HSA_EVENTTYPE {
HSA_EVENTTYPE_SIGNAL = 0, /* user-mode generated GPU signal */
HSA_EVENTTYPE_NODECHANGE = 1, /* HSA node change (attach/detach) */
HSA_EVENTTYPE_DEVICESTATECHANGE = 2, /* HSA device state change
(start/stop) */
HSA_EVENTTYPE_HW_EXCEPTION = 3, /* GPU shader exception event */
HSA_EVENTTYPE_SYSTEM_EVENT = 4, /* GPU SYSCALL with parameter info */
HSA_EVENTTYPE_DEBUG_EVENT = 5, /* GPU signal for debugging */
HSA_EVENTTYPE_PROFILE_EVENT = 6,/* GPU signal for profiling */
HSA_EVENTTYPE_QUEUE_EVENT = 7, /* GPU signal queue idle state
(EOP pm4) */
/* ... */
HSA_EVENTTYPE_MAXID,
HSA_EVENTTYPE_TYPE_SIZE = 0xFFFFFFFF
};
/* Sub-definitions for various event types: Syncvar */
struct HsaSyncVar {
union SyncVar {
void *UserData; /* pointer to user mode data */
uint64_t UserDataPtrValue; /* 64bit compatibility of value */
} SyncVar;
uint64_t SyncVarSize;
};
/* Sub-definitions for various event types: NodeChange */
enum HSA_EVENTTYPE_NODECHANGE_FLAGS {
HSA_EVENTTYPE_NODECHANGE_ADD = 0,
HSA_EVENTTYPE_NODECHANGE_REMOVE = 1,
HSA_EVENTTYPE_NODECHANGE_SIZE = 0xFFFFFFFF
};
struct HsaNodeChange {
/* HSA node added/removed on the platform */
enum HSA_EVENTTYPE_NODECHANGE_FLAGS Flags;
};
/* Sub-definitions for various event types: DeviceStateChange */
enum HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS {
/* device started (and available) */
HSA_EVENTTYPE_DEVICESTATUSCHANGE_START = 0,
/* device stopped (i.e. unavailable) */
HSA_EVENTTYPE_DEVICESTATUSCHANGE_STOP = 1,
HSA_EVENTTYPE_DEVICESTATUSCHANGE_SIZE = 0xFFFFFFFF
};
enum HSA_DEVICE {
HSA_DEVICE_CPU = 0,
HSA_DEVICE_GPU = 1,
MAX_HSA_DEVICE = 2
};
struct HsaDeviceStateChange {
uint32_t NodeId; /* F-NUMA node that contains the device */
enum HSA_DEVICE Device; /* device type: GPU or CPU */
enum HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS Flags; /* event flags */
};
struct HsaEventData {
enum HSA_EVENTTYPE EventType; /* event type */
union EventData {
/*
* return data associated with HSA_EVENTTYPE_SIGNAL
* and other events
*/
struct HsaSyncVar SyncVar;
/* data associated with HSA_EVENTTYPE_NODE_CHANGE */
struct HsaNodeChange NodeChangeState;
/* data associated with HSA_EVENTTYPE_DEVICE_STATE_CHANGE */
struct HsaDeviceStateChange DeviceState;
} EventData;
/* the following data entries are internal to the KFD & thunk itself */
/* internal thunk store for Event data (OsEventHandle) */
uint64_t HWData1;
/* internal thunk store for Event data (HWAddress) */
uint64_t HWData2;
/* internal thunk store for Event data (HWData) */
uint32_t HWData3;
};
struct HsaEventDescriptor {
/* event type to allocate */
enum HSA_EVENTTYPE EventType;
/* H-NUMA node containing GPU device that is event source */
uint32_t NodeId;
/* pointer to user mode syncvar data, syncvar->UserDataPtrValue
* may be NULL
*/
struct HsaSyncVar SyncVar;
};
struct HsaEvent {
uint32_t EventId;
struct HsaEventData EventData;
};
#pragma pack(pop)
enum DBGDEV_TYPE {
DBGDEV_TYPE_ILLEGAL = 0,
DBGDEV_TYPE_NODIQ = 1,
DBGDEV_TYPE_DIQ = 2,
DBGDEV_TYPE_TEST = 3
};
struct dbg_address_watch_info {
struct kfd_process *process;
enum HSA_DBG_WATCH_MODE *watch_mode;
uint64_t *watch_address;
uint64_t *watch_mask;
struct HsaEvent *watch_event;
uint32_t num_watch_points;
};
struct dbg_wave_control_info {
struct kfd_process *process;
uint32_t trapId;
enum HSA_DBG_WAVEOP operand;
enum HSA_DBG_WAVEMODE mode;
struct HsaDbgWaveMessage dbgWave_msg;
};
struct kfd_dbgdev {
/* The device that owns this data. */
struct kfd_dev *dev;
/* kernel queue for DIQ */
struct kernel_queue *kq;
/* a pointer to the pqm of the calling process */
struct process_queue_manager *pqm;
/* type of debug device ( DIQ, non DIQ, etc. ) */
enum DBGDEV_TYPE type;
/* virtualized function pointers to device dbg */
int (*dbgdev_register)(struct kfd_dbgdev *dbgdev);
int (*dbgdev_unregister)(struct kfd_dbgdev *dbgdev);
int (*dbgdev_address_watch)(struct kfd_dbgdev *dbgdev,
struct dbg_address_watch_info *adw_info);
int (*dbgdev_wave_control)(struct kfd_dbgdev *dbgdev,
struct dbg_wave_control_info *wac_info);
};
struct kfd_dbgmgr {
unsigned int pasid;
struct kfd_dev *dev;
struct kfd_dbgdev *dbgdev;
};
/* prototypes for debug manager functions */
struct mutex *kfd_get_dbgmgr_mutex(void);
void kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr);
bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev);
long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p);
long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p);
long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr,
struct dbg_wave_control_info *wac_info);
long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr,
struct dbg_address_watch_info *adw_info);
#endif /* KFD_DBGMGR_H_ */
......@@ -33,8 +33,11 @@
static const struct kfd_device_info kaveri_device_info = {
.asic_family = CHIP_KAVERI,
.max_pasid_bits = 16,
/* max num of queues for KV.TODO should be a dynamic value */
.max_no_of_hqd = 24,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
.mqd_size_aligned = MQD_SIZE_ALIGNED
};
......@@ -294,6 +297,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
goto dqm_start_error;
}
kfd->dbgmgr = NULL;
kfd->init_complete = true;
dev_info(kfd_device, "added device (%x:%x)\n", kfd->pdev->vendor,
kfd->pdev->device);
......
......@@ -45,7 +45,8 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock);
static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock);
static int destroy_queues_cpsch(struct device_queue_manager *dqm,
bool preempt_static_queues, bool lock);
static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
struct queue *q,
......@@ -775,7 +776,7 @@ static int stop_cpsch(struct device_queue_manager *dqm)
BUG_ON(!dqm);
destroy_queues_cpsch(dqm, true);
destroy_queues_cpsch(dqm, true, true);
list_for_each_entry(node, &dqm->queues, list) {
pdd = qpd_to_pdd(node->qpd);
......@@ -829,7 +830,8 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
pr_debug("kfd: In %s\n", __func__);
mutex_lock(&dqm->lock);
destroy_queues_cpsch(dqm, false);
/* here we actually preempt the DIQ */
destroy_queues_cpsch(dqm, true, false);
list_del(&kq->list);
dqm->queue_count--;
qpd->is_debug = false;
......@@ -913,7 +915,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
return retval;
}
static int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
unsigned int fence_value,
unsigned long timeout)
{
......@@ -935,13 +937,16 @@ static int destroy_sdma_queues(struct device_queue_manager *dqm,
unsigned int sdma_engine)
{
return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false,
KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES, 0, false,
sdma_engine);
}
static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock)
static int destroy_queues_cpsch(struct device_queue_manager *dqm,
bool preempt_static_queues, bool lock)
{
int retval;
enum kfd_preempt_type_filter preempt_type;
struct kfd_process *p;
BUG_ON(!dqm);
......@@ -960,8 +965,12 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock)
destroy_sdma_queues(dqm, 1);
}
preempt_type = preempt_static_queues ?
KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES :
KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES;
retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false, 0);
preempt_type, 0, false, 0);
if (retval != 0)
goto out;
......@@ -969,8 +978,13 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock)
pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
KFD_FENCE_COMPLETED);
/* should be timed out */
amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
if (retval != 0) {
p = kfd_get_process(current);
p->reset_wavefronts = true;
goto out;
}
pm_release_ib(&dqm->packets);
dqm->active_runlist = false;
......@@ -989,7 +1003,7 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock)
if (lock)
mutex_lock(&dqm->lock);
retval = destroy_queues_cpsch(dqm, false);
retval = destroy_queues_cpsch(dqm, false, false);
if (retval != 0) {
pr_err("kfd: the cp might be in an unrecoverable state due to an unsuccessful queues preemption");
goto out;
......@@ -1024,13 +1038,27 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
{
int retval;
struct mqd_manager *mqd;
bool preempt_all_queues;
BUG_ON(!dqm || !qpd || !q);
preempt_all_queues = false;
retval = 0;
/* remove queue from list to prevent rescheduling after preemption */
mutex_lock(&dqm->lock);
if (qpd->is_debug) {
/*
* error, currently we do not allow to destroy a queue
* of a currently debugged process
*/
retval = -EBUSY;
goto failed_try_destroy_debugged_queue;
}
mqd = dqm->ops.get_mqd_manager(dqm,
get_mqd_type_from_queue_type(q->properties.type));
if (!mqd) {
......@@ -1062,6 +1090,8 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
return 0;
failed:
failed_try_destroy_debugged_queue:
mutex_unlock(&dqm->lock);
return retval;
}
......
......@@ -88,9 +88,11 @@ struct device_queue_manager_ops {
struct queue *q,
struct qcm_process_device *qpd,
int *allocate_vmid);
int (*destroy_queue)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
struct queue *q);
int (*update_queue)(struct device_queue_manager *dqm,
struct queue *q);
......@@ -100,8 +102,10 @@ struct device_queue_manager_ops {
int (*register_process)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
int (*unregister_process)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
int (*initialize)(struct device_queue_manager *dqm);
int (*start)(struct device_queue_manager *dqm);
int (*stop)(struct device_queue_manager *dqm);
......@@ -109,9 +113,11 @@ struct device_queue_manager_ops {
int (*create_kernel_queue)(struct device_queue_manager *dqm,
struct kernel_queue *kq,
struct qcm_process_device *qpd);
void (*destroy_kernel_queue)(struct device_queue_manager *dqm,
struct kernel_queue *kq,
struct qcm_process_device *qpd);
bool (*set_cache_memory_policy)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
enum cache_policy default_policy,
......
......@@ -163,7 +163,7 @@ static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer,
num_queues = 0;
list_for_each_entry(cur, &qpd->queues_list, list)
num_queues++;
packet->bitfields10.num_queues = num_queues;
packet->bitfields10.num_queues = (qpd->is_debug) ? 0 : num_queues;
packet->sh_mem_config = qpd->sh_mem_config;
packet->sh_mem_bases = qpd->sh_mem_bases;
......@@ -177,9 +177,10 @@ static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer,
}
static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer,
struct queue *q)
struct queue *q, bool is_static)
{
struct pm4_map_queues *packet;
bool use_static = is_static;
BUG_ON(!pm || !buffer || !q);
......@@ -209,6 +210,7 @@ static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer,
case KFD_QUEUE_TYPE_SDMA:
packet->bitfields2.engine_sel =
engine_sel__mes_map_queues__sdma0;
use_static = false; /* no static queues under SDMA */
break;
default:
BUG();
......@@ -218,6 +220,9 @@ static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer,
packet->mes_map_queues_ordinals[0].bitfields3.doorbell_offset =
q->properties.doorbell_off;
packet->mes_map_queues_ordinals[0].bitfields3.is_static =
(use_static == true) ? 1 : 0;
packet->mes_map_queues_ordinals[0].mqd_addr_lo =
lower_32_bits(q->gart_mqd_addr);
......@@ -271,9 +276,11 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
pm_release_ib(pm);
return -ENOMEM;
}
retval = pm_create_map_process(pm, &rl_buffer[rl_wptr], qpd);
if (retval != 0)
return retval;
proccesses_mapped++;
inc_wptr(&rl_wptr, sizeof(struct pm4_map_process),
alloc_size_bytes);
......@@ -281,23 +288,36 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
list_for_each_entry(kq, &qpd->priv_queue_list, list) {
if (kq->queue->properties.is_active != true)
continue;
pr_debug("kfd: static_queue, mapping kernel q %d, is debug status %d\n",
kq->queue->queue, qpd->is_debug);
retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr],
kq->queue);
kq->queue, qpd->is_debug);
if (retval != 0)
return retval;
inc_wptr(&rl_wptr, sizeof(struct pm4_map_queues),
alloc_size_bytes);
inc_wptr(&rl_wptr,
sizeof(struct pm4_map_queues),
alloc_size_bytes);
}
list_for_each_entry(q, &qpd->queues_list, list) {
if (q->properties.is_active != true)
continue;
retval = pm_create_map_queue(pm,
&rl_buffer[rl_wptr], q);
pr_debug("kfd: static_queue, mapping user queue %d, is debug status %d\n",
q->queue, qpd->is_debug);
retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr],
q, qpd->is_debug);
if (retval != 0)
return retval;
inc_wptr(&rl_wptr, sizeof(struct pm4_map_queues),
alloc_size_bytes);
inc_wptr(&rl_wptr,
sizeof(struct pm4_map_queues),
alloc_size_bytes);
}
}
......@@ -488,7 +508,8 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
packet = (struct pm4_unmap_queues *)buffer;
memset(buffer, 0, sizeof(struct pm4_unmap_queues));
pr_debug("kfd: static_queue: unmapping queues: mode is %d , reset is %d , type is %d\n",
mode, reset, type);
packet->header.u32all = build_pm4_header(IT_UNMAP_QUEUES,
sizeof(struct pm4_unmap_queues));
switch (type) {
......@@ -529,6 +550,11 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
packet->bitfields2.queue_sel =
queue_sel__mes_unmap_queues__perform_request_on_all_active_queues;
break;
case KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES:
/* in this case, we do not preempt static queues */
packet->bitfields2.queue_sel =
queue_sel__mes_unmap_queues__perform_request_on_dynamic_queues_only;
break;
default:
BUG();
break;
......
......@@ -237,7 +237,8 @@ struct pm4_map_queues {
struct {
union {
struct {
uint32_t reserved5:2;
uint32_t is_static:1;
uint32_t reserved5:1;
uint32_t doorbell_offset:21;
uint32_t reserved6:3;
uint32_t queue:6;
......@@ -328,7 +329,8 @@ enum unmap_queues_action_enum {
enum unmap_queues_queue_sel_enum {
queue_sel__mes_unmap_queues__perform_request_on_specified_queues = 0,
queue_sel__mes_unmap_queues__perform_request_on_pasid_queues = 1,
queue_sel__mes_unmap_queues__perform_request_on_all_active_queues = 2
queue_sel__mes_unmap_queues__perform_request_on_all_active_queues = 2,
queue_sel__mes_unmap_queues__perform_request_on_dynamic_queues_only = 3
};
enum unmap_queues_engine_sel_enum {
......
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef KFD_PM4_HEADERS_DIQ_H_
#define KFD_PM4_HEADERS_DIQ_H_
/*--------------------_INDIRECT_BUFFER-------------------- */
#ifndef _PM4__INDIRECT_BUFFER_DEFINED
#define _PM4__INDIRECT_BUFFER_DEFINED
enum _INDIRECT_BUFFER_cache_policy_enum {
cache_policy___indirect_buffer__lru = 0,
cache_policy___indirect_buffer__stream = 1,
cache_policy___indirect_buffer__bypass = 2
};
enum {
IT_INDIRECT_BUFFER_PASID = 0x5C
};
struct pm4__indirect_buffer_pasid {
union {
union PM4_MES_TYPE_3_HEADER header; /* header */
unsigned int ordinal1;
};
union {
struct {
unsigned int reserved1:2;
unsigned int ib_base_lo:30;
} bitfields2;
unsigned int ordinal2;
};
union {
struct {
unsigned int ib_base_hi:16;
unsigned int reserved2:16;
} bitfields3;
unsigned int ordinal3;
};
union {
unsigned int control;
unsigned int ordinal4;
};
union {
struct {
unsigned int pasid:10;
unsigned int reserved4:22;
} bitfields5;
unsigned int ordinal5;
};
};
#endif
/*--------------------_RELEASE_MEM-------------------- */
#ifndef _PM4__RELEASE_MEM_DEFINED
#define _PM4__RELEASE_MEM_DEFINED
enum _RELEASE_MEM_event_index_enum {
event_index___release_mem__end_of_pipe = 5,
event_index___release_mem__shader_done = 6
};
enum _RELEASE_MEM_cache_policy_enum {
cache_policy___release_mem__lru = 0,
cache_policy___release_mem__stream = 1,
cache_policy___release_mem__bypass = 2
};
enum _RELEASE_MEM_dst_sel_enum {
dst_sel___release_mem__memory_controller = 0,
dst_sel___release_mem__tc_l2 = 1,
dst_sel___release_mem__queue_write_pointer_register = 2,
dst_sel___release_mem__queue_write_pointer_poll_mask_bit = 3
};
enum _RELEASE_MEM_int_sel_enum {
int_sel___release_mem__none = 0,
int_sel___release_mem__send_interrupt_only = 1,
int_sel___release_mem__send_interrupt_after_write_confirm = 2,
int_sel___release_mem__send_data_after_write_confirm = 3
};
enum _RELEASE_MEM_data_sel_enum {
data_sel___release_mem__none = 0,
data_sel___release_mem__send_32_bit_low = 1,
data_sel___release_mem__send_64_bit_data = 2,
data_sel___release_mem__send_gpu_clock_counter = 3,
data_sel___release_mem__send_cp_perfcounter_hi_lo = 4,
data_sel___release_mem__store_gds_data_to_memory = 5
};
struct pm4__release_mem {
union {
union PM4_MES_TYPE_3_HEADER header; /*header */
unsigned int ordinal1;
};
union {
struct {
unsigned int event_type:6;
unsigned int reserved1:2;
enum _RELEASE_MEM_event_index_enum event_index:4;
unsigned int tcl1_vol_action_ena:1;
unsigned int tc_vol_action_ena:1;
unsigned int reserved2:1;
unsigned int tc_wb_action_ena:1;
unsigned int tcl1_action_ena:1;
unsigned int tc_action_ena:1;
unsigned int reserved3:6;
unsigned int atc:1;
enum _RELEASE_MEM_cache_policy_enum cache_policy:2;
unsigned int reserved4:5;
} bitfields2;
unsigned int ordinal2;
};
union {
struct {
unsigned int reserved5:16;
enum _RELEASE_MEM_dst_sel_enum dst_sel:2;
unsigned int reserved6:6;
enum _RELEASE_MEM_int_sel_enum int_sel:3;
unsigned int reserved7:2;
enum _RELEASE_MEM_data_sel_enum data_sel:3;
} bitfields3;
unsigned int ordinal3;
};
union {
struct {
unsigned int reserved8:2;
unsigned int address_lo_32b:30;
} bitfields4;
struct {
unsigned int reserved9:3;
unsigned int address_lo_64b:29;
} bitfields5;
unsigned int ordinal4;
};
unsigned int address_hi;
unsigned int data_lo;
unsigned int data_hi;
};
#endif
/*--------------------_SET_CONFIG_REG-------------------- */
#ifndef _PM4__SET_CONFIG_REG_DEFINED
#define _PM4__SET_CONFIG_REG_DEFINED
struct pm4__set_config_reg {
union {
union PM4_MES_TYPE_3_HEADER header; /*header */
unsigned int ordinal1;
};
union {
struct {
unsigned int reg_offset:16;
unsigned int reserved1:7;
unsigned int vmid_shift:5;
unsigned int insert_vmid:1;
unsigned int reserved2:3;
} bitfields2;
unsigned int ordinal2;
};
unsigned int reg_data[1]; /*1..N of these fields */
};
#endif
/*--------------------_WAIT_REG_MEM-------------------- */
#ifndef _PM4__WAIT_REG_MEM_DEFINED
#define _PM4__WAIT_REG_MEM_DEFINED
enum _WAIT_REG_MEM_function_enum {
function___wait_reg_mem__always_pass = 0,
function___wait_reg_mem__less_than_ref_value = 1,
function___wait_reg_mem__less_than_equal_to_the_ref_value = 2,
function___wait_reg_mem__equal_to_the_reference_value = 3,
function___wait_reg_mem__not_equal_reference_value = 4,
function___wait_reg_mem__greater_than_or_equal_reference_value = 5,
function___wait_reg_mem__greater_than_reference_value = 6,
function___wait_reg_mem__reserved = 7
};
enum _WAIT_REG_MEM_mem_space_enum {
mem_space___wait_reg_mem__register_space = 0,
mem_space___wait_reg_mem__memory_space = 1
};
enum _WAIT_REG_MEM_operation_enum {
operation___wait_reg_mem__wait_reg_mem = 0,
operation___wait_reg_mem__wr_wait_wr_reg = 1
};
struct pm4__wait_reg_mem {
union {
union PM4_MES_TYPE_3_HEADER header; /*header */
unsigned int ordinal1;
};
union {
struct {
enum _WAIT_REG_MEM_function_enum function:3;
unsigned int reserved1:1;
enum _WAIT_REG_MEM_mem_space_enum mem_space:2;
enum _WAIT_REG_MEM_operation_enum operation:2;
unsigned int reserved2:24;
} bitfields2;
unsigned int ordinal2;
};
union {
struct {
unsigned int reserved3:2;
unsigned int memory_poll_addr_lo:30;
} bitfields3;
struct {
unsigned int register_poll_addr:16;
unsigned int reserved4:16;
} bitfields4;
struct {
unsigned int register_write_addr:16;
unsigned int reserved5:16;
} bitfields5;
unsigned int ordinal3;
};
union {
struct {
unsigned int poll_address_hi:16;
unsigned int reserved6:16;
} bitfields6;
struct {
unsigned int register_write_addr:16;
unsigned int reserved7:16;
} bitfields7;
unsigned int ordinal4;
};
unsigned int reference;
unsigned int mask;
union {
struct {
unsigned int poll_interval:16;
unsigned int reserved8:16;
} bitfields8;
unsigned int ordinal7;
};
};
#endif
#endif /* KFD_PM4_HEADERS_DIQ_H_ */
......@@ -128,6 +128,7 @@ struct kfd_device_info {
unsigned int asic_family;
const struct kfd_event_interrupt_class *event_interrupt_class;
unsigned int max_pasid_bits;
unsigned int max_no_of_hqd;
size_t ih_ring_entry_size;
uint8_t num_of_watch_points;
uint16_t mqd_size_aligned;
......@@ -167,8 +168,8 @@ struct kfd_dev {
const struct kfd2kgd_calls *kfd2kgd;
struct mutex doorbell_mutex;
unsigned long doorbell_available_index[DIV_ROUND_UP(
KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, BITS_PER_LONG)];
DECLARE_BITMAP(doorbell_available_index,
KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
void *gtt_mem;
uint64_t gtt_start_gpu_addr;
......@@ -195,6 +196,9 @@ struct kfd_dev {
* from the HW ring into a SW ring.
*/
bool interrupts_active;
/* Debug manager */
struct kfd_dbgmgr *dbgmgr;
};
/* KGD2KFD callbacks */
......@@ -231,6 +235,7 @@ struct device *kfd_chardev(void);
enum kfd_preempt_type_filter {
KFD_PREEMPT_TYPE_FILTER_SINGLE_QUEUE,
KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES,
KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES,
KFD_PREEMPT_TYPE_FILTER_BY_PASID
};
......@@ -503,8 +508,6 @@ struct kfd_process {
/* Size is queue_array_size, up to MAX_PROCESS_QUEUES. */
struct kfd_queue **queues;
unsigned long allocated_queue_bitmap[DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, BITS_PER_LONG)];
/*Is the user space process 32 bit?*/
bool is_32bit_user_mode;
......@@ -516,6 +519,11 @@ struct kfd_process {
event_pages */
u32 next_nonsignal_event_id;
size_t signal_event_count;
/*
* This flag tells if we should reset all wavefronts on
* process termination
*/
bool reset_wavefronts;
};
/**
......@@ -650,6 +658,12 @@ int pqm_create_queue(struct process_queue_manager *pqm,
int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid);
int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
struct queue_properties *p);
struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm,
unsigned int qid);
int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
unsigned int fence_value,
unsigned long timeout);
/* Packet Manager */
......@@ -717,4 +731,6 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
uint64_t *event_page_offset, uint32_t *event_slot_index);
int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);
int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p);
#endif
......@@ -31,6 +31,7 @@
struct mm_struct;
#include "kfd_priv.h"
#include "kfd_dbgmgr.h"
/*
* Initial size for the array of queues.
......@@ -172,6 +173,9 @@ static void kfd_process_wq_release(struct work_struct *work)
pr_debug("Releasing pdd (topology id %d) for process (pasid %d) in workqueue\n",
pdd->dev->id, p->pasid);
if (p->reset_wavefronts)
dbgdev_wave_reset_wavefronts(pdd->dev, p);
amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid);
list_del(&pdd->per_device_list);
......@@ -301,6 +305,8 @@ static struct kfd_process *create_process(const struct task_struct *thread)
if (kfd_init_apertures(process) != 0)
goto err_init_apretures;
process->reset_wavefronts = false;
return process;
err_init_apretures:
......@@ -399,7 +405,12 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid)
mutex_lock(&p->mutex);
if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid))
kfd_dbgmgr_destroy(dev->dbgmgr);
pqm_uninit(&p->pqm);
if (p->reset_wavefronts)
dbgdev_wave_reset_wavefronts(dev, p);
pdd = kfd_get_process_device_data(dev, p);
......
......@@ -158,6 +158,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
struct queue *q;
struct process_queue_node *pqn;
struct kernel_queue *kq;
int num_queues = 0;
struct queue *cur;
BUG_ON(!pqm || !dev || !properties || !qid);
......@@ -172,6 +174,20 @@ int pqm_create_queue(struct process_queue_manager *pqm,
return -1;
}
/*
* for debug process, verify that it is within the static queues limit
* currently limit is set to half of the total avail HQD slots
* If we are just about to create DIQ, the is_debug flag is not set yet
* Hence we also check the type as well
*/
if ((pdd->qpd.is_debug) ||
(type == KFD_QUEUE_TYPE_DIQ)) {
list_for_each_entry(cur, &pdd->qpd.queues_list, list)
num_queues++;
if (num_queues >= dev->device_info->max_no_of_hqd/2)
return (-ENOSPC);
}
retval = find_available_queue_slot(pqm, qid);
if (retval != 0)
return retval;
......@@ -341,7 +357,7 @@ int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
return 0;
}
static __attribute__((unused)) struct kernel_queue *pqm_get_kernel_queue(
struct kernel_queue *pqm_get_kernel_queue(
struct process_queue_manager *pqm,
unsigned int qid)
{
......
......@@ -163,6 +163,27 @@ struct kfd2kgd_calls {
int (*hqd_sdma_destroy)(struct kgd_dev *kgd, void *mqd,
unsigned int timeout);
int (*address_watch_disable)(struct kgd_dev *kgd);
int (*address_watch_execute)(struct kgd_dev *kgd,
unsigned int watch_point_id,
uint32_t cntl_val,
uint32_t addr_hi,
uint32_t addr_lo);
int (*wave_control_execute)(struct kgd_dev *kgd,
uint32_t gfx_index_val,
uint32_t sq_cmd);
uint32_t (*address_watch_get_offset)(struct kgd_dev *kgd,
unsigned int watch_point_id,
unsigned int reg_offset);
bool (*get_atc_vmid_pasid_mapping_valid)(
struct kgd_dev *kgd,
uint8_t vmid);
uint16_t (*get_atc_vmid_pasid_mapping_pasid)(
struct kgd_dev *kgd,
uint8_t vmid);
void (*write_vmid_invalidate_request)(struct kgd_dev *kgd,
uint8_t vmid);
uint16_t (*get_fw_version)(struct kgd_dev *kgd,
enum kgd_engine_type type);
};
......
......@@ -149,10 +149,30 @@
#define KFD_CIK_SDMA_QUEUE_OFFSET 0x200
#define SQ_IND_INDEX 0x8DE0
#define SQ_CMD 0x8DEC
#define SQ_IND_DATA 0x8DE4
/*
* The TCP_WATCHx_xxxx addresses that are shown here are in dwords,
* and that's why they are multiplied by 4
*/
#define TCP_WATCH0_ADDR_H (0x32A0*4)
#define TCP_WATCH1_ADDR_H (0x32A3*4)
#define TCP_WATCH2_ADDR_H (0x32A6*4)
#define TCP_WATCH3_ADDR_H (0x32A9*4)
#define TCP_WATCH0_ADDR_L (0x32A1*4)
#define TCP_WATCH1_ADDR_L (0x32A4*4)
#define TCP_WATCH2_ADDR_L (0x32A7*4)
#define TCP_WATCH3_ADDR_L (0x32AA*4)
#define TCP_WATCH0_CNTL (0x32A2*4)
#define TCP_WATCH1_CNTL (0x32A5*4)
#define TCP_WATCH2_CNTL (0x32A8*4)
#define TCP_WATCH3_CNTL (0x32AB*4)
#define CPC_INT_CNTL 0xC2D0
#define CP_HQD_IQ_RPTR 0xC970u
#define AQL_ENABLE (1U << 0)
#define SDMA0_RLC0_RB_CNTL 0xD400u
#define SDMA_RB_VMID(x) (x << 24)
#define SDMA0_RLC0_RB_BASE 0xD404u
......@@ -186,4 +206,38 @@
#define SDMA0_CNTL 0xD010
#define SDMA1_CNTL 0xD810
enum {
MAX_TRAPID = 8, /* 3 bits in the bitfield. */
MAX_WATCH_ADDRESSES = 4
};
enum {
ADDRESS_WATCH_REG_ADDR_HI = 0,
ADDRESS_WATCH_REG_ADDR_LO,
ADDRESS_WATCH_REG_CNTL,
ADDRESS_WATCH_REG_MAX
};
enum { /* not defined in the CI/KV reg file */
ADDRESS_WATCH_REG_CNTL_ATC_BIT = 0x10000000UL,
ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK = 0x00FFFFFF,
ADDRESS_WATCH_REG_ADDLOW_MASK_EXTENSION = 0x03000000,
/* extend the mask to 26 bits in order to match the low address field */
ADDRESS_WATCH_REG_ADDLOW_SHIFT = 6,
ADDRESS_WATCH_REG_ADDHIGH_MASK = 0xFFFF
};
union TCP_WATCH_CNTL_BITS {
struct {
uint32_t mask:24;
uint32_t vmid:4;
uint32_t atc:1;
uint32_t mode:2;
uint32_t valid:1;
} bitfields, bits;
uint32_t u32All;
signed int i32All;
float f32All;
};
#endif
......@@ -2148,9 +2148,12 @@
#define VCE_CMD_IB_AUTO 0x00000005
#define VCE_CMD_SEMAPHORE 0x00000006
#define ATC_VMID0_PASID_MAPPING 0x339Cu
#define ATC_VMID_PASID_MAPPING_UPDATE_STATUS 0x3398u
#define ATC_VMID_PASID_MAPPING_VALID (1U << 31)
#define ATC_VMID_PASID_MAPPING_UPDATE_STATUS 0x3398u
#define ATC_VMID0_PASID_MAPPING 0x339Cu
#define ATC_VMID_PASID_MAPPING_PASID_MASK (0xFFFF)
#define ATC_VMID_PASID_MAPPING_PASID_SHIFT 0
#define ATC_VMID_PASID_MAPPING_VALID_MASK (0x1 << 31)
#define ATC_VMID_PASID_MAPPING_VALID_SHIFT 31
#define ATC_VM_APERTURE0_CNTL 0x3310u
#define ATS_ACCESS_MODE_NEVER 0
......
......@@ -34,6 +34,13 @@
#define CIK_PIPE_PER_MEC (4)
static const uint32_t watchRegs[MAX_WATCH_ADDRESSES * ADDRESS_WATCH_REG_MAX] = {
TCP_WATCH0_ADDR_H, TCP_WATCH0_ADDR_L, TCP_WATCH0_CNTL,
TCP_WATCH1_ADDR_H, TCP_WATCH1_ADDR_L, TCP_WATCH1_CNTL,
TCP_WATCH2_ADDR_H, TCP_WATCH2_ADDR_L, TCP_WATCH2_CNTL,
TCP_WATCH3_ADDR_H, TCP_WATCH3_ADDR_L, TCP_WATCH3_CNTL
};
struct kgd_mem {
struct radeon_bo *bo;
uint64_t gpu_addr;
......@@ -79,6 +86,23 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
unsigned int timeout);
static int kgd_address_watch_disable(struct kgd_dev *kgd);
static int kgd_address_watch_execute(struct kgd_dev *kgd,
unsigned int watch_point_id,
uint32_t cntl_val,
uint32_t addr_hi,
uint32_t addr_lo);
static int kgd_wave_control_execute(struct kgd_dev *kgd,
uint32_t gfx_index_val,
uint32_t sq_cmd);
static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
unsigned int watch_point_id,
unsigned int reg_offset);
static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid);
static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
uint8_t vmid);
static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
static const struct kfd2kgd_calls kfd2kgd = {
.init_gtt_mem_allocation = alloc_gtt_mem,
......@@ -96,6 +120,13 @@ static const struct kfd2kgd_calls kfd2kgd = {
.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
.hqd_destroy = kgd_hqd_destroy,
.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
.address_watch_disable = kgd_address_watch_disable,
.address_watch_execute = kgd_address_watch_execute,
.wave_control_execute = kgd_wave_control_execute,
.address_watch_get_offset = kgd_address_watch_get_offset,
.get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid,
.get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid,
.write_vmid_invalidate_request = write_vmid_invalidate_request,
.get_fw_version = get_fw_version
};
......@@ -372,8 +403,8 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
* the SW cleared it.
* So the protocol is to always wait & clear.
*/
uint32_t pasid_mapping = (pasid == 0) ? 0 :
(uint32_t)pasid | ATC_VMID_PASID_MAPPING_VALID;
uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
ATC_VMID_PASID_MAPPING_VALID_MASK;
write_register(kgd, ATC_VMID0_PASID_MAPPING + vmid*sizeof(uint32_t),
pasid_mapping);
......@@ -665,6 +696,122 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
return 0;
}
static int kgd_address_watch_disable(struct kgd_dev *kgd)
{
union TCP_WATCH_CNTL_BITS cntl;
unsigned int i;
cntl.u32All = 0;
cntl.bitfields.valid = 0;
cntl.bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
cntl.bitfields.atc = 1;
/* Turning off this address until we set all the registers */
for (i = 0; i < MAX_WATCH_ADDRESSES; i++)
write_register(kgd,
watchRegs[i * ADDRESS_WATCH_REG_MAX +
ADDRESS_WATCH_REG_CNTL],
cntl.u32All);
return 0;
}
static int kgd_address_watch_execute(struct kgd_dev *kgd,
unsigned int watch_point_id,
uint32_t cntl_val,
uint32_t addr_hi,
uint32_t addr_lo)
{
union TCP_WATCH_CNTL_BITS cntl;
cntl.u32All = cntl_val;
/* Turning off this watch point until we set all the registers */
cntl.bitfields.valid = 0;
write_register(kgd,
watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
ADDRESS_WATCH_REG_CNTL],
cntl.u32All);
write_register(kgd,
watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
ADDRESS_WATCH_REG_ADDR_HI],
addr_hi);
write_register(kgd,
watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
ADDRESS_WATCH_REG_ADDR_LO],
addr_lo);
/* Enable the watch point */
cntl.bitfields.valid = 1;
write_register(kgd,
watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
ADDRESS_WATCH_REG_CNTL],
cntl.u32All);
return 0;
}
static int kgd_wave_control_execute(struct kgd_dev *kgd,
uint32_t gfx_index_val,
uint32_t sq_cmd)
{
struct radeon_device *rdev = get_radeon_device(kgd);
uint32_t data;
mutex_lock(&rdev->grbm_idx_mutex);
write_register(kgd, GRBM_GFX_INDEX, gfx_index_val);
write_register(kgd, SQ_CMD, sq_cmd);
/* Restore the GRBM_GFX_INDEX register */
data = INSTANCE_BROADCAST_WRITES | SH_BROADCAST_WRITES |
SE_BROADCAST_WRITES;
write_register(kgd, GRBM_GFX_INDEX, data);
mutex_unlock(&rdev->grbm_idx_mutex);
return 0;
}
static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
unsigned int watch_point_id,
unsigned int reg_offset)
{
return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset];
}
static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid)
{
uint32_t reg;
struct radeon_device *rdev = (struct radeon_device *) kgd;
reg = RREG32(ATC_VMID0_PASID_MAPPING + vmid*4);
return reg & ATC_VMID_PASID_MAPPING_VALID_MASK;
}
static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
uint8_t vmid)
{
uint32_t reg;
struct radeon_device *rdev = (struct radeon_device *) kgd;
reg = RREG32(ATC_VMID0_PASID_MAPPING + vmid*4);
return reg & ATC_VMID_PASID_MAPPING_PASID_MASK;
}
static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
{
struct radeon_device *rdev = (struct radeon_device *) kgd;
return WREG32(VM_INVALIDATE_REQUEST, 1 << vmid);
}
static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
{
struct radeon_device *rdev = (struct radeon_device *) kgd;
......
......@@ -128,6 +128,32 @@ struct kfd_ioctl_get_process_apertures_args {
uint32_t pad;
};
#define MAX_ALLOWED_NUM_POINTS 100
#define MAX_ALLOWED_AW_BUFF_SIZE 4096
#define MAX_ALLOWED_WAC_BUFF_SIZE 128
struct kfd_ioctl_dbg_register_args {
uint32_t gpu_id; /* to KFD */
uint32_t pad;
};
struct kfd_ioctl_dbg_unregister_args {
uint32_t gpu_id; /* to KFD */
uint32_t pad;
};
struct kfd_ioctl_dbg_address_watch_args {
uint64_t content_ptr; /* a pointer to the actual content */
uint32_t gpu_id; /* to KFD */
uint32_t buf_size_in_bytes; /*including gpu_id and buf_size */
};
struct kfd_ioctl_dbg_wave_control_args {
uint64_t content_ptr; /* a pointer to the actual content */
uint32_t gpu_id; /* to KFD */
uint32_t buf_size_in_bytes; /*including gpu_id and buf_size */
};
/* Matching HSA_EVENTTYPE */
#define KFD_IOC_EVENT_SIGNAL 0
#define KFD_IOC_EVENT_NODECHANGE 1
......@@ -198,7 +224,8 @@ struct kfd_event_data {
};
struct kfd_ioctl_wait_events_args {
uint64_t events_ptr; /* to KFD */
uint64_t events_ptr; /* pointed to struct
kfd_event_data array, to KFD */
uint32_t num_events; /* to KFD */
uint32_t wait_for_all; /* to KFD */
uint32_t timeout; /* to KFD */
......@@ -247,7 +274,19 @@ struct kfd_ioctl_wait_events_args {
#define AMDKFD_IOC_WAIT_EVENTS \
AMDKFD_IOWR(0x0C, struct kfd_ioctl_wait_events_args)
#define AMDKFD_IOC_DBG_REGISTER \
AMDKFD_IOW(0x0D, struct kfd_ioctl_dbg_register_args)
#define AMDKFD_IOC_DBG_UNREGISTER \
AMDKFD_IOW(0x0E, struct kfd_ioctl_dbg_unregister_args)
#define AMDKFD_IOC_DBG_ADDRESS_WATCH \
AMDKFD_IOW(0x0F, struct kfd_ioctl_dbg_address_watch_args)
#define AMDKFD_IOC_DBG_WAVE_CONTROL \
AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args)
#define AMDKFD_COMMAND_START 0x01
#define AMDKFD_COMMAND_END 0x0D
#define AMDKFD_COMMAND_END 0x11
#endif
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册