提交 fc93ff60 编写于 作者: D Dave Airlie

Merge tag 'drm-intel-next-2016-08-08' of git://anongit.freedesktop.org/drm-intel into drm-next

- refactor ddi buffer programming a bit (Ville)
- large-scale renaming to untangle naming in the gem code (Chris)
- rework vma/active tracking for accurately reaping idle mappings of shared
  objects (Chris)
- misc dp sst/mst probing corner case fixes (Ville)
- tons of cleanup&tunings all around in gem
- lockless (rcu-protected) request lookup, plus use it everywhere for
  non(b)locking waits (Chris)
- pipe crc debugfs fixes (Rodrigo)
- random fixes all over

* tag 'drm-intel-next-2016-08-08' of git://anongit.freedesktop.org/drm-intel: (222 commits)
  drm/i915: Update DRIVER_DATE to 20160808
  drm/i915: fix aliasing_ppgtt leak
  drm/i915: Update comment before i915_spin_request
  drm/i915: Use drm official vblank_no_hw_counter callback.
  drm/i915: Fix copy_to_user usage for pipe_crc
  Revert "drm/i915: Track active streams also for DP SST"
  drm/i915: fix WaInsertDummyPushConstPs
  drm/i915: Assert that the request hasn't been retired
  drm/i915: Repack fence tiling mode and stride into a single integer
  drm/i915: Document and reject invalid tiling modes
  drm/i915: Remove locking for get_tiling
  drm/i915: Remove pinned check from madvise ioctl
  drm/i915: Reduce locking inside swfinish ioctl
  drm/i915: Remove (struct_mutex) locking for busy-ioctl
  drm/i915: Remove (struct_mutex) locking for wait-ioctl
  drm/i915: Do a nonblocking wait first in pread/pwrite
  drm/i915: Remove unused no-shrinker-steal
  drm/i915: Tidy generation of the GTT mmap offset
  drm/i915/shrinker: Wait before acquiring struct_mutex under oom
  drm/i915: Simplify do_idling() (Ironlake vt-d w/a)
  ...
......@@ -70,6 +70,9 @@ Frontbuffer Tracking
.. kernel-doc:: drivers/gpu/drm/i915/intel_frontbuffer.c
:doc: frontbuffer tracking
.. kernel-doc:: drivers/gpu/drm/i915/intel_frontbuffer.h
:internal:
.. kernel-doc:: drivers/gpu/drm/i915/intel_frontbuffer.c
:internal:
......
......@@ -25,7 +25,6 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o
i915-y += i915_cmd_parser.o \
i915_gem_batch_pool.o \
i915_gem_context.o \
i915_gem_debug.o \
i915_gem_dmabuf.o \
i915_gem_evict.o \
i915_gem_execbuffer.o \
......@@ -33,6 +32,7 @@ i915-y += i915_cmd_parser.o \
i915_gem_gtt.o \
i915_gem.o \
i915_gem_render_state.o \
i915_gem_request.o \
i915_gem_shrinker.o \
i915_gem_stolen.o \
i915_gem_tiling.o \
......@@ -40,6 +40,7 @@ i915-y += i915_cmd_parser.o \
i915_gpu_error.o \
i915_trace_points.o \
intel_breadcrumbs.o \
intel_engine_cs.o \
intel_lrc.o \
intel_mocs.o \
intel_ringbuffer.o \
......
......@@ -62,23 +62,23 @@
* The parser always rejects such commands.
*
* The majority of the problematic commands fall in the MI_* range, with only a
* few specific commands on each ring (e.g. PIPE_CONTROL and MI_FLUSH_DW).
* few specific commands on each engine (e.g. PIPE_CONTROL and MI_FLUSH_DW).
*
* Implementation:
* Each ring maintains tables of commands and registers which the parser uses in
* scanning batch buffers submitted to that ring.
* Each engine maintains tables of commands and registers which the parser
* uses in scanning batch buffers submitted to that engine.
*
* Since the set of commands that the parser must check for is significantly
* smaller than the number of commands supported, the parser tables contain only
* those commands required by the parser. This generally works because command
* opcode ranges have standard command length encodings. So for commands that
* the parser does not need to check, it can easily skip them. This is
* implemented via a per-ring length decoding vfunc.
* implemented via a per-engine length decoding vfunc.
*
* Unfortunately, there are a number of commands that do not follow the standard
* length encoding for their opcode range, primarily amongst the MI_* commands.
* To handle this, the parser provides a way to define explicit "skip" entries
* in the per-ring command tables.
* in the per-engine command tables.
*
* Other command table entries map fairly directly to high level categories
* mentioned above: rejected, master-only, register whitelist. The parser
......@@ -603,7 +603,7 @@ static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header)
return 0;
}
static bool validate_cmds_sorted(struct intel_engine_cs *engine,
static bool validate_cmds_sorted(const struct intel_engine_cs *engine,
const struct drm_i915_cmd_table *cmd_tables,
int cmd_table_count)
{
......@@ -624,8 +624,10 @@ static bool validate_cmds_sorted(struct intel_engine_cs *engine,
u32 curr = desc->cmd.value & desc->cmd.mask;
if (curr < previous) {
DRM_ERROR("CMD: table not sorted ring=%d table=%d entry=%d cmd=0x%08X prev=0x%08X\n",
engine->id, i, j, curr, previous);
DRM_ERROR("CMD: %s [%d] command table not sorted: "
"table=%d entry=%d cmd=0x%08X prev=0x%08X\n",
engine->name, engine->id,
i, j, curr, previous);
ret = false;
}
......@@ -636,7 +638,7 @@ static bool validate_cmds_sorted(struct intel_engine_cs *engine,
return ret;
}
static bool check_sorted(int ring_id,
static bool check_sorted(const struct intel_engine_cs *engine,
const struct drm_i915_reg_descriptor *reg_table,
int reg_count)
{
......@@ -648,8 +650,10 @@ static bool check_sorted(int ring_id,
u32 curr = i915_mmio_reg_offset(reg_table[i].addr);
if (curr < previous) {
DRM_ERROR("CMD: table not sorted ring=%d entry=%d reg=0x%08X prev=0x%08X\n",
ring_id, i, curr, previous);
DRM_ERROR("CMD: %s [%d] register table not sorted: "
"entry=%d reg=0x%08X prev=0x%08X\n",
engine->name, engine->id,
i, curr, previous);
ret = false;
}
......@@ -666,7 +670,7 @@ static bool validate_regs_sorted(struct intel_engine_cs *engine)
for (i = 0; i < engine->reg_table_count; i++) {
table = &engine->reg_tables[i];
if (!check_sorted(engine->id, table->regs, table->num_regs))
if (!check_sorted(engine, table->regs, table->num_regs))
return false;
}
......@@ -736,7 +740,7 @@ static void fini_hash_table(struct intel_engine_cs *engine)
}
/**
* i915_cmd_parser_init_ring() - set cmd parser related fields for a ringbuffer
* intel_engine_init_cmd_parser() - set cmd parser related fields for an engine
* @engine: the engine to initialize
*
* Optionally initializes fields related to batch buffer command parsing in the
......@@ -745,7 +749,7 @@ static void fini_hash_table(struct intel_engine_cs *engine)
*
* Return: non-zero if initialization fails
*/
int i915_cmd_parser_init_ring(struct intel_engine_cs *engine)
int intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
{
const struct drm_i915_cmd_table *cmd_tables;
int cmd_table_count;
......@@ -806,8 +810,7 @@ int i915_cmd_parser_init_ring(struct intel_engine_cs *engine)
engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask;
break;
default:
DRM_ERROR("CMD: cmd_parser_init with unknown ring: %d\n",
engine->id);
MISSING_CASE(engine->id);
BUG();
}
......@@ -829,13 +832,13 @@ int i915_cmd_parser_init_ring(struct intel_engine_cs *engine)
}
/**
* i915_cmd_parser_fini_ring() - clean up cmd parser related fields
* intel_engine_cleanup_cmd_parser() - clean up cmd parser related fields
* @engine: the engine to clean up
*
* Releases any resources related to command parsing that may have been
* initialized for the specified ring.
* initialized for the specified engine.
*/
void i915_cmd_parser_fini_ring(struct intel_engine_cs *engine)
void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine)
{
if (!engine->needs_cmd_parser)
return;
......@@ -866,9 +869,9 @@ find_cmd_in_table(struct intel_engine_cs *engine,
* Returns a pointer to a descriptor for the command specified by cmd_header.
*
* The caller must supply space for a default descriptor via the default_desc
* parameter. If no descriptor for the specified command exists in the ring's
* parameter. If no descriptor for the specified command exists in the engine's
* command parser tables, this function fills in default_desc based on the
* ring's default length encoding and returns default_desc.
* engine's default length encoding and returns default_desc.
*/
static const struct drm_i915_cmd_descriptor*
find_cmd(struct intel_engine_cs *engine,
......@@ -1023,15 +1026,16 @@ static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
}
/**
* i915_needs_cmd_parser() - should a given ring use software command parsing?
* intel_engine_needs_cmd_parser() - should a given engine use software
* command parsing?
* @engine: the engine in question
*
* Only certain platforms require software batch buffer command parsing, and
* only when enabled via module parameter.
*
* Return: true if the ring requires software command parsing
* Return: true if the engine requires software command parsing
*/
bool i915_needs_cmd_parser(struct intel_engine_cs *engine)
bool intel_engine_needs_cmd_parser(struct intel_engine_cs *engine)
{
if (!engine->needs_cmd_parser)
return false;
......@@ -1078,8 +1082,8 @@ static bool check_cmd(const struct intel_engine_cs *engine,
reg_addr);
if (!reg) {
DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (ring=%d)\n",
reg_addr, *cmd, engine->id);
DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (exec_id=%d)\n",
reg_addr, *cmd, engine->exec_id);
return false;
}
......@@ -1159,11 +1163,11 @@ static bool check_cmd(const struct intel_engine_cs *engine,
desc->bits[i].mask;
if (dword != desc->bits[i].expected) {
DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X for bitmask 0x%08X (exp=0x%08X act=0x%08X) (ring=%d)\n",
DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X for bitmask 0x%08X (exp=0x%08X act=0x%08X) (exec_id=%d)\n",
*cmd,
desc->bits[i].mask,
desc->bits[i].expected,
dword, engine->id);
dword, engine->exec_id);
return false;
}
}
......@@ -1189,7 +1193,7 @@ static bool check_cmd(const struct intel_engine_cs *engine,
* Return: non-zero if the parser finds violations or otherwise fails; -EACCES
* if the batch appears legal but should use hardware parsing
*/
int i915_parse_cmds(struct intel_engine_cs *engine,
int intel_engine_cmd_parser(struct intel_engine_cs *engine,
struct drm_i915_gem_object *batch_obj,
struct drm_i915_gem_object *shadow_batch_obj,
u32 batch_start_offset,
......@@ -1295,7 +1299,7 @@ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv)
/* If the command parser is not enabled, report 0 - unsupported */
for_each_engine(engine, dev_priv) {
if (i915_needs_cmd_parser(engine)) {
if (intel_engine_needs_cmd_parser(engine)) {
active = true;
break;
}
......
......@@ -91,7 +91,7 @@ static int i915_capabilities(struct seq_file *m, void *data)
static char get_active_flag(struct drm_i915_gem_object *obj)
{
return obj->active ? '*' : ' ';
return i915_gem_object_is_active(obj) ? '*' : ' ';
}
static char get_pin_flag(struct drm_i915_gem_object *obj)
......@@ -101,7 +101,7 @@ static char get_pin_flag(struct drm_i915_gem_object *obj)
static char get_tiling_flag(struct drm_i915_gem_object *obj)
{
switch (obj->tiling_mode) {
switch (i915_gem_object_get_tiling(obj)) {
default:
case I915_TILING_NONE: return ' ';
case I915_TILING_X: return 'X';
......@@ -125,7 +125,7 @@ static u64 i915_gem_obj_total_ggtt_size(struct drm_i915_gem_object *obj)
struct i915_vma *vma;
list_for_each_entry(vma, &obj->vma_list, obj_link) {
if (vma->is_ggtt && drm_mm_node_allocated(&vma->node))
if (i915_vma_is_ggtt(vma) && drm_mm_node_allocated(&vma->node))
size += vma->node.size;
}
......@@ -138,6 +138,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
struct intel_engine_cs *engine;
struct i915_vma *vma;
unsigned int frontbuffer_bits;
int pin_count = 0;
enum intel_engine_id id;
......@@ -155,17 +156,20 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
obj->base.write_domain);
for_each_engine_id(engine, dev_priv, id)
seq_printf(m, "%x ",
i915_gem_request_get_seqno(obj->last_read_req[id]));
i915_gem_active_get_seqno(&obj->last_read[id],
&obj->base.dev->struct_mutex));
seq_printf(m, "] %x %x%s%s%s",
i915_gem_request_get_seqno(obj->last_write_req),
i915_gem_request_get_seqno(obj->last_fenced_req),
i915_gem_active_get_seqno(&obj->last_write,
&obj->base.dev->struct_mutex),
i915_gem_active_get_seqno(&obj->last_fence,
&obj->base.dev->struct_mutex),
i915_cache_level_str(to_i915(obj->base.dev), obj->cache_level),
obj->dirty ? " dirty" : "",
obj->madv == I915_MADV_DONTNEED ? " purgeable" : "");
if (obj->base.name)
seq_printf(m, " (name: %d)", obj->base.name);
list_for_each_entry(vma, &obj->vma_list, obj_link) {
if (vma->pin_count > 0)
if (i915_vma_is_pinned(vma))
pin_count++;
}
seq_printf(m, " (pinned x %d)", pin_count);
......@@ -174,10 +178,13 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
if (obj->fence_reg != I915_FENCE_REG_NONE)
seq_printf(m, " (fence: %d)", obj->fence_reg);
list_for_each_entry(vma, &obj->vma_list, obj_link) {
if (!drm_mm_node_allocated(&vma->node))
continue;
seq_printf(m, " (%sgtt offset: %08llx, size: %08llx",
vma->is_ggtt ? "g" : "pp",
i915_vma_is_ggtt(vma) ? "g" : "pp",
vma->node.start, vma->node.size);
if (vma->is_ggtt)
if (i915_vma_is_ggtt(vma))
seq_printf(m, ", type: %u", vma->ggtt_view.type);
seq_puts(m, ")");
}
......@@ -192,11 +199,15 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
*t = '\0';
seq_printf(m, " (%s mappable)", s);
}
if (obj->last_write_req != NULL)
seq_printf(m, " (%s)",
i915_gem_request_get_engine(obj->last_write_req)->name);
if (obj->frontbuffer_bits)
seq_printf(m, " (frontbuffer: 0x%03x)", obj->frontbuffer_bits);
engine = i915_gem_active_get_engine(&obj->last_write,
&obj->base.dev->struct_mutex);
if (engine)
seq_printf(m, " (%s)", engine->name);
frontbuffer_bits = atomic_read(&obj->frontbuffer_bits);
if (frontbuffer_bits)
seq_printf(m, " (frontbuffer: 0x%03x)", frontbuffer_bits);
}
static int i915_gem_object_list_info(struct seq_file *m, void *data)
......@@ -338,47 +349,30 @@ static int per_file_stats(int id, void *ptr, void *data)
stats->count++;
stats->total += obj->base.size;
if (!obj->bind_count)
stats->unbound += obj->base.size;
if (obj->base.name || obj->base.dma_buf)
stats->shared += obj->base.size;
if (USES_FULL_PPGTT(obj->base.dev)) {
list_for_each_entry(vma, &obj->vma_list, obj_link) {
struct i915_hw_ppgtt *ppgtt;
if (!drm_mm_node_allocated(&vma->node))
continue;
if (vma->is_ggtt) {
stats->global += obj->base.size;
continue;
}
if (i915_vma_is_ggtt(vma)) {
stats->global += vma->node.size;
} else {
struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vma->vm);
ppgtt = container_of(vma->vm, struct i915_hw_ppgtt, base);
if (ppgtt->file_priv != stats->file_priv)
if (ppgtt->base.file != stats->file_priv)
continue;
if (obj->active) /* XXX per-vma statistic */
stats->active += obj->base.size;
else
stats->inactive += obj->base.size;
return 0;
}
} else {
if (i915_gem_obj_ggtt_bound(obj)) {
stats->global += obj->base.size;
if (obj->active)
stats->active += obj->base.size;
if (i915_vma_is_active(vma))
stats->active += vma->node.size;
else
stats->inactive += obj->base.size;
return 0;
}
stats->inactive += vma->node.size;
}
if (!list_empty(&obj->global_list))
stats->unbound += obj->base.size;
return 0;
}
......@@ -425,8 +419,8 @@ static int per_file_ctx_stats(int id, void *ptr, void *data)
for (n = 0; n < ARRAY_SIZE(ctx->engine); n++) {
if (ctx->engine[n].state)
per_file_stats(0, ctx->engine[n].state, data);
if (ctx->engine[n].ringbuf)
per_file_stats(0, ctx->engine[n].ringbuf->obj, data);
if (ctx->engine[n].ring)
per_file_stats(0, ctx->engine[n].ring->obj, data);
}
return 0;
......@@ -754,13 +748,13 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
int count;
count = 0;
list_for_each_entry(req, &engine->request_list, list)
list_for_each_entry(req, &engine->request_list, link)
count++;
if (count == 0)
continue;
seq_printf(m, "%s requests: %d\n", engine->name, count);
list_for_each_entry(req, &engine->request_list, list) {
list_for_each_entry(req, &engine->request_list, link) {
struct task_struct *task;
rcu_read_lock();
......@@ -768,7 +762,7 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
if (req->pid)
task = pid_task(req->pid, PIDTYPE_PID);
seq_printf(m, " %x @ %d: %s [%d]\n",
req->seqno,
req->fence.seqno,
(int) (jiffies - req->emitted_jiffies),
task ? task->comm : "<unknown>",
task ? task->pid : -1);
......@@ -1205,8 +1199,6 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
intel_runtime_pm_get(dev_priv);
flush_delayed_work(&dev_priv->rps.delayed_resume_work);
if (IS_GEN5(dev)) {
u16 rgvswctl = I915_READ16(MEMSWCTL);
u16 rgvstat = I915_READ16(MEMSTAT_ILK);
......@@ -1381,6 +1373,8 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq));
seq_printf(m, "Min freq: %d MHz\n",
intel_gpu_freq(dev_priv, dev_priv->rps.min_freq));
seq_printf(m, "Boost freq: %d MHz\n",
intel_gpu_freq(dev_priv, dev_priv->rps.boost_freq));
seq_printf(m, "Max freq: %d MHz\n",
intel_gpu_freq(dev_priv, dev_priv->rps.max_freq));
seq_printf(m,
......@@ -1419,7 +1413,7 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
intel_runtime_pm_get(dev_priv);
for_each_engine_id(engine, dev_priv, id) {
acthd[id] = intel_ring_get_active_head(engine);
acthd[id] = intel_engine_get_active_head(engine);
seqno[id] = intel_engine_get_seqno(engine);
}
......@@ -1602,6 +1596,7 @@ static int gen6_drpc_info(struct seq_file *m)
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
u32 rpmodectl1, gt_core_status, rcctl1, rc6vids = 0;
u32 gen9_powergate_enable = 0, gen9_powergate_status = 0;
unsigned forcewake_count;
int count = 0, ret;
......@@ -1629,6 +1624,10 @@ static int gen6_drpc_info(struct seq_file *m)
rpmodectl1 = I915_READ(GEN6_RP_CONTROL);
rcctl1 = I915_READ(GEN6_RC_CONTROL);
if (INTEL_INFO(dev)->gen >= 9) {
gen9_powergate_enable = I915_READ(GEN9_PG_ENABLE);
gen9_powergate_status = I915_READ(GEN9_PWRGT_DOMAIN_STATUS);
}
mutex_unlock(&dev->struct_mutex);
mutex_lock(&dev_priv->rps.hw_lock);
sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
......@@ -1647,6 +1646,12 @@ static int gen6_drpc_info(struct seq_file *m)
yesno(rcctl1 & GEN6_RC_CTL_RC1e_ENABLE));
seq_printf(m, "RC6 Enabled: %s\n",
yesno(rcctl1 & GEN6_RC_CTL_RC6_ENABLE));
if (INTEL_INFO(dev)->gen >= 9) {
seq_printf(m, "Render Well Gating Enabled: %s\n",
yesno(gen9_powergate_enable & GEN9_RENDER_PG_ENABLE));
seq_printf(m, "Media Well Gating Enabled: %s\n",
yesno(gen9_powergate_enable & GEN9_MEDIA_PG_ENABLE));
}
seq_printf(m, "Deep RC6 Enabled: %s\n",
yesno(rcctl1 & GEN6_RC_CTL_RC6p_ENABLE));
seq_printf(m, "Deepest RC6 Enabled: %s\n",
......@@ -1675,6 +1680,14 @@ static int gen6_drpc_info(struct seq_file *m)
seq_printf(m, "Core Power Down: %s\n",
yesno(gt_core_status & GEN6_CORE_CPD_STATE_MASK));
if (INTEL_INFO(dev)->gen >= 9) {
seq_printf(m, "Render Power Well: %s\n",
(gen9_powergate_status &
GEN9_PWRGT_RENDER_STATUS_MASK) ? "Up" : "Down");
seq_printf(m, "Media Power Well: %s\n",
(gen9_powergate_status &
GEN9_PWRGT_MEDIA_STATUS_MASK) ? "Up" : "Down");
}
/* Not exactly sure what this is */
seq_printf(m, "RC6 \"Locked to RPn\" residency since boot: %u\n",
......@@ -1692,7 +1705,7 @@ static int gen6_drpc_info(struct seq_file *m)
GEN6_DECODE_RC6_VID(((rc6vids >> 8) & 0xff)));
seq_printf(m, "RC6++ voltage: %dmV\n",
GEN6_DECODE_RC6_VID(((rc6vids >> 16) & 0xff)));
return 0;
return i915_forcewake_domains(m, NULL);
}
static int i915_drpc_info(struct seq_file *m, void *unused)
......@@ -1896,8 +1909,6 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused)
intel_runtime_pm_get(dev_priv);
flush_delayed_work(&dev_priv->rps.delayed_resume_work);
ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock);
if (ret)
goto out;
......@@ -2019,12 +2030,11 @@ static int i915_gem_framebuffer_info(struct seq_file *m, void *data)
return 0;
}
static void describe_ctx_ringbuf(struct seq_file *m,
struct intel_ringbuffer *ringbuf)
static void describe_ctx_ring(struct seq_file *m, struct intel_ring *ring)
{
seq_printf(m, " (ringbuffer, space: %d, head: %u, tail: %u, last head: %d)",
ringbuf->space, ringbuf->head, ringbuf->tail,
ringbuf->last_retired_head);
ring->space, ring->head, ring->tail,
ring->last_retired_head);
}
static int i915_context_status(struct seq_file *m, void *unused)
......@@ -2068,8 +2078,8 @@ static int i915_context_status(struct seq_file *m, void *unused)
seq_putc(m, ce->initialised ? 'I' : 'i');
if (ce->state)
describe_obj(m, ce->state);
if (ce->ringbuf)
describe_ctx_ringbuf(m, ce->ringbuf);
if (ce->ring)
describe_ctx_ring(m, ce->ring);
seq_putc(m, '\n');
}
......@@ -2467,13 +2477,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
list_empty(&file_priv->rps.link) ? "" : ", active");
rcu_read_unlock();
}
seq_printf(m, "Semaphore boosts: %d%s\n",
dev_priv->rps.semaphores.boosts,
list_empty(&dev_priv->rps.semaphores.link) ? "" : ", active");
seq_printf(m, "MMIO flip boosts: %d%s\n",
dev_priv->rps.mmioflips.boosts,
list_empty(&dev_priv->rps.mmioflips.link) ? "" : ", active");
seq_printf(m, "Kernel boosts: %d\n", dev_priv->rps.boosts);
seq_printf(m, "Kernel (anonymous) boosts: %d\n", dev_priv->rps.boosts);
spin_unlock(&dev_priv->rps.client_lock);
mutex_unlock(&dev->filelist_mutex);
......@@ -3228,7 +3232,7 @@ static int i915_semaphore_status(struct seq_file *m, void *unused)
enum intel_engine_id id;
int j, ret;
if (!i915_semaphore_is_enabled(dev_priv)) {
if (!i915.semaphores) {
seq_puts(m, "Semaphores are disabled\n");
return 0;
}
......@@ -3621,7 +3625,6 @@ i915_pipe_crc_read(struct file *filep, char __user *user_buf, size_t count,
while (n_entries > 0) {
struct intel_pipe_crc_entry *entry =
&pipe_crc->entries[pipe_crc->tail];
int ret;
if (CIRC_CNT(pipe_crc->head, pipe_crc->tail,
INTEL_PIPE_CRC_ENTRIES_NR) < 1)
......@@ -3638,8 +3641,7 @@ i915_pipe_crc_read(struct file *filep, char __user *user_buf, size_t count,
spin_unlock_irq(&pipe_crc->lock);
ret = copy_to_user(user_buf, buf, PIPE_CRC_LINE_LEN);
if (ret == PIPE_CRC_LINE_LEN)
if (copy_to_user(user_buf, buf, PIPE_CRC_LINE_LEN))
return -EFAULT;
user_buf += PIPE_CRC_LINE_LEN;
......@@ -4921,7 +4923,7 @@ i915_drop_caches_set(void *data, u64 val)
return ret;
if (val & DROP_ACTIVE) {
ret = i915_gem_wait_for_idle(dev_priv);
ret = i915_gem_wait_for_idle(dev_priv, true);
if (ret)
goto unlock;
}
......@@ -4950,20 +4952,11 @@ i915_max_freq_get(void *data, u64 *val)
{
struct drm_device *dev = data;
struct drm_i915_private *dev_priv = to_i915(dev);
int ret;
if (INTEL_INFO(dev)->gen < 6)
return -ENODEV;
flush_delayed_work(&dev_priv->rps.delayed_resume_work);
ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock);
if (ret)
return ret;
*val = intel_gpu_freq(dev_priv, dev_priv->rps.max_freq_softlimit);
mutex_unlock(&dev_priv->rps.hw_lock);
return 0;
}
......@@ -4978,8 +4971,6 @@ i915_max_freq_set(void *data, u64 val)
if (INTEL_INFO(dev)->gen < 6)
return -ENODEV;
flush_delayed_work(&dev_priv->rps.delayed_resume_work);
DRM_DEBUG_DRIVER("Manually setting max freq to %llu\n", val);
ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock);
......@@ -5017,20 +5008,11 @@ i915_min_freq_get(void *data, u64 *val)
{
struct drm_device *dev = data;
struct drm_i915_private *dev_priv = to_i915(dev);
int ret;
if (INTEL_INFO(dev)->gen < 6)
if (INTEL_GEN(dev_priv) < 6)
return -ENODEV;
flush_delayed_work(&dev_priv->rps.delayed_resume_work);
ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock);
if (ret)
return ret;
*val = intel_gpu_freq(dev_priv, dev_priv->rps.min_freq_softlimit);
mutex_unlock(&dev_priv->rps.hw_lock);
return 0;
}
......@@ -5042,11 +5024,9 @@ i915_min_freq_set(void *data, u64 val)
u32 hw_max, hw_min;
int ret;
if (INTEL_INFO(dev)->gen < 6)
if (INTEL_GEN(dev_priv) < 6)
return -ENODEV;
flush_delayed_work(&dev_priv->rps.delayed_resume_work);
DRM_DEBUG_DRIVER("Manually setting min freq to %llu\n", val);
ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock);
......@@ -5268,7 +5248,8 @@ static void broadwell_sseu_device_status(struct drm_device *dev,
static int i915_sseu_status(struct seq_file *m, void *unused)
{
struct drm_info_node *node = (struct drm_info_node *) m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(node->minor->dev);
struct drm_device *dev = &dev_priv->drm;
struct sseu_dev_status stat;
if (INTEL_INFO(dev)->gen < 8)
......@@ -5298,6 +5279,9 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
seq_puts(m, "SSEU Device Status\n");
memset(&stat, 0, sizeof(stat));
intel_runtime_pm_get(dev_priv);
if (IS_CHERRYVIEW(dev)) {
cherryview_sseu_device_status(dev, &stat);
} else if (IS_BROADWELL(dev)) {
......@@ -5305,6 +5289,9 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
} else if (INTEL_INFO(dev)->gen >= 9) {
gen9_sseu_device_status(dev, &stat);
}
intel_runtime_pm_put(dev_priv);
seq_printf(m, " Enabled Slice Total: %u\n",
stat.slice_total);
seq_printf(m, " Enabled Subslice Total: %u\n",
......
......@@ -228,27 +228,6 @@ static void intel_detect_pch(struct drm_device *dev)
pci_dev_put(pch);
}
bool i915_semaphore_is_enabled(struct drm_i915_private *dev_priv)
{
if (INTEL_GEN(dev_priv) < 6)
return false;
if (i915.semaphores >= 0)
return i915.semaphores;
/* TODO: make semaphores and Execlists play nicely together */
if (i915.enable_execlists)
return false;
#ifdef CONFIG_INTEL_IOMMU
/* Enable semaphores on SNB when IO remapping is off */
if (IS_GEN6(dev_priv) && intel_iommu_gfx_mapped)
return false;
#endif
return true;
}
static int i915_getparam(struct drm_device *dev, void *data,
struct drm_file *file_priv)
{
......@@ -324,7 +303,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
value = 1;
break;
case I915_PARAM_HAS_SEMAPHORES:
value = i915_semaphore_is_enabled(dev_priv);
value = i915.semaphores;
break;
case I915_PARAM_HAS_PRIME_VMAP_FLUSH:
value = 1;
......@@ -999,6 +978,9 @@ static void intel_sanitize_options(struct drm_i915_private *dev_priv)
i915.enable_ppgtt =
intel_sanitize_enable_ppgtt(dev_priv, i915.enable_ppgtt);
DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt);
i915.semaphores = intel_sanitize_semaphores(dev_priv, i915.semaphores);
DRM_DEBUG_DRIVER("use GPU sempahores? %s\n", yesno(i915.semaphores));
}
/**
......@@ -1011,8 +993,6 @@ static void intel_sanitize_options(struct drm_i915_private *dev_priv)
static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
{
struct drm_device *dev = &dev_priv->drm;
struct i915_ggtt *ggtt = &dev_priv->ggtt;
uint32_t aperture_size;
int ret;
if (i915_inject_load_failure())
......@@ -1022,16 +1002,10 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
intel_sanitize_options(dev_priv);
ret = i915_ggtt_init_hw(dev);
ret = i915_ggtt_probe_hw(dev_priv);
if (ret)
return ret;
ret = i915_ggtt_enable_hw(dev);
if (ret) {
DRM_ERROR("failed to enable GGTT\n");
goto out_ggtt;
}
/* WARNING: Apparently we must kick fbdev drivers before vgacon,
* otherwise the vga fbdev driver falls over. */
ret = i915_kick_out_firmware_fb(dev_priv);
......@@ -1046,6 +1020,16 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
goto out_ggtt;
}
ret = i915_ggtt_init_hw(dev_priv);
if (ret)
return ret;
ret = i915_ggtt_enable_hw(dev_priv);
if (ret) {
DRM_ERROR("failed to enable GGTT\n");
goto out_ggtt;
}
pci_set_master(dev->pdev);
/* overlay on gen2 is broken and can't address above 1G */
......@@ -1058,7 +1042,6 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
}
}
/* 965GM sometimes incorrectly writes to hardware status page (HWS)
* using 32bit addressing, overwriting memory if HWS is located
* above 4GB.
......@@ -1077,19 +1060,6 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
}
}
aperture_size = ggtt->mappable_end;
ggtt->mappable =
io_mapping_create_wc(ggtt->mappable_base,
aperture_size);
if (!ggtt->mappable) {
ret = -EIO;
goto out_ggtt;
}
ggtt->mtrr = arch_phys_wc_add(ggtt->mappable_base,
aperture_size);
pm_qos_add_request(&dev_priv->pm_qos, PM_QOS_CPU_DMA_LATENCY,
PM_QOS_DEFAULT_VALUE);
......@@ -1118,7 +1088,7 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
return 0;
out_ggtt:
i915_ggtt_cleanup_hw(dev);
i915_ggtt_cleanup_hw(dev_priv);
return ret;
}
......@@ -1130,15 +1100,12 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
static void i915_driver_cleanup_hw(struct drm_i915_private *dev_priv)
{
struct drm_device *dev = &dev_priv->drm;
struct i915_ggtt *ggtt = &dev_priv->ggtt;
if (dev->pdev->msi_enabled)
pci_disable_msi(dev->pdev);
pm_qos_remove_request(&dev_priv->pm_qos);
arch_phys_wc_del(ggtt->mtrr);
io_mapping_free(ggtt->mappable);
i915_ggtt_cleanup_hw(dev);
i915_ggtt_cleanup_hw(dev_priv);
}
/**
......@@ -1343,7 +1310,7 @@ void i915_driver_unload(struct drm_device *dev)
i915_destroy_error_state(dev);
/* Flush any outstanding unpin_work. */
flush_workqueue(dev_priv->wq);
drain_workqueue(dev_priv->wq);
intel_guc_fini(dev);
i915_gem_fini(dev);
......@@ -1458,8 +1425,6 @@ static int i915_drm_suspend(struct drm_device *dev)
intel_guc_suspend(dev);
intel_suspend_gt_powersave(dev_priv);
intel_display_suspend(dev);
intel_dp_mst_suspend(dev);
......@@ -1586,15 +1551,13 @@ static int i915_drm_resume(struct drm_device *dev)
disable_rpm_wakeref_asserts(dev_priv);
ret = i915_ggtt_enable_hw(dev);
ret = i915_ggtt_enable_hw(dev_priv);
if (ret)
DRM_ERROR("failed to re-enable GGTT\n");
intel_csr_ucode_resume(dev_priv);
mutex_lock(&dev->struct_mutex);
i915_gem_restore_gtt_mappings(dev);
mutex_unlock(&dev->struct_mutex);
i915_gem_resume(dev);
i915_restore_state(dev);
intel_opregion_setup(dev_priv);
......@@ -1652,6 +1615,7 @@ static int i915_drm_resume(struct drm_device *dev)
intel_opregion_notify_adapter(dev_priv, PCI_D0);
intel_autoenable_gt_powersave(dev_priv);
drm_kms_helper_poll_enable(dev);
enable_rpm_wakeref_asserts(dev_priv);
......@@ -1778,8 +1742,6 @@ int i915_reset(struct drm_i915_private *dev_priv)
unsigned reset_counter;
int ret;
intel_reset_gt_powersave(dev_priv);
mutex_lock(&dev->struct_mutex);
/* Clear any previous failed attempts at recovery. Time to try again. */
......@@ -1835,8 +1797,7 @@ int i915_reset(struct drm_i915_private *dev_priv)
* previous concerns that it doesn't respond well to some forms
* of re-init after reset.
*/
if (INTEL_INFO(dev)->gen > 5)
intel_enable_gt_powersave(dev_priv);
intel_autoenable_gt_powersave(dev_priv);
return 0;
......@@ -2462,7 +2423,6 @@ static int intel_runtime_resume(struct device *device)
* we can do is to hope that things will still work (and disable RPM).
*/
i915_gem_init_swizzling(dev);
gen6_update_ring_freq(dev_priv);
intel_runtime_pm_enable_interrupts(dev_priv);
......@@ -2618,6 +2578,7 @@ static struct drm_driver driver = {
.postclose = i915_driver_postclose,
.set_busid = drm_pci_set_busid,
.gem_close_object = i915_gem_close_object,
.gem_free_object = i915_gem_free_object,
.gem_vm_ops = &i915_gem_vm_ops,
......
此差异已折叠。
此差异已折叠。
......@@ -41,15 +41,15 @@
/**
* i915_gem_batch_pool_init() - initialize a batch buffer pool
* @dev: the drm device
* @engine: the associated request submission engine
* @pool: the batch buffer pool
*/
void i915_gem_batch_pool_init(struct drm_device *dev,
void i915_gem_batch_pool_init(struct intel_engine_cs *engine,
struct i915_gem_batch_pool *pool)
{
int n;
pool->dev = dev;
pool->engine = engine;
for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++)
INIT_LIST_HEAD(&pool->cache_list[n]);
......@@ -65,18 +65,17 @@ void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool)
{
int n;
WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex));
lockdep_assert_held(&pool->engine->i915->drm.struct_mutex);
for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) {
while (!list_empty(&pool->cache_list[n])) {
struct drm_i915_gem_object *obj =
list_first_entry(&pool->cache_list[n],
struct drm_i915_gem_object,
batch_pool_link);
list_del(&obj->batch_pool_link);
drm_gem_object_unreference(&obj->base);
}
struct drm_i915_gem_object *obj, *next;
list_for_each_entry_safe(obj, next,
&pool->cache_list[n],
batch_pool_link)
i915_gem_object_put(obj);
INIT_LIST_HEAD(&pool->cache_list[n]);
}
}
......@@ -102,7 +101,7 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
struct list_head *list;
int n;
WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex));
lockdep_assert_held(&pool->engine->i915->drm.struct_mutex);
/* Compute a power-of-two bucket, but throw everything greater than
* 16KiB into the same bucket: i.e. the the buckets hold objects of
......@@ -115,13 +114,14 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
list_for_each_entry_safe(tmp, next, list, batch_pool_link) {
/* The batches are strictly LRU ordered */
if (tmp->active)
if (!i915_gem_active_is_idle(&tmp->last_read[pool->engine->id],
&tmp->base.dev->struct_mutex))
break;
/* While we're looping, do some clean up */
if (tmp->madv == __I915_MADV_PURGED) {
list_del(&tmp->batch_pool_link);
drm_gem_object_unreference(&tmp->base);
i915_gem_object_put(tmp);
continue;
}
......@@ -134,7 +134,7 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
if (obj == NULL) {
int ret;
obj = i915_gem_object_create(pool->dev, size);
obj = i915_gem_object_create(&pool->engine->i915->drm, size);
if (IS_ERR(obj))
return obj;
......
......@@ -27,13 +27,15 @@
#include "i915_drv.h"
struct intel_engine_cs;
struct i915_gem_batch_pool {
struct drm_device *dev;
struct intel_engine_cs *engine;
struct list_head cache_list[4];
};
/* i915_gem_batch_pool.c */
void i915_gem_batch_pool_init(struct drm_device *dev,
void i915_gem_batch_pool_init(struct intel_engine_cs *engine,
struct i915_gem_batch_pool *pool);
void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool);
struct drm_i915_gem_object*
......
......@@ -134,21 +134,6 @@ static int get_context_size(struct drm_i915_private *dev_priv)
return ret;
}
static void i915_gem_context_clean(struct i915_gem_context *ctx)
{
struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
struct i915_vma *vma, *next;
if (!ppgtt)
return;
list_for_each_entry_safe(vma, next, &ppgtt->base.inactive_list,
vm_link) {
if (WARN_ON(__i915_vma_unbind_no_wait(vma)))
break;
}
}
void i915_gem_context_free(struct kref *ctx_ref)
{
struct i915_gem_context *ctx = container_of(ctx_ref, typeof(*ctx), ref);
......@@ -156,13 +141,7 @@ void i915_gem_context_free(struct kref *ctx_ref)
lockdep_assert_held(&ctx->i915->drm.struct_mutex);
trace_i915_context_free(ctx);
/*
* This context is going away and we need to remove all VMAs still
* around. This is to handle imported shared objects for which
* destructor did not run when their handles were closed.
*/
i915_gem_context_clean(ctx);
GEM_BUG_ON(!ctx->closed);
i915_ppgtt_put(ctx->ppgtt);
......@@ -173,10 +152,10 @@ void i915_gem_context_free(struct kref *ctx_ref)
continue;
WARN_ON(ce->pin_count);
if (ce->ringbuf)
intel_ringbuffer_free(ce->ringbuf);
if (ce->ring)
intel_ring_free(ce->ring);
drm_gem_object_unreference(&ce->state->base);
i915_gem_object_put(ce->state);
}
list_del(&ctx->link);
......@@ -216,7 +195,7 @@ i915_gem_alloc_context_obj(struct drm_device *dev, size_t size)
ret = i915_gem_object_set_cache_level(obj, I915_CACHE_L3_LLC);
/* Failure shouldn't ever happen this early */
if (WARN_ON(ret)) {
drm_gem_object_unreference(&obj->base);
i915_gem_object_put(obj);
return ERR_PTR(ret);
}
}
......@@ -224,6 +203,37 @@ i915_gem_alloc_context_obj(struct drm_device *dev, size_t size)
return obj;
}
static void i915_ppgtt_close(struct i915_address_space *vm)
{
struct list_head *phases[] = {
&vm->active_list,
&vm->inactive_list,
&vm->unbound_list,
NULL,
}, **phase;
GEM_BUG_ON(vm->closed);
vm->closed = true;
for (phase = phases; *phase; phase++) {
struct i915_vma *vma, *vn;
list_for_each_entry_safe(vma, vn, *phase, vm_link)
if (!i915_vma_is_closed(vma))
i915_vma_close(vma);
}
}
static void context_close(struct i915_gem_context *ctx)
{
GEM_BUG_ON(ctx->closed);
ctx->closed = true;
if (ctx->ppgtt)
i915_ppgtt_close(&ctx->ppgtt->base);
ctx->file_priv = ERR_PTR(-EBADF);
i915_gem_context_put(ctx);
}
static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out)
{
int ret;
......@@ -305,7 +315,7 @@ __create_hw_context(struct drm_device *dev,
return ctx;
err_out:
i915_gem_context_unreference(ctx);
context_close(ctx);
return ERR_PTR(ret);
}
......@@ -327,13 +337,14 @@ i915_gem_create_context(struct drm_device *dev,
return ctx;
if (USES_FULL_PPGTT(dev)) {
struct i915_hw_ppgtt *ppgtt = i915_ppgtt_create(dev, file_priv);
struct i915_hw_ppgtt *ppgtt =
i915_ppgtt_create(to_i915(dev), file_priv);
if (IS_ERR(ppgtt)) {
DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n",
PTR_ERR(ppgtt));
idr_remove(&file_priv->context_idr, ctx->user_handle);
i915_gem_context_unreference(ctx);
context_close(ctx);
return ERR_CAST(ppgtt);
}
......@@ -390,7 +401,7 @@ static void i915_gem_context_unpin(struct i915_gem_context *ctx,
if (ce->state)
i915_gem_object_ggtt_unpin(ce->state);
i915_gem_context_unreference(ctx);
i915_gem_context_put(ctx);
}
}
......@@ -504,7 +515,7 @@ void i915_gem_context_fini(struct drm_device *dev)
lockdep_assert_held(&dev->struct_mutex);
i915_gem_context_unreference(dctx);
context_close(dctx);
dev_priv->kernel_context = NULL;
ida_destroy(&dev_priv->context_hw_ida);
......@@ -514,8 +525,7 @@ static int context_idr_cleanup(int id, void *p, void *data)
{
struct i915_gem_context *ctx = p;
ctx->file_priv = ERR_PTR(-EBADF);
i915_gem_context_unreference(ctx);
context_close(ctx);
return 0;
}
......@@ -552,11 +562,12 @@ static inline int
mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
{
struct drm_i915_private *dev_priv = req->i915;
struct intel_ring *ring = req->ring;
struct intel_engine_cs *engine = req->engine;
u32 flags = hw_flags | MI_MM_SPACE_GTT;
const int num_rings =
/* Use an extended w/a on ivb+ if signalling from other rings */
i915_semaphore_is_enabled(dev_priv) ?
i915.semaphores ?
hweight32(INTEL_INFO(dev_priv)->ring_mask) - 1 :
0;
int len, ret;
......@@ -567,7 +578,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
* itlb_before_ctx_switch.
*/
if (IS_GEN6(dev_priv)) {
ret = engine->flush(req, I915_GEM_GPU_DOMAINS, 0);
ret = engine->emit_flush(req, EMIT_INVALIDATE);
if (ret)
return ret;
}
......@@ -589,64 +600,64 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
/* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
if (INTEL_GEN(dev_priv) >= 7) {
intel_ring_emit(engine, MI_ARB_ON_OFF | MI_ARB_DISABLE);
intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_DISABLE);
if (num_rings) {
struct intel_engine_cs *signaller;
intel_ring_emit(engine,
intel_ring_emit(ring,
MI_LOAD_REGISTER_IMM(num_rings));
for_each_engine(signaller, dev_priv) {
if (signaller == engine)
continue;
intel_ring_emit_reg(engine,
intel_ring_emit_reg(ring,
RING_PSMI_CTL(signaller->mmio_base));
intel_ring_emit(engine,
intel_ring_emit(ring,
_MASKED_BIT_ENABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
}
}
}
intel_ring_emit(engine, MI_NOOP);
intel_ring_emit(engine, MI_SET_CONTEXT);
intel_ring_emit(engine,
intel_ring_emit(ring, MI_NOOP);
intel_ring_emit(ring, MI_SET_CONTEXT);
intel_ring_emit(ring,
i915_gem_obj_ggtt_offset(req->ctx->engine[RCS].state) |
flags);
/*
* w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
* WaMiSetContext_Hang:snb,ivb,vlv
*/
intel_ring_emit(engine, MI_NOOP);
intel_ring_emit(ring, MI_NOOP);
if (INTEL_GEN(dev_priv) >= 7) {
if (num_rings) {
struct intel_engine_cs *signaller;
i915_reg_t last_reg = {}; /* keep gcc quiet */
intel_ring_emit(engine,
intel_ring_emit(ring,
MI_LOAD_REGISTER_IMM(num_rings));
for_each_engine(signaller, dev_priv) {
if (signaller == engine)
continue;
last_reg = RING_PSMI_CTL(signaller->mmio_base);
intel_ring_emit_reg(engine, last_reg);
intel_ring_emit(engine,
intel_ring_emit_reg(ring, last_reg);
intel_ring_emit(ring,
_MASKED_BIT_DISABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
}
/* Insert a delay before the next switch! */
intel_ring_emit(engine,
intel_ring_emit(ring,
MI_STORE_REGISTER_MEM |
MI_SRM_LRM_GLOBAL_GTT);
intel_ring_emit_reg(engine, last_reg);
intel_ring_emit(engine, engine->scratch.gtt_offset);
intel_ring_emit(engine, MI_NOOP);
intel_ring_emit_reg(ring, last_reg);
intel_ring_emit(ring, engine->scratch.gtt_offset);
intel_ring_emit(ring, MI_NOOP);
}
intel_ring_emit(engine, MI_ARB_ON_OFF | MI_ARB_ENABLE);
intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_ENABLE);
}
intel_ring_advance(engine);
intel_ring_advance(ring);
return ret;
}
......@@ -654,7 +665,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
static int remap_l3(struct drm_i915_gem_request *req, int slice)
{
u32 *remap_info = req->i915->l3_parity.remap_info[slice];
struct intel_engine_cs *engine = req->engine;
struct intel_ring *ring = req->ring;
int i, ret;
if (!remap_info)
......@@ -669,13 +680,13 @@ static int remap_l3(struct drm_i915_gem_request *req, int slice)
* here because no other code should access these registers other than
* at initialization time.
*/
intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4));
intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4));
for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) {
intel_ring_emit_reg(engine, GEN7_L3LOG(slice, i));
intel_ring_emit(engine, remap_info[i]);
intel_ring_emit_reg(ring, GEN7_L3LOG(slice, i));
intel_ring_emit(ring, remap_info[i]);
}
intel_ring_emit(engine, MI_NOOP);
intel_ring_advance(engine);
intel_ring_emit(ring, MI_NOOP);
intel_ring_advance(ring);
return 0;
}
......@@ -752,9 +763,8 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
return 0;
/* Trying to pin first makes error handling easier. */
ret = i915_gem_obj_ggtt_pin(to->engine[RCS].state,
to->ggtt_alignment,
0);
ret = i915_gem_object_ggtt_pin(to->engine[RCS].state, NULL, 0,
to->ggtt_alignment, 0);
if (ret)
return ret;
......@@ -814,8 +824,8 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
* MI_SET_CONTEXT instead of when the next seqno has completed.
*/
if (from != NULL) {
from->engine[RCS].state->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
i915_vma_move_to_active(i915_gem_obj_to_ggtt(from->engine[RCS].state), req);
struct drm_i915_gem_object *obj = from->engine[RCS].state;
/* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
* whole damn pipeline, we don't need to explicitly mark the
* object dirty. The only exception is that the context must be
......@@ -823,14 +833,14 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
* able to defer doing this until we know the object would be
* swapped, but there is no way to do that yet.
*/
from->engine[RCS].state->dirty = 1;
obj->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
i915_vma_move_to_active(i915_gem_obj_to_ggtt(obj), req, 0);
/* obj is kept alive until the next request by its active ref */
i915_gem_object_ggtt_unpin(from->engine[RCS].state);
i915_gem_context_unreference(from);
i915_gem_object_ggtt_unpin(obj);
i915_gem_context_put(from);
}
i915_gem_context_reference(to);
engine->last_context = to;
engine->last_context = i915_gem_context_get(to);
/* GEN8 does *not* require an explicit reload if the PDPs have been
* setup, and we do not wish to move them.
......@@ -894,8 +904,9 @@ int i915_switch_context(struct drm_i915_gem_request *req)
{
struct intel_engine_cs *engine = req->engine;
WARN_ON(i915.enable_execlists);
lockdep_assert_held(&req->i915->drm.struct_mutex);
if (i915.enable_execlists)
return 0;
if (!req->ctx->engine[engine->id].state) {
struct i915_gem_context *to = req->ctx;
......@@ -914,10 +925,9 @@ int i915_switch_context(struct drm_i915_gem_request *req)
}
if (to != engine->last_context) {
i915_gem_context_reference(to);
if (engine->last_context)
i915_gem_context_unreference(engine->last_context);
engine->last_context = to;
i915_gem_context_put(engine->last_context);
engine->last_context = i915_gem_context_get(to);
}
return 0;
......@@ -926,6 +936,33 @@ int i915_switch_context(struct drm_i915_gem_request *req)
return do_rcs_switch(req);
}
int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv)
{
struct intel_engine_cs *engine;
for_each_engine(engine, dev_priv) {
struct drm_i915_gem_request *req;
int ret;
if (engine->last_context == NULL)
continue;
if (engine->last_context == dev_priv->kernel_context)
continue;
req = i915_gem_request_alloc(engine, dev_priv->kernel_context);
if (IS_ERR(req))
return PTR_ERR(req);
ret = i915_switch_context(req);
i915_add_request_no_flush(req);
if (ret)
return ret;
}
return 0;
}
static bool contexts_enabled(struct drm_device *dev)
{
return i915.enable_execlists || to_i915(dev)->hw_context_size;
......@@ -985,7 +1022,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
}
idr_remove(&file_priv->context_idr, ctx->user_handle);
i915_gem_context_unreference(ctx);
context_close(ctx);
mutex_unlock(&dev->struct_mutex);
DRM_DEBUG_DRIVER("HW context %d destroyed\n", args->ctx_id);
......
......@@ -23,9 +23,13 @@
* Authors:
* Dave Airlie <airlied@redhat.com>
*/
#include <linux/dma-buf.h>
#include <linux/reservation.h>
#include <drm/drmP.h>
#include "i915_drv.h"
#include <linux/dma-buf.h>
static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf)
{
......@@ -218,25 +222,73 @@ static const struct dma_buf_ops i915_dmabuf_ops = {
.end_cpu_access = i915_gem_end_cpu_access,
};
static void export_fences(struct drm_i915_gem_object *obj,
struct dma_buf *dma_buf)
{
struct reservation_object *resv = dma_buf->resv;
struct drm_i915_gem_request *req;
unsigned long active;
int idx;
active = __I915_BO_ACTIVE(obj);
if (!active)
return;
/* Serialise with execbuf to prevent concurrent fence-loops */
mutex_lock(&obj->base.dev->struct_mutex);
/* Mark the object for future fences before racily adding old fences */
obj->base.dma_buf = dma_buf;
ww_mutex_lock(&resv->lock, NULL);
for_each_active(active, idx) {
req = i915_gem_active_get(&obj->last_read[idx],
&obj->base.dev->struct_mutex);
if (!req)
continue;
if (reservation_object_reserve_shared(resv) == 0)
reservation_object_add_shared_fence(resv, &req->fence);
i915_gem_request_put(req);
}
req = i915_gem_active_get(&obj->last_write,
&obj->base.dev->struct_mutex);
if (req) {
reservation_object_add_excl_fence(resv, &req->fence);
i915_gem_request_put(req);
}
ww_mutex_unlock(&resv->lock);
mutex_unlock(&obj->base.dev->struct_mutex);
}
struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
struct drm_gem_object *gem_obj, int flags)
{
struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
struct dma_buf *dma_buf;
exp_info.ops = &i915_dmabuf_ops;
exp_info.size = gem_obj->size;
exp_info.flags = flags;
exp_info.priv = gem_obj;
if (obj->ops->dmabuf_export) {
int ret = obj->ops->dmabuf_export(obj);
if (ret)
return ERR_PTR(ret);
}
return dma_buf_export(&exp_info);
dma_buf = dma_buf_export(&exp_info);
if (IS_ERR(dma_buf))
return dma_buf;
export_fences(obj, dma_buf);
return dma_buf;
}
static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
......@@ -278,8 +330,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
* Importing dmabuf exported from out own gem increases
* refcount on gem itself instead of f_count of dmabuf.
*/
drm_gem_object_reference(&obj->base);
return &obj->base;
return &i915_gem_object_get(obj)->base;
}
}
......@@ -300,6 +351,16 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops);
obj->base.import_attach = attach;
/* We use GTT as shorthand for a coherent domain, one that is
* neither in the GPU cache nor in the CPU cache, where all
* writes are immediately visible in memory. (That's not strictly
* true, but it's close! There are internal buffers such as the
* write-combined buffer or a delay through the chipset for GTT
* writes that do require us to treat GTT as a separate cache domain.)
*/
obj->base.read_domains = I915_GEM_DOMAIN_GTT;
obj->base.write_domain = 0;
return &obj->base;
fail_detach:
......
......@@ -33,41 +33,23 @@
#include "intel_drv.h"
#include "i915_trace.h"
static int switch_to_pinned_context(struct drm_i915_private *dev_priv)
static bool
gpu_is_idle(struct drm_i915_private *dev_priv)
{
struct intel_engine_cs *engine;
if (i915.enable_execlists)
return 0;
for_each_engine(engine, dev_priv) {
struct drm_i915_gem_request *req;
int ret;
if (engine->last_context == NULL)
continue;
if (engine->last_context == dev_priv->kernel_context)
continue;
req = i915_gem_request_alloc(engine, dev_priv->kernel_context);
if (IS_ERR(req))
return PTR_ERR(req);
ret = i915_switch_context(req);
i915_add_request_no_flush(req);
if (ret)
return ret;
if (intel_engine_is_active(engine))
return false;
}
return 0;
return true;
}
static bool
mark_free(struct i915_vma *vma, struct list_head *unwind)
{
if (vma->pin_count)
if (i915_vma_is_pinned(vma))
return false;
if (WARN_ON(!list_empty(&vma->exec_list)))
......@@ -79,7 +61,6 @@ mark_free(struct i915_vma *vma, struct list_head *unwind)
/**
* i915_gem_evict_something - Evict vmas to make room for binding a new one
* @dev: drm_device
* @vm: address space to evict from
* @min_size: size of the desired free space
* @alignment: alignment constraint of the desired free space
......@@ -102,42 +83,37 @@ mark_free(struct i915_vma *vma, struct list_head *unwind)
* memory in e.g. the shrinker.
*/
int
i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm,
int min_size, unsigned alignment, unsigned cache_level,
unsigned long start, unsigned long end,
i915_gem_evict_something(struct i915_address_space *vm,
u64 min_size, u64 alignment,
unsigned cache_level,
u64 start, u64 end,
unsigned flags)
{
struct list_head eviction_list, unwind_list;
struct i915_vma *vma;
int ret = 0;
int pass = 0;
struct drm_i915_private *dev_priv = to_i915(vm->dev);
struct list_head eviction_list;
struct list_head *phases[] = {
&vm->inactive_list,
&vm->active_list,
NULL,
}, **phase;
struct i915_vma *vma, *next;
int ret;
trace_i915_gem_evict(dev, min_size, alignment, flags);
trace_i915_gem_evict(vm, min_size, alignment, flags);
/*
* The goal is to evict objects and amalgamate space in LRU order.
* The oldest idle objects reside on the inactive list, which is in
* retirement order. The next objects to retire are those on the (per
* ring) active list that do not have an outstanding flush. Once the
* hardware reports completion (the seqno is updated after the
* batchbuffer has been finished) the clean buffer objects would
* be retired to the inactive list. Any dirty objects would be added
* to the tail of the flushing list. So after processing the clean
* active objects we need to emit a MI_FLUSH to retire the flushing
* list, hence the retirement order of the flushing list is in
* advance of the dirty objects on the active lists.
* retirement order. The next objects to retire are those in flight,
* on the active list, again in retirement order.
*
* The retirement sequence is thus:
* 1. Inactive objects (already retired)
* 2. Clean active objects
* 3. Flushing list
* 4. Dirty active objects.
* 2. Active objects (will stall on unbinding)
*
* On each list, the oldest objects lie at the HEAD with the freshest
* object on the TAIL.
*/
INIT_LIST_HEAD(&unwind_list);
if (start != 0 || end != vm->total) {
drm_mm_init_scan_with_range(&vm->mm, min_size,
alignment, cache_level,
......@@ -145,96 +121,84 @@ i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm,
} else
drm_mm_init_scan(&vm->mm, min_size, alignment, cache_level);
search_again:
/* First see if there is a large enough contiguous idle region... */
list_for_each_entry(vma, &vm->inactive_list, vm_link) {
if (mark_free(vma, &unwind_list))
goto found;
}
if (flags & PIN_NONBLOCK)
goto none;
phases[1] = NULL;
/* Now merge in the soon-to-be-expired objects... */
list_for_each_entry(vma, &vm->active_list, vm_link) {
if (mark_free(vma, &unwind_list))
search_again:
INIT_LIST_HEAD(&eviction_list);
phase = phases;
do {
list_for_each_entry(vma, *phase, vm_link)
if (mark_free(vma, &eviction_list))
goto found;
}
} while (*++phase);
none:
/* Nothing found, clean up and bail out! */
while (!list_empty(&unwind_list)) {
vma = list_first_entry(&unwind_list,
struct i915_vma,
exec_list);
list_for_each_entry_safe(vma, next, &eviction_list, exec_list) {
ret = drm_mm_scan_remove_block(&vma->node);
BUG_ON(ret);
list_del_init(&vma->exec_list);
INIT_LIST_HEAD(&vma->exec_list);
}
/* Can we unpin some objects such as idle hw contents,
* or pending flips?
* or pending flips? But since only the GGTT has global entries
* such as scanouts, rinbuffers and contexts, we can skip the
* purge when inspecting per-process local address spaces.
*/
if (flags & PIN_NONBLOCK)
if (!i915_is_ggtt(vm) || flags & PIN_NONBLOCK)
return -ENOSPC;
/* Only idle the GPU and repeat the search once */
if (pass++ == 0) {
struct drm_i915_private *dev_priv = to_i915(dev);
if (gpu_is_idle(dev_priv)) {
/* If we still have pending pageflip completions, drop
* back to userspace to give our workqueues time to
* acquire our locks and unpin the old scanouts.
*/
return intel_has_pending_fb_unpin(vm->dev) ? -EAGAIN : -ENOSPC;
}
if (i915_is_ggtt(vm)) {
ret = switch_to_pinned_context(dev_priv);
/* Not everything in the GGTT is tracked via vma (otherwise we
* could evict as required with minimal stalling) so we are forced
* to idle the GPU and explicitly retire outstanding requests in
* the hopes that we can then remove contexts and the like only
* bound by their active reference.
*/
ret = i915_gem_switch_to_kernel_context(dev_priv);
if (ret)
return ret;
}
ret = i915_gem_wait_for_idle(dev_priv);
ret = i915_gem_wait_for_idle(dev_priv, true);
if (ret)
return ret;
i915_gem_retire_requests(dev_priv);
goto search_again;
}
/* If we still have pending pageflip completions, drop
* back to userspace to give our workqueues time to
* acquire our locks and unpin the old scanouts.
*/
return intel_has_pending_fb_unpin(dev) ? -EAGAIN : -ENOSPC;
found:
/* drm_mm doesn't allow any other other operations while
* scanning, therefore store to be evicted objects on a
* temporary list. */
INIT_LIST_HEAD(&eviction_list);
while (!list_empty(&unwind_list)) {
vma = list_first_entry(&unwind_list,
struct i915_vma,
exec_list);
if (drm_mm_scan_remove_block(&vma->node)) {
list_move(&vma->exec_list, &eviction_list);
drm_gem_object_reference(&vma->obj->base);
continue;
}
* scanning, therefore store to-be-evicted objects on a
* temporary list and take a reference for all before
* calling unbind (which may remove the active reference
* of any of our objects, thus corrupting the list).
*/
list_for_each_entry_safe(vma, next, &eviction_list, exec_list) {
if (drm_mm_scan_remove_block(&vma->node))
__i915_vma_pin(vma);
else
list_del_init(&vma->exec_list);
}
/* Unbinding will emit any required flushes */
while (!list_empty(&eviction_list)) {
struct drm_gem_object *obj;
vma = list_first_entry(&eviction_list,
struct i915_vma,
exec_list);
obj = &vma->obj->base;
list_del_init(&vma->exec_list);
__i915_vma_unpin(vma);
if (ret == 0)
ret = i915_vma_unbind(vma);
drm_gem_object_unreference(obj);
}
return ret;
}
......@@ -256,8 +220,8 @@ i915_gem_evict_for_vma(struct i915_vma *target)
vma = container_of(node, typeof(*vma), node);
if (vma->pin_count) {
if (!vma->exec_entry || (vma->pin_count > 1))
if (i915_vma_is_pinned(vma)) {
if (!vma->exec_entry || i915_vma_pin_count(vma) > 1)
/* Object is pinned for some other use */
return -EBUSY;
......@@ -303,22 +267,21 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle)
struct drm_i915_private *dev_priv = to_i915(vm->dev);
if (i915_is_ggtt(vm)) {
ret = switch_to_pinned_context(dev_priv);
ret = i915_gem_switch_to_kernel_context(dev_priv);
if (ret)
return ret;
}
ret = i915_gem_wait_for_idle(dev_priv);
ret = i915_gem_wait_for_idle(dev_priv, true);
if (ret)
return ret;
i915_gem_retire_requests(dev_priv);
WARN_ON(!list_empty(&vm->active_list));
}
list_for_each_entry_safe(vma, next, &vm->inactive_list, vm_link)
if (vma->pin_count == 0)
if (!i915_vma_is_pinned(vma))
WARN_ON(i915_vma_unbind(vma));
return 0;
......
......@@ -86,20 +86,22 @@ static void i965_write_fence_reg(struct drm_device *dev, int reg,
if (obj) {
u32 size = i915_gem_obj_ggtt_size(obj);
unsigned int tiling = i915_gem_object_get_tiling(obj);
unsigned int stride = i915_gem_object_get_stride(obj);
uint64_t val;
/* Adjust fence size to match tiled area */
if (obj->tiling_mode != I915_TILING_NONE) {
uint32_t row_size = obj->stride *
(obj->tiling_mode == I915_TILING_Y ? 32 : 8);
if (tiling != I915_TILING_NONE) {
uint32_t row_size = stride *
(tiling == I915_TILING_Y ? 32 : 8);
size = (size / row_size) * row_size;
}
val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) &
0xfffff000) << 32;
val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000;
val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift;
if (obj->tiling_mode == I915_TILING_Y)
val |= (uint64_t)((stride / 128) - 1) << fence_pitch_shift;
if (tiling == I915_TILING_Y)
val |= 1 << I965_FENCE_TILING_Y_SHIFT;
val |= I965_FENCE_REG_VALID;
......@@ -122,6 +124,8 @@ static void i915_write_fence_reg(struct drm_device *dev, int reg,
if (obj) {
u32 size = i915_gem_obj_ggtt_size(obj);
unsigned int tiling = i915_gem_object_get_tiling(obj);
unsigned int stride = i915_gem_object_get_stride(obj);
int pitch_val;
int tile_width;
......@@ -131,17 +135,17 @@ static void i915_write_fence_reg(struct drm_device *dev, int reg,
"object 0x%08llx [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size);
if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
if (tiling == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
tile_width = 128;
else
tile_width = 512;
/* Note: pitch better be a power of two tile widths */
pitch_val = obj->stride / tile_width;
pitch_val = stride / tile_width;
pitch_val = ffs(pitch_val) - 1;
val = i915_gem_obj_ggtt_offset(obj);
if (obj->tiling_mode == I915_TILING_Y)
if (tiling == I915_TILING_Y)
val |= 1 << I830_FENCE_TILING_Y_SHIFT;
val |= I915_FENCE_SIZE_BITS(size);
val |= pitch_val << I830_FENCE_PITCH_SHIFT;
......@@ -161,6 +165,8 @@ static void i830_write_fence_reg(struct drm_device *dev, int reg,
if (obj) {
u32 size = i915_gem_obj_ggtt_size(obj);
unsigned int tiling = i915_gem_object_get_tiling(obj);
unsigned int stride = i915_gem_object_get_stride(obj);
uint32_t pitch_val;
WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) ||
......@@ -169,11 +175,11 @@ static void i830_write_fence_reg(struct drm_device *dev, int reg,
"object 0x%08llx not 512K or pot-size 0x%08x aligned\n",
i915_gem_obj_ggtt_offset(obj), size);
pitch_val = obj->stride / 128;
pitch_val = stride / 128;
pitch_val = ffs(pitch_val) - 1;
val = i915_gem_obj_ggtt_offset(obj);
if (obj->tiling_mode == I915_TILING_Y)
if (tiling == I915_TILING_Y)
val |= 1 << I830_FENCE_TILING_Y_SHIFT;
val |= I830_FENCE_SIZE_BITS(size);
val |= pitch_val << I830_FENCE_PITCH_SHIFT;
......@@ -201,9 +207,12 @@ static void i915_gem_write_fence(struct drm_device *dev, int reg,
if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj))
mb();
WARN(obj && (!obj->stride || !obj->tiling_mode),
WARN(obj &&
(!i915_gem_object_get_stride(obj) ||
!i915_gem_object_get_tiling(obj)),
"bogus fence setup with stride: 0x%x, tiling mode: %i\n",
obj->stride, obj->tiling_mode);
i915_gem_object_get_stride(obj),
i915_gem_object_get_tiling(obj));
if (IS_GEN2(dev))
i830_write_fence_reg(dev, reg, obj);
......@@ -248,7 +257,7 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
{
if (obj->tiling_mode)
if (i915_gem_object_is_tiled(obj))
i915_gem_release_mmap(obj);
/* As we do not have an associated fence register, we will force
......@@ -261,15 +270,8 @@ static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
static int
i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
{
if (obj->last_fenced_req) {
int ret = i915_wait_request(obj->last_fenced_req);
if (ret)
return ret;
i915_gem_request_assign(&obj->last_fenced_req, NULL);
}
return 0;
return i915_gem_active_retire(&obj->last_fence,
&obj->base.dev->struct_mutex);
}
/**
......@@ -368,7 +370,7 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
{
struct drm_device *dev = obj->base.dev;
struct drm_i915_private *dev_priv = to_i915(dev);
bool enable = obj->tiling_mode != I915_TILING_NONE;
bool enable = i915_gem_object_is_tiled(obj);
struct drm_i915_fence_reg *reg;
int ret;
......@@ -438,7 +440,7 @@ i915_gem_object_pin_fence(struct drm_i915_gem_object *obj)
WARN_ON(!ggtt_vma ||
dev_priv->fence_regs[obj->fence_reg].pin_count >
ggtt_vma->pin_count);
i915_vma_pin_count(ggtt_vma));
dev_priv->fence_regs[obj->fence_reg].pin_count++;
return true;
} else
......@@ -484,7 +486,7 @@ void i915_gem_restore_fences(struct drm_device *dev)
*/
if (reg->obj) {
i915_gem_object_update_fence(reg->obj, reg,
reg->obj->tiling_mode);
i915_gem_object_get_tiling(reg->obj));
} else {
i915_gem_write_fence(dev, i, NULL);
}
......
此差异已折叠。
......@@ -36,6 +36,8 @@
#include <linux/io-mapping.h>
#include "i915_gem_request.h"
struct drm_i915_file_private;
typedef uint32_t gen6_pte_t;
......@@ -178,12 +180,32 @@ struct i915_vma {
struct drm_i915_gem_object *obj;
struct i915_address_space *vm;
void __iomem *iomap;
u64 size;
unsigned int flags;
/**
* How many users have pinned this object in GTT space. The following
* users can each hold at most one reference: pwrite/pread, execbuffer
* (objects are not allowed multiple times for the same batchbuffer),
* and the framebuffer code. When switching/pageflipping, the
* framebuffer code has at most two buffers pinned per crtc.
*
* In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3
* bits with absolutely no headroom. So use 4 bits.
*/
#define I915_VMA_PIN_MASK 0xf
#define I915_VMA_PIN_OVERFLOW BIT(5)
/** Flags and address space this VMA is bound to */
#define GLOBAL_BIND (1<<0)
#define LOCAL_BIND (1<<1)
unsigned int bound : 4;
bool is_ggtt : 1;
#define I915_VMA_GLOBAL_BIND BIT(6)
#define I915_VMA_LOCAL_BIND BIT(7)
#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW)
#define I915_VMA_GGTT BIT(8)
#define I915_VMA_CLOSED BIT(9)
unsigned int active;
struct i915_gem_active last_read[I915_NUM_ENGINES];
/**
* Support different GGTT views into the same object.
......@@ -208,20 +230,46 @@ struct i915_vma {
struct hlist_node exec_node;
unsigned long exec_handle;
struct drm_i915_gem_exec_object2 *exec_entry;
/**
* How many users have pinned this object in GTT space. The following
* users can each hold at most one reference: pwrite/pread, execbuffer
* (objects are not allowed multiple times for the same batchbuffer),
* and the framebuffer code. When switching/pageflipping, the
* framebuffer code has at most two buffers pinned per crtc.
*
* In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3
* bits with absolutely no headroom. So use 4 bits. */
unsigned int pin_count:4;
#define DRM_I915_GEM_OBJECT_MAX_PIN_COUNT 0xf
};
static inline bool i915_vma_is_ggtt(const struct i915_vma *vma)
{
return vma->flags & I915_VMA_GGTT;
}
static inline bool i915_vma_is_closed(const struct i915_vma *vma)
{
return vma->flags & I915_VMA_CLOSED;
}
static inline unsigned int i915_vma_get_active(const struct i915_vma *vma)
{
return vma->active;
}
static inline bool i915_vma_is_active(const struct i915_vma *vma)
{
return i915_vma_get_active(vma);
}
static inline void i915_vma_set_active(struct i915_vma *vma,
unsigned int engine)
{
vma->active |= BIT(engine);
}
static inline void i915_vma_clear_active(struct i915_vma *vma,
unsigned int engine)
{
vma->active &= ~BIT(engine);
}
static inline bool i915_vma_has_active_engine(const struct i915_vma *vma,
unsigned int engine)
{
return vma->active & BIT(engine);
}
struct i915_page_dma {
struct page *page;
union {
......@@ -272,11 +320,20 @@ struct i915_pml4 {
struct i915_address_space {
struct drm_mm mm;
struct drm_device *dev;
/* Every address space belongs to a struct file - except for the global
* GTT that is owned by the driver (and so @file is set to NULL). In
* principle, no information should leak from one context to another
* (or between files/processes etc) unless explicitly shared by the
* owner. Tracking the owner is important in order to free up per-file
* objects along with the file, to aide resource tracking, and to
* assign blame.
*/
struct drm_i915_file_private *file;
struct list_head global_link;
u64 start; /* Start offset always 0 for dri2 */
u64 total; /* size addr space maps (ex. 2GB for ggtt) */
bool is_ggtt;
bool closed;
struct i915_page_scratch *scratch_page;
struct i915_page_table *scratch_pt;
......@@ -306,6 +363,13 @@ struct i915_address_space {
*/
struct list_head inactive_list;
/**
* List of vma that have been unbound.
*
* A reference is not held on the buffer while on this list.
*/
struct list_head unbound_list;
/* FIXME: Need a more generic return type */
gen6_pte_t (*pte_encode)(dma_addr_t addr,
enum i915_cache_level level,
......@@ -338,7 +402,7 @@ struct i915_address_space {
u32 flags);
};
#define i915_is_ggtt(V) ((V)->is_ggtt)
#define i915_is_ggtt(V) (!(V)->file)
/* The Graphics Translation Table is the way in which GEN hardware translates a
* Graphics Virtual Address into a Physical Address. In addition to the normal
......@@ -354,7 +418,6 @@ struct i915_ggtt {
size_t stolen_usable_size; /* Total size minus BIOS reserved */
size_t stolen_reserved_base;
size_t stolen_reserved_size;
size_t size; /* Total size of Global GTT */
u64 mappable_end; /* End offset that we can CPU map */
struct io_mapping *mappable; /* Mapping to our CPU mappable region */
phys_addr_t mappable_base; /* PA of our GMADR */
......@@ -365,8 +428,6 @@ struct i915_ggtt {
bool do_idle_maps;
int mtrr;
int (*probe)(struct i915_ggtt *ggtt);
};
struct i915_hw_ppgtt {
......@@ -380,8 +441,6 @@ struct i915_hw_ppgtt {
struct i915_page_directory pd; /* GEN6-7 */
};
struct drm_i915_file_private *file_priv;
gen6_pte_t __iomem *pd_addr;
int (*enable)(struct i915_hw_ppgtt *ppgtt);
......@@ -521,14 +580,15 @@ i915_page_dir_dma_addr(const struct i915_hw_ppgtt *ppgtt, const unsigned n)
px_dma(ppgtt->base.scratch_pd);
}
int i915_ggtt_init_hw(struct drm_device *dev);
int i915_ggtt_enable_hw(struct drm_device *dev);
void i915_gem_init_ggtt(struct drm_device *dev);
void i915_ggtt_cleanup_hw(struct drm_device *dev);
int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv);
int i915_ggtt_init_hw(struct drm_i915_private *dev_priv);
int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv);
int i915_gem_init_ggtt(struct drm_i915_private *dev_priv);
void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv);
int i915_ppgtt_init_hw(struct drm_device *dev);
void i915_ppgtt_release(struct kref *kref);
struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_device *dev,
struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_i915_private *dev_priv,
struct drm_i915_file_private *fpriv);
static inline void i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt)
{
......@@ -562,9 +622,66 @@ i915_ggtt_view_equal(const struct i915_ggtt_view *a,
return true;
}
size_t
i915_ggtt_view_size(struct drm_i915_gem_object *obj,
const struct i915_ggtt_view *view);
/* Flags used by pin/bind&friends. */
#define PIN_NONBLOCK BIT(0)
#define PIN_MAPPABLE BIT(1)
#define PIN_ZONE_4G BIT(2)
#define PIN_MBZ BIT(5) /* I915_VMA_PIN_OVERFLOW */
#define PIN_GLOBAL BIT(6) /* I915_VMA_GLOBAL_BIND */
#define PIN_USER BIT(7) /* I915_VMA_LOCAL_BIND */
#define PIN_UPDATE BIT(8)
#define PIN_HIGH BIT(9)
#define PIN_OFFSET_BIAS BIT(10)
#define PIN_OFFSET_FIXED BIT(11)
#define PIN_OFFSET_MASK (~4095)
int __i915_vma_do_pin(struct i915_vma *vma,
u64 size, u64 alignment, u64 flags);
static inline int __must_check
i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
{
BUILD_BUG_ON(PIN_MBZ != I915_VMA_PIN_OVERFLOW);
BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND);
BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND);
/* Pin early to prevent the shrinker/eviction logic from destroying
* our vma as we insert and bind.
*/
if (likely(((++vma->flags ^ flags) & I915_VMA_BIND_MASK) == 0))
return 0;
return __i915_vma_do_pin(vma, size, alignment, flags);
}
static inline int i915_vma_pin_count(const struct i915_vma *vma)
{
return vma->flags & I915_VMA_PIN_MASK;
}
static inline bool i915_vma_is_pinned(const struct i915_vma *vma)
{
return i915_vma_pin_count(vma);
}
static inline void __i915_vma_pin(struct i915_vma *vma)
{
vma->flags++;
GEM_BUG_ON(vma->flags & I915_VMA_PIN_OVERFLOW);
}
static inline void __i915_vma_unpin(struct i915_vma *vma)
{
GEM_BUG_ON(!i915_vma_is_pinned(vma));
vma->flags--;
}
static inline void i915_vma_unpin(struct i915_vma *vma)
{
GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
__i915_vma_unpin(vma);
}
/**
* i915_vma_pin_iomap - calls ioremap_wc to map the GGTT VMA via the aperture
......@@ -580,6 +697,7 @@ i915_ggtt_view_size(struct drm_i915_gem_object *obj,
* Returns a valid iomapped pointer or ERR_PTR.
*/
void __iomem *i915_vma_pin_iomap(struct i915_vma *vma);
#define IO_ERR_PTR(x) ((void __iomem *)ERR_PTR(x))
/**
* i915_vma_unpin_iomap - unpins the mapping returned from i915_vma_iomap
......@@ -593,9 +711,8 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma);
static inline void i915_vma_unpin_iomap(struct i915_vma *vma)
{
lockdep_assert_held(&vma->vm->dev->struct_mutex);
GEM_BUG_ON(vma->pin_count == 0);
GEM_BUG_ON(vma->iomap == NULL);
vma->pin_count--;
i915_vma_unpin(vma);
}
#endif
......@@ -28,10 +28,18 @@
#include "i915_drv.h"
#include "intel_renderstate.h"
struct render_state {
const struct intel_renderstate_rodata *rodata;
struct drm_i915_gem_object *obj;
u64 ggtt_offset;
u32 aux_batch_size;
u32 aux_batch_offset;
};
static const struct intel_renderstate_rodata *
render_state_get_rodata(const int gen)
render_state_get_rodata(const struct drm_i915_gem_request *req)
{
switch (gen) {
switch (INTEL_GEN(req->i915)) {
case 6:
return &gen6_null_state;
case 7:
......@@ -45,35 +53,6 @@ render_state_get_rodata(const int gen)
return NULL;
}
static int render_state_init(struct render_state *so,
struct drm_i915_private *dev_priv)
{
int ret;
so->gen = INTEL_GEN(dev_priv);
so->rodata = render_state_get_rodata(so->gen);
if (so->rodata == NULL)
return 0;
if (so->rodata->batch_items * 4 > 4096)
return -EINVAL;
so->obj = i915_gem_object_create(&dev_priv->drm, 4096);
if (IS_ERR(so->obj))
return PTR_ERR(so->obj);
ret = i915_gem_obj_ggtt_pin(so->obj, 4096, 0);
if (ret)
goto free_gem;
so->ggtt_offset = i915_gem_obj_ggtt_offset(so->obj);
return 0;
free_gem:
drm_gem_object_unreference(&so->obj->base);
return ret;
}
/*
* Macro to add commands to auxiliary batch.
* This macro only checks for page overflow before inserting the commands,
......@@ -96,6 +75,7 @@ static int render_state_setup(struct render_state *so)
{
struct drm_device *dev = so->obj->base.dev;
const struct intel_renderstate_rodata *rodata = so->rodata;
const bool has_64bit_reloc = INTEL_GEN(dev) >= 8;
unsigned int i = 0, reloc_index = 0;
struct page *page;
u32 *d;
......@@ -114,7 +94,7 @@ static int render_state_setup(struct render_state *so)
if (i * 4 == rodata->reloc[reloc_index]) {
u64 r = s + so->ggtt_offset;
s = lower_32_bits(r);
if (so->gen >= 8) {
if (has_64bit_reloc) {
if (i + 1 >= rodata->batch_items ||
rodata->batch[i + 1] != 0) {
ret = -EINVAL;
......@@ -192,67 +172,55 @@ static int render_state_setup(struct render_state *so)
#undef OUT_BATCH
void i915_gem_render_state_fini(struct render_state *so)
{
i915_gem_object_ggtt_unpin(so->obj);
drm_gem_object_unreference(&so->obj->base);
}
int i915_gem_render_state_prepare(struct intel_engine_cs *engine,
struct render_state *so)
int i915_gem_render_state_init(struct drm_i915_gem_request *req)
{
struct render_state so;
int ret;
if (WARN_ON(engine->id != RCS))
if (WARN_ON(req->engine->id != RCS))
return -ENOENT;
ret = render_state_init(so, engine->i915);
if (ret)
return ret;
if (so->rodata == NULL)
so.rodata = render_state_get_rodata(req);
if (!so.rodata)
return 0;
ret = render_state_setup(so);
if (ret) {
i915_gem_render_state_fini(so);
return ret;
}
if (so.rodata->batch_items * 4 > 4096)
return -EINVAL;
return 0;
}
so.obj = i915_gem_object_create(&req->i915->drm, 4096);
if (IS_ERR(so.obj))
return PTR_ERR(so.obj);
int i915_gem_render_state_init(struct drm_i915_gem_request *req)
{
struct render_state so;
int ret;
ret = i915_gem_render_state_prepare(req->engine, &so);
ret = i915_gem_object_ggtt_pin(so.obj, NULL, 0, 0, 0);
if (ret)
return ret;
goto err_obj;
if (so.rodata == NULL)
return 0;
so.ggtt_offset = i915_gem_obj_ggtt_offset(so.obj);
ret = render_state_setup(&so);
if (ret)
goto err_unpin;
ret = req->engine->dispatch_execbuffer(req, so.ggtt_offset,
ret = req->engine->emit_bb_start(req, so.ggtt_offset,
so.rodata->batch_items * 4,
I915_DISPATCH_SECURE);
if (ret)
goto out;
goto err_unpin;
if (so.aux_batch_size > 8) {
ret = req->engine->dispatch_execbuffer(req,
ret = req->engine->emit_bb_start(req,
(so.ggtt_offset +
so.aux_batch_offset),
so.aux_batch_size,
I915_DISPATCH_SECURE);
if (ret)
goto out;
goto err_unpin;
}
i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req);
out:
i915_gem_render_state_fini(&so);
i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req, 0);
err_unpin:
i915_gem_object_ggtt_unpin(so.obj);
err_obj:
i915_gem_object_put(so.obj);
return ret;
}
......@@ -26,24 +26,6 @@
#include <linux/types.h>
struct intel_renderstate_rodata {
const u32 *reloc;
const u32 *batch;
const u32 batch_items;
};
struct render_state {
const struct intel_renderstate_rodata *rodata;
struct drm_i915_gem_object *obj;
u64 ggtt_offset;
int gen;
u32 aux_batch_size;
u32 aux_batch_offset;
};
int i915_gem_render_state_init(struct drm_i915_gem_request *req);
void i915_gem_render_state_fini(struct render_state *so);
int i915_gem_render_state_prepare(struct intel_engine_cs *engine,
struct render_state *so);
#endif /* _I915_GEM_RENDER_STATE_H_ */
此差异已折叠。
此差异已折叠。
此差异已折叠。
......@@ -173,6 +173,7 @@ static const struct intel_device_info intel_pineview_info = {
.gen = 3, .is_g33 = 1, .is_pineview = 1, .is_mobile = 1, .num_pipes = 2,
.need_gfx_hws = 1, .has_hotplug = 1,
.has_overlay = 1,
.ring_mask = RENDER_RING,
GEN_DEFAULT_PIPEOFFSETS,
CURSOR_OFFSETS,
};
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
......@@ -160,7 +160,6 @@ extern int intel_guc_resume(struct drm_device *dev);
int i915_guc_submission_init(struct drm_i915_private *dev_priv);
int i915_guc_submission_enable(struct drm_i915_private *dev_priv);
int i915_guc_wq_check_space(struct drm_i915_gem_request *rq);
int i915_guc_submit(struct drm_i915_gem_request *rq);
void i915_guc_submission_disable(struct drm_i915_private *dev_priv);
void i915_guc_submission_fini(struct drm_i915_private *dev_priv);
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册