提交 51d61207 编写于 作者: D Dave Airlie

Merge branch 'drm-intel-next' of git://anongit.freedesktop.org/drm-intel into drm-next

drm-intel-next-2016-08-22:
- bugfixes and cleanups for rcu-protected requests (Chris)
- atomic modeset fixes for gpu reset on pre-g4x (Maarten&Ville)
- guc submission improvements (Dave Gordon)
- panel power sequence cleanup (Imre)
- better use of stolen and unmappable ggtt (Chris), plus prep work to make that
  happen
- rework of framebuffer offsets, prep for multi-plane framebuffers (Ville)
- fully partial ggtt vmaps, including fenced ones (Chris)
- move lots more of the gem tracking from the object to the vma (Chris)
- tune the command parser (Chris)
- allow fbc without fences on recent platforms (Chris)
- fbc frontbuffer tracking fixes (Chris)
- fast prefaulting using io-mappping.h pgprot caching (Chris)

* 'drm-intel-next' of git://anongit.freedesktop.org/drm-intel: (141 commits)
  io-mapping: Fixup for different names of writecombine
  io-mapping.h: s/PAGE_KERNEL_IO/PAGE_KERNEL/
  drm/i915: Update DRIVER_DATE to 20160822
  drm/i915: Use remap_io_mapping() to prefault all PTE in a single pass
  drm/i915: Embed the io-mapping struct inside drm_i915_private
  io-mapping: Always create a struct to hold metadata about the io-mapping
  drm/i915/fbc: Allow on unfenced surfaces, for recent gen
  drm/i915/fbc: Don't set an illegal fence if unfenced
  drm/i915: Flush delayed fence releases after reset
  drm/i915: Reattach comment, complete type specification
  drm/i915/cmdparser: Accelerate copies from WC memory
  drm/i915/cmdparser: Use binary search for faster register lookup
  drm/i915/cmdparser: Check for SKIP descriptors first
  drm/i915/cmdparser: Compare against the previous command descriptor
  drm/i915/cmdparser: Improve hash function
  drm/i915/cmdparser: Only cache the dst vmap
  drm/i915/cmdparser: Use cached vmappings
  drm/i915/cmdparser: Add the TIMESTAMP register for the other engines
  drm/i915/cmdparser: Make initialisation failure non-fatal
  drm/i915: Stop discarding GTT cache-domain on unbind vma
  ...
......@@ -317,16 +317,11 @@ static phys_addr_t __init i85x_stolen_base(int num, int slot, int func,
static phys_addr_t __init i865_stolen_base(int num, int slot, int func,
size_t stolen_size)
{
u16 toud;
u16 toud = 0;
/*
* FIXME is the graphics stolen memory region
* always at TOUD? Ie. is it always the last
* one to be allocated by the BIOS?
*/
toud = read_pci_config_16(0, 0, 0, I865_TOUD);
return (phys_addr_t)toud << 16;
return (phys_addr_t)(toud << 16) + i845_tseg_size();
}
static phys_addr_t __init gen3_stolen_base(int num, int slot, int func,
......
......@@ -845,6 +845,8 @@ void intel_gtt_insert_page(dma_addr_t addr,
unsigned int flags)
{
intel_private.driver->write_entry(addr, pg, flags);
if (intel_private.driver->chipset_flush)
intel_private.driver->chipset_flush();
}
EXPORT_SYMBOL(intel_gtt_insert_page);
......
......@@ -47,7 +47,7 @@ obj-$(CONFIG_DRM_RADEON)+= radeon/
obj-$(CONFIG_DRM_AMDGPU)+= amd/amdgpu/
obj-$(CONFIG_DRM_MGA) += mga/
obj-$(CONFIG_DRM_I810) += i810/
obj-$(CONFIG_DRM_I915) += i915/
obj-$(CONFIG_DRM_I915) += i915/
obj-$(CONFIG_DRM_MGAG200) += mgag200/
obj-$(CONFIG_DRM_VC4) += vc4/
obj-$(CONFIG_DRM_CIRRUS_QEMU) += cirrus/
......
......@@ -3,12 +3,16 @@
# Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
subdir-ccflags-$(CONFIG_DRM_I915_WERROR) := -Werror
subdir-ccflags-y += \
$(call as-instr,movntdqa (%eax)$(comma)%xmm0,-DCONFIG_AS_MOVNTDQA)
# Please keep these build lists sorted!
# core driver code
i915-y := i915_drv.o \
i915_irq.o \
i915_memcpy.o \
i915_mm.o \
i915_params.o \
i915_pci.o \
i915_suspend.o \
......@@ -110,6 +114,6 @@ i915-y += intel_gvt.o
include $(src)/gvt/Makefile
endif
obj-$(CONFIG_DRM_I915) += i915.o
obj-$(CONFIG_DRM_I915) += i915.o
CFLAGS_i915_trace_points.o := -I$(src)
......@@ -86,24 +86,25 @@
* general bitmasking mechanism.
*/
#define STD_MI_OPCODE_MASK 0xFF800000
#define STD_3D_OPCODE_MASK 0xFFFF0000
#define STD_2D_OPCODE_MASK 0xFFC00000
#define STD_MFX_OPCODE_MASK 0xFFFF0000
#define STD_MI_OPCODE_SHIFT (32 - 9)
#define STD_3D_OPCODE_SHIFT (32 - 16)
#define STD_2D_OPCODE_SHIFT (32 - 10)
#define STD_MFX_OPCODE_SHIFT (32 - 16)
#define MIN_OPCODE_SHIFT 16
#define CMD(op, opm, f, lm, fl, ...) \
{ \
.flags = (fl) | ((f) ? CMD_DESC_FIXED : 0), \
.cmd = { (op), (opm) }, \
.cmd = { (op), ~0u << (opm) }, \
.length = { (lm) }, \
__VA_ARGS__ \
}
/* Convenience macros to compress the tables */
#define SMI STD_MI_OPCODE_MASK
#define S3D STD_3D_OPCODE_MASK
#define S2D STD_2D_OPCODE_MASK
#define SMFX STD_MFX_OPCODE_MASK
#define SMI STD_MI_OPCODE_SHIFT
#define S3D STD_3D_OPCODE_SHIFT
#define S2D STD_2D_OPCODE_SHIFT
#define SMFX STD_MFX_OPCODE_SHIFT
#define F true
#define S CMD_DESC_SKIP
#define R CMD_DESC_REJECT
......@@ -350,6 +351,9 @@ static const struct drm_i915_cmd_descriptor hsw_blt_cmds[] = {
CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ),
};
static const struct drm_i915_cmd_descriptor noop_desc =
CMD(MI_NOOP, SMI, F, 1, S);
#undef CMD
#undef SMI
#undef S3D
......@@ -458,6 +462,7 @@ static const struct drm_i915_reg_descriptor gen7_render_regs[] = {
REG32(GEN7_GPGPU_DISPATCHDIMX),
REG32(GEN7_GPGPU_DISPATCHDIMY),
REG32(GEN7_GPGPU_DISPATCHDIMZ),
REG64_IDX(RING_TIMESTAMP, BSD_RING_BASE),
REG64_IDX(GEN7_SO_NUM_PRIMS_WRITTEN, 0),
REG64_IDX(GEN7_SO_NUM_PRIMS_WRITTEN, 1),
REG64_IDX(GEN7_SO_NUM_PRIMS_WRITTEN, 2),
......@@ -473,6 +478,7 @@ static const struct drm_i915_reg_descriptor gen7_render_regs[] = {
REG32(GEN7_L3SQCREG1),
REG32(GEN7_L3CNTLREG2),
REG32(GEN7_L3CNTLREG3),
REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE),
};
static const struct drm_i915_reg_descriptor hsw_render_regs[] = {
......@@ -502,7 +508,10 @@ static const struct drm_i915_reg_descriptor hsw_render_regs[] = {
};
static const struct drm_i915_reg_descriptor gen7_blt_regs[] = {
REG64_IDX(RING_TIMESTAMP, RENDER_RING_BASE),
REG64_IDX(RING_TIMESTAMP, BSD_RING_BASE),
REG32(BCS_SWCTRL),
REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE),
};
static const struct drm_i915_reg_descriptor ivb_master_regs[] = {
......@@ -691,12 +700,26 @@ struct cmd_node {
* non-opcode bits being set. But if we don't include those bits, some 3D
* commands may hash to the same bucket due to not including opcode bits that
* make the command unique. For now, we will risk hashing to the same bucket.
*
* If we attempt to generate a perfect hash, we should be able to look at bits
* 31:29 of a command from a batch buffer and use the full mask for that
* client. The existing INSTR_CLIENT_MASK/SHIFT defines can be used for this.
*/
#define CMD_HASH_MASK STD_MI_OPCODE_MASK
static inline u32 cmd_header_key(u32 x)
{
u32 shift;
switch (x >> INSTR_CLIENT_SHIFT) {
default:
case INSTR_MI_CLIENT:
shift = STD_MI_OPCODE_SHIFT;
break;
case INSTR_RC_CLIENT:
shift = STD_3D_OPCODE_SHIFT;
break;
case INSTR_BC_CLIENT:
shift = STD_2D_OPCODE_SHIFT;
break;
}
return x >> shift;
}
static int init_hash_table(struct intel_engine_cs *engine,
const struct drm_i915_cmd_table *cmd_tables,
......@@ -720,7 +743,7 @@ static int init_hash_table(struct intel_engine_cs *engine,
desc_node->desc = desc;
hash_add(engine->cmd_hash, &desc_node->node,
desc->cmd.value & CMD_HASH_MASK);
cmd_header_key(desc->cmd.value));
}
}
......@@ -746,17 +769,15 @@ static void fini_hash_table(struct intel_engine_cs *engine)
* Optionally initializes fields related to batch buffer command parsing in the
* struct intel_engine_cs based on whether the platform requires software
* command parsing.
*
* Return: non-zero if initialization fails
*/
int intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
{
const struct drm_i915_cmd_table *cmd_tables;
int cmd_table_count;
int ret;
if (!IS_GEN7(engine->i915))
return 0;
return;
switch (engine->id) {
case RCS:
......@@ -811,24 +832,27 @@ int intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
break;
default:
MISSING_CASE(engine->id);
BUG();
return;
}
BUG_ON(!validate_cmds_sorted(engine, cmd_tables, cmd_table_count));
BUG_ON(!validate_regs_sorted(engine));
WARN_ON(!hash_empty(engine->cmd_hash));
if (!validate_cmds_sorted(engine, cmd_tables, cmd_table_count)) {
DRM_ERROR("%s: command descriptions are not sorted\n",
engine->name);
return;
}
if (!validate_regs_sorted(engine)) {
DRM_ERROR("%s: registers are not sorted\n", engine->name);
return;
}
ret = init_hash_table(engine, cmd_tables, cmd_table_count);
if (ret) {
DRM_ERROR("CMD: cmd_parser_init failed!\n");
DRM_ERROR("%s: initialised failed!\n", engine->name);
fini_hash_table(engine);
return ret;
return;
}
engine->needs_cmd_parser = true;
return 0;
}
/**
......@@ -853,12 +877,9 @@ find_cmd_in_table(struct intel_engine_cs *engine,
struct cmd_node *desc_node;
hash_for_each_possible(engine->cmd_hash, desc_node, node,
cmd_header & CMD_HASH_MASK) {
cmd_header_key(cmd_header)) {
const struct drm_i915_cmd_descriptor *desc = desc_node->desc;
u32 masked_cmd = desc->cmd.mask & cmd_header;
u32 masked_value = desc->cmd.value & desc->cmd.mask;
if (masked_cmd == masked_value)
if (((cmd_header ^ desc->cmd.value) & desc->cmd.mask) == 0)
return desc;
}
......@@ -876,11 +897,14 @@ find_cmd_in_table(struct intel_engine_cs *engine,
static const struct drm_i915_cmd_descriptor*
find_cmd(struct intel_engine_cs *engine,
u32 cmd_header,
const struct drm_i915_cmd_descriptor *desc,
struct drm_i915_cmd_descriptor *default_desc)
{
const struct drm_i915_cmd_descriptor *desc;
u32 mask;
if (((cmd_header ^ desc->cmd.value) & desc->cmd.mask) == 0)
return desc;
desc = find_cmd_in_table(engine, cmd_header);
if (desc)
return desc;
......@@ -889,140 +913,127 @@ find_cmd(struct intel_engine_cs *engine,
if (!mask)
return NULL;
BUG_ON(!default_desc);
default_desc->flags = CMD_DESC_SKIP;
default_desc->cmd.value = cmd_header;
default_desc->cmd.mask = ~0u << MIN_OPCODE_SHIFT;
default_desc->length.mask = mask;
default_desc->flags = CMD_DESC_SKIP;
return default_desc;
}
static const struct drm_i915_reg_descriptor *
find_reg(const struct drm_i915_reg_descriptor *table,
int count, u32 addr)
__find_reg(const struct drm_i915_reg_descriptor *table, int count, u32 addr)
{
int i;
for (i = 0; i < count; i++) {
if (i915_mmio_reg_offset(table[i].addr) == addr)
return &table[i];
int start = 0, end = count;
while (start < end) {
int mid = start + (end - start) / 2;
int ret = addr - i915_mmio_reg_offset(table[mid].addr);
if (ret < 0)
end = mid;
else if (ret > 0)
start = mid + 1;
else
return &table[mid];
}
return NULL;
}
static const struct drm_i915_reg_descriptor *
find_reg_in_tables(const struct drm_i915_reg_table *tables,
int count, bool is_master, u32 addr)
find_reg(const struct intel_engine_cs *engine, bool is_master, u32 addr)
{
int i;
const struct drm_i915_reg_table *table;
const struct drm_i915_reg_descriptor *reg;
const struct drm_i915_reg_table *table = engine->reg_tables;
int count = engine->reg_table_count;
for (i = 0; i < count; i++) {
table = &tables[i];
do {
if (!table->master || is_master) {
reg = find_reg(table->regs, table->num_regs,
addr);
const struct drm_i915_reg_descriptor *reg;
reg = __find_reg(table->regs, table->num_regs, addr);
if (reg != NULL)
return reg;
}
}
} while (table++, --count);
return NULL;
}
static u32 *vmap_batch(struct drm_i915_gem_object *obj,
unsigned start, unsigned len)
{
int i;
void *addr = NULL;
struct sg_page_iter sg_iter;
int first_page = start >> PAGE_SHIFT;
int last_page = (len + start + 4095) >> PAGE_SHIFT;
int npages = last_page - first_page;
struct page **pages;
pages = drm_malloc_ab(npages, sizeof(*pages));
if (pages == NULL) {
DRM_DEBUG_DRIVER("Failed to get space for pages\n");
goto finish;
}
i = 0;
for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, first_page) {
pages[i++] = sg_page_iter_page(&sg_iter);
if (i == npages)
break;
}
addr = vmap(pages, i, 0, PAGE_KERNEL);
if (addr == NULL) {
DRM_DEBUG_DRIVER("Failed to vmap pages\n");
goto finish;
}
finish:
if (pages)
drm_free_large(pages);
return (u32*)addr;
}
/* Returns a vmap'd pointer to dest_obj, which the caller must unmap */
static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
/* Returns a vmap'd pointer to dst_obj, which the caller must unmap */
static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
struct drm_i915_gem_object *src_obj,
u32 batch_start_offset,
u32 batch_len)
u32 batch_len,
bool *needs_clflush_after)
{
int needs_clflush = 0;
void *src_base, *src;
void *dst = NULL;
unsigned int src_needs_clflush;
unsigned int dst_needs_clflush;
void *dst, *src;
int ret;
if (batch_len > dest_obj->base.size ||
batch_len + batch_start_offset > src_obj->base.size)
return ERR_PTR(-E2BIG);
if (WARN_ON(dest_obj->pages_pin_count == 0))
return ERR_PTR(-ENODEV);
ret = i915_gem_obj_prepare_shmem_read(src_obj, &needs_clflush);
if (ret) {
DRM_DEBUG_DRIVER("CMD: failed to prepare shadow batch\n");
ret = i915_gem_obj_prepare_shmem_read(src_obj, &src_needs_clflush);
if (ret)
return ERR_PTR(ret);
}
src_base = vmap_batch(src_obj, batch_start_offset, batch_len);
if (!src_base) {
DRM_DEBUG_DRIVER("CMD: Failed to vmap batch\n");
ret = -ENOMEM;
ret = i915_gem_obj_prepare_shmem_write(dst_obj, &dst_needs_clflush);
if (ret) {
dst = ERR_PTR(ret);
goto unpin_src;
}
ret = i915_gem_object_set_to_cpu_domain(dest_obj, true);
if (ret) {
DRM_DEBUG_DRIVER("CMD: Failed to set shadow batch to CPU\n");
goto unmap_src;
dst = i915_gem_object_pin_map(dst_obj, I915_MAP_WB);
if (IS_ERR(dst))
goto unpin_dst;
src = ERR_PTR(-ENODEV);
if (src_needs_clflush &&
i915_memcpy_from_wc((void *)(uintptr_t)batch_start_offset, 0, 0)) {
src = i915_gem_object_pin_map(src_obj, I915_MAP_WC);
if (!IS_ERR(src)) {
i915_memcpy_from_wc(dst,
src + batch_start_offset,
ALIGN(batch_len, 16));
i915_gem_object_unpin_map(src_obj);
}
}
dst = vmap_batch(dest_obj, 0, batch_len);
if (!dst) {
DRM_DEBUG_DRIVER("CMD: Failed to vmap shadow batch\n");
ret = -ENOMEM;
goto unmap_src;
if (IS_ERR(src)) {
void *ptr;
int offset, n;
offset = offset_in_page(batch_start_offset);
/* We can avoid clflushing partial cachelines before the write
* if we only every write full cache-lines. Since we know that
* both the source and destination are in multiples of
* PAGE_SIZE, we can simply round up to the next cacheline.
* We don't care about copying too much here as we only
* validate up to the end of the batch.
*/
if (dst_needs_clflush & CLFLUSH_BEFORE)
batch_len = roundup(batch_len,
boot_cpu_data.x86_clflush_size);
ptr = dst;
for (n = batch_start_offset >> PAGE_SHIFT; batch_len; n++) {
int len = min_t(int, batch_len, PAGE_SIZE - offset);
src = kmap_atomic(i915_gem_object_get_page(src_obj, n));
if (src_needs_clflush)
drm_clflush_virt_range(src + offset, len);
memcpy(ptr, src + offset, len);
kunmap_atomic(src);
ptr += len;
batch_len -= len;
offset = 0;
}
}
src = src_base + offset_in_page(batch_start_offset);
if (needs_clflush)
drm_clflush_virt_range(src, batch_len);
memcpy(dst, src, batch_len);
/* dst_obj is returned with vmap pinned */
*needs_clflush_after = dst_needs_clflush & CLFLUSH_AFTER;
unmap_src:
vunmap(src_base);
unpin_dst:
i915_gem_obj_finish_shmem_access(dst_obj);
unpin_src:
i915_gem_object_unpin_pages(src_obj);
return ret ? ERR_PTR(ret) : dst;
i915_gem_obj_finish_shmem_access(src_obj);
return dst;
}
/**
......@@ -1052,6 +1063,9 @@ static bool check_cmd(const struct intel_engine_cs *engine,
const bool is_master,
bool *oacontrol_set)
{
if (desc->flags & CMD_DESC_SKIP)
return true;
if (desc->flags & CMD_DESC_REJECT) {
DRM_DEBUG_DRIVER("CMD: Rejected command: 0x%08X\n", *cmd);
return false;
......@@ -1076,10 +1090,7 @@ static bool check_cmd(const struct intel_engine_cs *engine,
offset += step) {
const u32 reg_addr = cmd[offset] & desc->reg.mask;
const struct drm_i915_reg_descriptor *reg =
find_reg_in_tables(engine->reg_tables,
engine->reg_table_count,
is_master,
reg_addr);
find_reg(engine, is_master, reg_addr);
if (!reg) {
DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (exec_id=%d)\n",
......@@ -1200,16 +1211,19 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
u32 batch_len,
bool is_master)
{
u32 *cmd, *batch_base, *batch_end;
struct drm_i915_cmd_descriptor default_desc = { 0 };
u32 *cmd, *batch_end;
struct drm_i915_cmd_descriptor default_desc = noop_desc;
const struct drm_i915_cmd_descriptor *desc = &default_desc;
bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */
bool needs_clflush_after = false;
int ret = 0;
batch_base = copy_batch(shadow_batch_obj, batch_obj,
batch_start_offset, batch_len);
if (IS_ERR(batch_base)) {
cmd = copy_batch(shadow_batch_obj, batch_obj,
batch_start_offset, batch_len,
&needs_clflush_after);
if (IS_ERR(cmd)) {
DRM_DEBUG_DRIVER("CMD: Failed to copy batch\n");
return PTR_ERR(batch_base);
return PTR_ERR(cmd);
}
/*
......@@ -1217,17 +1231,14 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
* large or larger and copy_batch() will write MI_NOPs to the extra
* space. Parsing should be faster in some cases this way.
*/
batch_end = batch_base + (batch_len / sizeof(*batch_end));
cmd = batch_base;
batch_end = cmd + (batch_len / sizeof(*batch_end));
while (cmd < batch_end) {
const struct drm_i915_cmd_descriptor *desc;
u32 length;
if (*cmd == MI_BATCH_BUFFER_END)
break;
desc = find_cmd(engine, *cmd, &default_desc);
desc = find_cmd(engine, *cmd, desc, &default_desc);
if (!desc) {
DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n",
*cmd);
......@@ -1278,7 +1289,9 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
ret = -EINVAL;
}
vunmap(batch_base);
if (ret == 0 && needs_clflush_after)
drm_clflush_virt_range(shadow_batch_obj->mapping, batch_len);
i915_gem_object_unpin_map(shadow_batch_obj);
return ret;
}
......
此差异已折叠。
......@@ -827,6 +827,8 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
mutex_init(&dev_priv->wm.wm_mutex);
mutex_init(&dev_priv->pps_mutex);
i915_memcpy_init_early(dev_priv);
ret = i915_workqueues_init(dev_priv);
if (ret < 0)
return ret;
......@@ -1560,6 +1562,7 @@ static int i915_drm_resume(struct drm_device *dev)
i915_gem_resume(dev);
i915_restore_state(dev);
intel_pps_unlock_regs_wa(dev_priv);
intel_opregion_setup(dev_priv);
intel_init_pch_refclk(dev);
......
此差异已折叠。
此差异已折叠。
......@@ -155,9 +155,10 @@ void i915_gem_context_free(struct kref *ctx_ref)
if (ce->ring)
intel_ring_free(ce->ring);
i915_gem_object_put(ce->state);
i915_vma_put(ce->state);
}
put_pid(ctx->pid);
list_del(&ctx->link);
ida_simple_remove(&ctx->i915->context_hw_ida, ctx->hw_id);
......@@ -281,13 +282,24 @@ __create_hw_context(struct drm_device *dev,
ctx->ggtt_alignment = get_context_alignment(dev_priv);
if (dev_priv->hw_context_size) {
struct drm_i915_gem_object *obj =
i915_gem_alloc_context_obj(dev, dev_priv->hw_context_size);
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
obj = i915_gem_alloc_context_obj(dev,
dev_priv->hw_context_size);
if (IS_ERR(obj)) {
ret = PTR_ERR(obj);
goto err_out;
}
ctx->engine[RCS].state = obj;
vma = i915_vma_create(obj, &dev_priv->ggtt.base, NULL);
if (IS_ERR(vma)) {
i915_gem_object_put(obj);
ret = PTR_ERR(vma);
goto err_out;
}
ctx->engine[RCS].state = vma;
}
/* Default context will never have a file_priv */
......@@ -300,6 +312,9 @@ __create_hw_context(struct drm_device *dev,
ret = DEFAULT_CONTEXT_HANDLE;
ctx->file_priv = file_priv;
if (file_priv)
ctx->pid = get_task_pid(current, PIDTYPE_PID);
ctx->user_handle = ret;
/* NB: Mark all slices as needing a remap so that when the context first
* loads it will restore whatever remap state already exists. If there
......@@ -399,7 +414,7 @@ static void i915_gem_context_unpin(struct i915_gem_context *ctx,
struct intel_context *ce = &ctx->engine[engine->id];
if (ce->state)
i915_gem_object_ggtt_unpin(ce->state);
i915_vma_unpin(ce->state);
i915_gem_context_put(ctx);
}
......@@ -568,7 +583,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
const int num_rings =
/* Use an extended w/a on ivb+ if signalling from other rings */
i915.semaphores ?
hweight32(INTEL_INFO(dev_priv)->ring_mask) - 1 :
INTEL_INFO(dev_priv)->num_rings - 1 :
0;
int len, ret;
......@@ -621,8 +636,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
intel_ring_emit(ring, MI_NOOP);
intel_ring_emit(ring, MI_SET_CONTEXT);
intel_ring_emit(ring,
i915_gem_obj_ggtt_offset(req->ctx->engine[RCS].state) |
flags);
i915_ggtt_offset(req->ctx->engine[RCS].state) | flags);
/*
* w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
* WaMiSetContext_Hang:snb,ivb,vlv
......@@ -651,7 +665,8 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
MI_STORE_REGISTER_MEM |
MI_SRM_LRM_GLOBAL_GTT);
intel_ring_emit_reg(ring, last_reg);
intel_ring_emit(ring, engine->scratch.gtt_offset);
intel_ring_emit(ring,
i915_ggtt_offset(engine->scratch));
intel_ring_emit(ring, MI_NOOP);
}
intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_ENABLE);
......@@ -755,6 +770,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
struct i915_gem_context *to = req->ctx;
struct intel_engine_cs *engine = req->engine;
struct i915_hw_ppgtt *ppgtt = to->ppgtt ?: req->i915->mm.aliasing_ppgtt;
struct i915_vma *vma = to->engine[RCS].state;
struct i915_gem_context *from;
u32 hw_flags;
int ret, i;
......@@ -762,9 +778,15 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
if (skip_rcs_switch(ppgtt, engine, to))
return 0;
/* Clear this page out of any CPU caches for coherent swap-in/out. */
if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
ret = i915_gem_object_set_to_gtt_domain(vma->obj, false);
if (ret)
return ret;
}
/* Trying to pin first makes error handling easier. */
ret = i915_gem_object_ggtt_pin(to->engine[RCS].state, NULL, 0,
to->ggtt_alignment, 0);
ret = i915_vma_pin(vma, 0, to->ggtt_alignment, PIN_GLOBAL);
if (ret)
return ret;
......@@ -777,18 +799,6 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
*/
from = engine->last_context;
/*
* Clear this page out of any CPU caches for coherent swap-in/out. Note
* that thanks to write = false in this call and us not setting any gpu
* write domains when putting a context object onto the active list
* (when switching away from it), this won't block.
*
* XXX: We need a real interface to do this instead of trickery.
*/
ret = i915_gem_object_set_to_gtt_domain(to->engine[RCS].state, false);
if (ret)
goto unpin_out;
if (needs_pd_load_pre(ppgtt, engine, to)) {
/* Older GENs and non render rings still want the load first,
* "PP_DCLV followed by PP_DIR_BASE register through Load
......@@ -797,7 +807,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
trace_switch_mm(engine, to);
ret = ppgtt->switch_mm(ppgtt, req);
if (ret)
goto unpin_out;
goto err;
}
if (!to->engine[RCS].initialised || i915_gem_context_is_default(to))
......@@ -814,7 +824,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
if (to != from || (hw_flags & MI_FORCE_RESTORE)) {
ret = mi_set_context(req, hw_flags);
if (ret)
goto unpin_out;
goto err;
}
/* The backing object for the context is done after switching to the
......@@ -824,8 +834,6 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
* MI_SET_CONTEXT instead of when the next seqno has completed.
*/
if (from != NULL) {
struct drm_i915_gem_object *obj = from->engine[RCS].state;
/* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
* whole damn pipeline, we don't need to explicitly mark the
* object dirty. The only exception is that the context must be
......@@ -833,11 +841,9 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
* able to defer doing this until we know the object would be
* swapped, but there is no way to do that yet.
*/
obj->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
i915_vma_move_to_active(i915_gem_obj_to_ggtt(obj), req, 0);
/* obj is kept alive until the next request by its active ref */
i915_gem_object_ggtt_unpin(obj);
i915_vma_move_to_active(from->engine[RCS].state, req, 0);
/* state is kept alive until the next request */
i915_vma_unpin(from->engine[RCS].state);
i915_gem_context_put(from);
}
engine->last_context = i915_gem_context_get(to);
......@@ -882,8 +888,8 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
return 0;
unpin_out:
i915_gem_object_ggtt_unpin(to->engine[RCS].state);
err:
i915_vma_unpin(vma);
return ret;
}
......
......@@ -119,7 +119,7 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf)
if (ret)
return ERR_PTR(ret);
addr = i915_gem_object_pin_map(obj);
addr = i915_gem_object_pin_map(obj, I915_MAP_WB);
mutex_unlock(&dev->struct_mutex);
return addr;
......
......@@ -47,7 +47,7 @@ gpu_is_idle(struct drm_i915_private *dev_priv)
}
static bool
mark_free(struct i915_vma *vma, struct list_head *unwind)
mark_free(struct i915_vma *vma, unsigned int flags, struct list_head *unwind)
{
if (i915_vma_is_pinned(vma))
return false;
......@@ -55,6 +55,9 @@ mark_free(struct i915_vma *vma, struct list_head *unwind)
if (WARN_ON(!list_empty(&vma->exec_list)))
return false;
if (flags & PIN_NONFAULT && vma->obj->fault_mappable)
return false;
list_add(&vma->exec_list, unwind);
return drm_mm_scan_add_block(&vma->node);
}
......@@ -129,7 +132,7 @@ i915_gem_evict_something(struct i915_address_space *vm,
phase = phases;
do {
list_for_each_entry(vma, *phase, vm_link)
if (mark_free(vma, &eviction_list))
if (mark_free(vma, flags, &eviction_list))
goto found;
} while (*++phase);
......
......@@ -55,87 +55,85 @@
* CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed.
*/
static void i965_write_fence_reg(struct drm_device *dev, int reg,
struct drm_i915_gem_object *obj)
#define pipelined 0
static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
struct i915_vma *vma)
{
struct drm_i915_private *dev_priv = to_i915(dev);
i915_reg_t fence_reg_lo, fence_reg_hi;
int fence_pitch_shift;
u64 val;
if (INTEL_INFO(dev)->gen >= 6) {
fence_reg_lo = FENCE_REG_GEN6_LO(reg);
fence_reg_hi = FENCE_REG_GEN6_HI(reg);
if (INTEL_INFO(fence->i915)->gen >= 6) {
fence_reg_lo = FENCE_REG_GEN6_LO(fence->id);
fence_reg_hi = FENCE_REG_GEN6_HI(fence->id);
fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT;
} else {
fence_reg_lo = FENCE_REG_965_LO(reg);
fence_reg_hi = FENCE_REG_965_HI(reg);
fence_reg_lo = FENCE_REG_965_LO(fence->id);
fence_reg_hi = FENCE_REG_965_HI(fence->id);
fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
}
/* To w/a incoherency with non-atomic 64-bit register updates,
* we split the 64-bit update into two 32-bit writes. In order
* for a partial fence not to be evaluated between writes, we
* precede the update with write to turn off the fence register,
* and only enable the fence as the last step.
*
* For extra levels of paranoia, we make sure each step lands
* before applying the next step.
*/
I915_WRITE(fence_reg_lo, 0);
POSTING_READ(fence_reg_lo);
if (obj) {
u32 size = i915_gem_obj_ggtt_size(obj);
unsigned int tiling = i915_gem_object_get_tiling(obj);
unsigned int stride = i915_gem_object_get_stride(obj);
uint64_t val;
/* Adjust fence size to match tiled area */
if (tiling != I915_TILING_NONE) {
uint32_t row_size = stride *
(tiling == I915_TILING_Y ? 32 : 8);
size = (size / row_size) * row_size;
}
val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) &
0xfffff000) << 32;
val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000;
val |= (uint64_t)((stride / 128) - 1) << fence_pitch_shift;
if (tiling == I915_TILING_Y)
val |= 1 << I965_FENCE_TILING_Y_SHIFT;
val = 0;
if (vma) {
unsigned int tiling = i915_gem_object_get_tiling(vma->obj);
bool is_y_tiled = tiling == I915_TILING_Y;
unsigned int stride = i915_gem_object_get_stride(vma->obj);
u32 row_size = stride * (is_y_tiled ? 32 : 8);
u32 size = rounddown((u32)vma->node.size, row_size);
val = ((vma->node.start + size - 4096) & 0xfffff000) << 32;
val |= vma->node.start & 0xfffff000;
val |= (u64)((stride / 128) - 1) << fence_pitch_shift;
if (is_y_tiled)
val |= BIT(I965_FENCE_TILING_Y_SHIFT);
val |= I965_FENCE_REG_VALID;
}
I915_WRITE(fence_reg_hi, val >> 32);
POSTING_READ(fence_reg_hi);
if (!pipelined) {
struct drm_i915_private *dev_priv = fence->i915;
I915_WRITE(fence_reg_lo, val);
/* To w/a incoherency with non-atomic 64-bit register updates,
* we split the 64-bit update into two 32-bit writes. In order
* for a partial fence not to be evaluated between writes, we
* precede the update with write to turn off the fence register,
* and only enable the fence as the last step.
*
* For extra levels of paranoia, we make sure each step lands
* before applying the next step.
*/
I915_WRITE(fence_reg_lo, 0);
POSTING_READ(fence_reg_lo);
I915_WRITE(fence_reg_hi, upper_32_bits(val));
I915_WRITE(fence_reg_lo, lower_32_bits(val));
POSTING_READ(fence_reg_lo);
} else {
I915_WRITE(fence_reg_hi, 0);
POSTING_READ(fence_reg_hi);
}
}
static void i915_write_fence_reg(struct drm_device *dev, int reg,
struct drm_i915_gem_object *obj)
static void i915_write_fence_reg(struct drm_i915_fence_reg *fence,
struct i915_vma *vma)
{
struct drm_i915_private *dev_priv = to_i915(dev);
u32 val;
if (obj) {
u32 size = i915_gem_obj_ggtt_size(obj);
unsigned int tiling = i915_gem_object_get_tiling(obj);
unsigned int stride = i915_gem_object_get_stride(obj);
val = 0;
if (vma) {
unsigned int tiling = i915_gem_object_get_tiling(vma->obj);
bool is_y_tiled = tiling == I915_TILING_Y;
unsigned int stride = i915_gem_object_get_stride(vma->obj);
int pitch_val;
int tile_width;
WARN((i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) ||
(size & -size) != size ||
(i915_gem_obj_ggtt_offset(obj) & (size - 1)),
"object 0x%08llx [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size);
WARN((vma->node.start & ~I915_FENCE_START_MASK) ||
!is_power_of_2(vma->node.size) ||
(vma->node.start & (vma->node.size - 1)),
"object 0x%08llx [fenceable? %d] not 1M or pot-size (0x%08llx) aligned\n",
vma->node.start,
i915_vma_is_map_and_fenceable(vma),
vma->node.size);
if (tiling == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence->i915))
tile_width = 128;
else
tile_width = 512;
......@@ -144,139 +142,141 @@ static void i915_write_fence_reg(struct drm_device *dev, int reg,
pitch_val = stride / tile_width;
pitch_val = ffs(pitch_val) - 1;
val = i915_gem_obj_ggtt_offset(obj);
if (tiling == I915_TILING_Y)
val |= 1 << I830_FENCE_TILING_Y_SHIFT;
val |= I915_FENCE_SIZE_BITS(size);
val = vma->node.start;
if (is_y_tiled)
val |= BIT(I830_FENCE_TILING_Y_SHIFT);
val |= I915_FENCE_SIZE_BITS(vma->node.size);
val |= pitch_val << I830_FENCE_PITCH_SHIFT;
val |= I830_FENCE_REG_VALID;
} else
val = 0;
}
if (!pipelined) {
struct drm_i915_private *dev_priv = fence->i915;
i915_reg_t reg = FENCE_REG(fence->id);
I915_WRITE(FENCE_REG(reg), val);
POSTING_READ(FENCE_REG(reg));
I915_WRITE(reg, val);
POSTING_READ(reg);
}
}
static void i830_write_fence_reg(struct drm_device *dev, int reg,
struct drm_i915_gem_object *obj)
static void i830_write_fence_reg(struct drm_i915_fence_reg *fence,
struct i915_vma *vma)
{
struct drm_i915_private *dev_priv = to_i915(dev);
uint32_t val;
u32 val;
if (obj) {
u32 size = i915_gem_obj_ggtt_size(obj);
unsigned int tiling = i915_gem_object_get_tiling(obj);
unsigned int stride = i915_gem_object_get_stride(obj);
uint32_t pitch_val;
val = 0;
if (vma) {
unsigned int tiling = i915_gem_object_get_tiling(vma->obj);
bool is_y_tiled = tiling == I915_TILING_Y;
unsigned int stride = i915_gem_object_get_stride(vma->obj);
u32 pitch_val;
WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) ||
(size & -size) != size ||
(i915_gem_obj_ggtt_offset(obj) & (size - 1)),
"object 0x%08llx not 512K or pot-size 0x%08x aligned\n",
i915_gem_obj_ggtt_offset(obj), size);
WARN((vma->node.start & ~I830_FENCE_START_MASK) ||
!is_power_of_2(vma->node.size) ||
(vma->node.start & (vma->node.size - 1)),
"object 0x%08llx not 512K or pot-size 0x%08llx aligned\n",
vma->node.start, vma->node.size);
pitch_val = stride / 128;
pitch_val = ffs(pitch_val) - 1;
val = i915_gem_obj_ggtt_offset(obj);
if (tiling == I915_TILING_Y)
val |= 1 << I830_FENCE_TILING_Y_SHIFT;
val |= I830_FENCE_SIZE_BITS(size);
val = vma->node.start;
if (is_y_tiled)
val |= BIT(I830_FENCE_TILING_Y_SHIFT);
val |= I830_FENCE_SIZE_BITS(vma->node.size);
val |= pitch_val << I830_FENCE_PITCH_SHIFT;
val |= I830_FENCE_REG_VALID;
} else
val = 0;
}
I915_WRITE(FENCE_REG(reg), val);
POSTING_READ(FENCE_REG(reg));
}
if (!pipelined) {
struct drm_i915_private *dev_priv = fence->i915;
i915_reg_t reg = FENCE_REG(fence->id);
inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj)
{
return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT;
I915_WRITE(reg, val);
POSTING_READ(reg);
}
}
static void i915_gem_write_fence(struct drm_device *dev, int reg,
struct drm_i915_gem_object *obj)
static void fence_write(struct drm_i915_fence_reg *fence,
struct i915_vma *vma)
{
struct drm_i915_private *dev_priv = to_i915(dev);
/* Ensure that all CPU reads are completed before installing a fence
* and all writes before removing the fence.
/* Previous access through the fence register is marshalled by
* the mb() inside the fault handlers (i915_gem_release_mmaps)
* and explicitly managed for internal users.
*/
if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj))
mb();
WARN(obj &&
(!i915_gem_object_get_stride(obj) ||
!i915_gem_object_get_tiling(obj)),
"bogus fence setup with stride: 0x%x, tiling mode: %i\n",
i915_gem_object_get_stride(obj),
i915_gem_object_get_tiling(obj));
if (IS_GEN2(dev))
i830_write_fence_reg(dev, reg, obj);
else if (IS_GEN3(dev))
i915_write_fence_reg(dev, reg, obj);
else if (INTEL_INFO(dev)->gen >= 4)
i965_write_fence_reg(dev, reg, obj);
/* And similarly be paranoid that no direct access to this region
* is reordered to before the fence is installed.
if (IS_GEN2(fence->i915))
i830_write_fence_reg(fence, vma);
else if (IS_GEN3(fence->i915))
i915_write_fence_reg(fence, vma);
else
i965_write_fence_reg(fence, vma);
/* Access through the fenced region afterwards is
* ordered by the posting reads whilst writing the registers.
*/
if (i915_gem_object_needs_mb(obj))
mb();
}
static inline int fence_number(struct drm_i915_private *dev_priv,
struct drm_i915_fence_reg *fence)
{
return fence - dev_priv->fence_regs;
fence->dirty = false;
}
static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
struct drm_i915_fence_reg *fence,
bool enable)
static int fence_update(struct drm_i915_fence_reg *fence,
struct i915_vma *vma)
{
struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
int reg = fence_number(dev_priv, fence);
int ret;
i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);
if (vma) {
if (!i915_vma_is_map_and_fenceable(vma))
return -EINVAL;
if (enable) {
obj->fence_reg = reg;
fence->obj = obj;
list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
} else {
obj->fence_reg = I915_FENCE_REG_NONE;
fence->obj = NULL;
list_del_init(&fence->lru_list);
if (WARN(!i915_gem_object_get_stride(vma->obj) ||
!i915_gem_object_get_tiling(vma->obj),
"bogus fence setup with stride: 0x%x, tiling mode: %i\n",
i915_gem_object_get_stride(vma->obj),
i915_gem_object_get_tiling(vma->obj)))
return -EINVAL;
ret = i915_gem_active_retire(&vma->last_fence,
&vma->obj->base.dev->struct_mutex);
if (ret)
return ret;
}
obj->fence_dirty = false;
}
static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
{
if (i915_gem_object_is_tiled(obj))
i915_gem_release_mmap(obj);
if (fence->vma) {
ret = i915_gem_active_retire(&fence->vma->last_fence,
&fence->vma->obj->base.dev->struct_mutex);
if (ret)
return ret;
}
/* As we do not have an associated fence register, we will force
* a tiling change if we ever need to acquire one.
*/
obj->fence_dirty = false;
obj->fence_reg = I915_FENCE_REG_NONE;
}
if (fence->vma && fence->vma != vma) {
/* Ensure that all userspace CPU access is completed before
* stealing the fence.
*/
i915_gem_release_mmap(fence->vma->obj);
static int
i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
{
return i915_gem_active_retire(&obj->last_fence,
&obj->base.dev->struct_mutex);
fence->vma->fence = NULL;
fence->vma = NULL;
list_move(&fence->link, &fence->i915->mm.fence_list);
}
fence_write(fence, vma);
if (vma) {
if (fence->vma != vma) {
vma->fence = fence;
fence->vma = vma;
}
list_move_tail(&fence->link, &fence->i915->mm.fence_list);
}
return 0;
}
/**
* i915_gem_object_put_fence - force-remove fence for an object
* @obj: object to map through a fence reg
* i915_vma_put_fence - force-remove fence for a VMA
* @vma: vma to map linearly (not through a fence reg)
*
* This function force-removes any fence from the given object, which is useful
* if the kernel wants to do untiled GTT access.
......@@ -286,70 +286,40 @@ i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
* 0 on success, negative error code on failure.
*/
int
i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
i915_vma_put_fence(struct i915_vma *vma)
{
struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
struct drm_i915_fence_reg *fence;
int ret;
ret = i915_gem_object_wait_fence(obj);
if (ret)
return ret;
struct drm_i915_fence_reg *fence = vma->fence;
if (obj->fence_reg == I915_FENCE_REG_NONE)
if (!fence)
return 0;
fence = &dev_priv->fence_regs[obj->fence_reg];
if (WARN_ON(fence->pin_count))
if (fence->pin_count)
return -EBUSY;
i915_gem_object_fence_lost(obj);
i915_gem_object_update_fence(obj, fence, false);
return 0;
return fence_update(fence, NULL);
}
static struct drm_i915_fence_reg *
i915_find_fence_reg(struct drm_device *dev)
static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv)
{
struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_i915_fence_reg *reg, *avail;
int i;
/* First try to find a free reg */
avail = NULL;
for (i = 0; i < dev_priv->num_fence_regs; i++) {
reg = &dev_priv->fence_regs[i];
if (!reg->obj)
return reg;
if (!reg->pin_count)
avail = reg;
}
if (avail == NULL)
goto deadlock;
struct drm_i915_fence_reg *fence;
/* None available, try to steal one or wait for a user to finish */
list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
if (reg->pin_count)
list_for_each_entry(fence, &dev_priv->mm.fence_list, link) {
if (fence->pin_count)
continue;
return reg;
return fence;
}
deadlock:
/* Wait for completion of pending flips which consume fences */
if (intel_has_pending_fb_unpin(dev))
if (intel_has_pending_fb_unpin(&dev_priv->drm))
return ERR_PTR(-EAGAIN);
return ERR_PTR(-EDEADLK);
}
/**
* i915_gem_object_get_fence - set up fencing for an object
* @obj: object to map through a fence reg
* i915_vma_get_fence - set up fencing for a vma
* @vma: vma to map through a fence reg
*
* When mapping objects through the GTT, userspace wants to be able to write
* to them without having to worry about swizzling if the object is tiled.
......@@ -366,103 +336,27 @@ i915_find_fence_reg(struct drm_device *dev)
* 0 on success, negative error code on failure.
*/
int
i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
i915_vma_get_fence(struct i915_vma *vma)
{
struct drm_device *dev = obj->base.dev;
struct drm_i915_private *dev_priv = to_i915(dev);
bool enable = i915_gem_object_is_tiled(obj);
struct drm_i915_fence_reg *reg;
int ret;
/* Have we updated the tiling parameters upon the object and so
* will need to serialise the write to the associated fence register?
*/
if (obj->fence_dirty) {
ret = i915_gem_object_wait_fence(obj);
if (ret)
return ret;
}
struct drm_i915_fence_reg *fence;
struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL;
/* Just update our place in the LRU if our fence is getting reused. */
if (obj->fence_reg != I915_FENCE_REG_NONE) {
reg = &dev_priv->fence_regs[obj->fence_reg];
if (!obj->fence_dirty) {
list_move_tail(&reg->lru_list,
&dev_priv->mm.fence_list);
if (vma->fence) {
fence = vma->fence;
if (!fence->dirty) {
list_move_tail(&fence->link,
&fence->i915->mm.fence_list);
return 0;
}
} else if (enable) {
if (WARN_ON(!obj->map_and_fenceable))
return -EINVAL;
reg = i915_find_fence_reg(dev);
if (IS_ERR(reg))
return PTR_ERR(reg);
if (reg->obj) {
struct drm_i915_gem_object *old = reg->obj;
ret = i915_gem_object_wait_fence(old);
if (ret)
return ret;
i915_gem_object_fence_lost(old);
}
} else if (set) {
fence = fence_find(to_i915(vma->vm->dev));
if (IS_ERR(fence))
return PTR_ERR(fence);
} else
return 0;
i915_gem_object_update_fence(obj, reg, enable);
return 0;
}
/**
* i915_gem_object_pin_fence - pin fencing state
* @obj: object to pin fencing for
*
* This pins the fencing state (whether tiled or untiled) to make sure the
* object is ready to be used as a scanout target. Fencing status must be
* synchronize first by calling i915_gem_object_get_fence():
*
* The resulting fence pin reference must be released again with
* i915_gem_object_unpin_fence().
*
* Returns:
*
* True if the object has a fence, false otherwise.
*/
bool
i915_gem_object_pin_fence(struct drm_i915_gem_object *obj)
{
if (obj->fence_reg != I915_FENCE_REG_NONE) {
struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
struct i915_vma *ggtt_vma = i915_gem_obj_to_ggtt(obj);
WARN_ON(!ggtt_vma ||
dev_priv->fence_regs[obj->fence_reg].pin_count >
i915_vma_pin_count(ggtt_vma));
dev_priv->fence_regs[obj->fence_reg].pin_count++;
return true;
} else
return false;
}
/**
* i915_gem_object_unpin_fence - unpin fencing state
* @obj: object to unpin fencing for
*
* This releases the fence pin reference acquired through
* i915_gem_object_pin_fence. It will handle both objects with and without an
* attached fence correctly, callers do not need to distinguish this.
*/
void
i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj)
{
if (obj->fence_reg != I915_FENCE_REG_NONE) {
struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count <= 0);
dev_priv->fence_regs[obj->fence_reg].pin_count--;
}
return fence_update(fence, set);
}
/**
......@@ -479,17 +373,16 @@ void i915_gem_restore_fences(struct drm_device *dev)
for (i = 0; i < dev_priv->num_fence_regs; i++) {
struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
struct i915_vma *vma = reg->vma;
/*
* Commit delayed tiling changes if we have an object still
* attached to the fence, otherwise just clear the fence.
*/
if (reg->obj) {
i915_gem_object_update_fence(reg->obj, reg,
i915_gem_object_get_tiling(reg->obj));
} else {
i915_gem_write_fence(dev, i, NULL);
}
if (vma && !i915_gem_object_is_tiled(vma->obj))
vma = NULL;
fence_update(reg, vma);
}
}
......
此差异已折叠。
此差异已折叠。
......@@ -24,7 +24,7 @@
#ifndef _I915_GEM_RENDER_STATE_H_
#define _I915_GEM_RENDER_STATE_H_
#include <linux/types.h>
struct drm_i915_gem_request;
int i915_gem_render_state_init(struct drm_i915_gem_request *req);
......
......@@ -115,17 +115,28 @@ static unsigned long i915_stolen_to_physical(struct drm_device *dev)
base = bsm & INTEL_BSM_MASK;
} else if (IS_I865G(dev)) {
u32 tseg_size = 0;
u16 toud = 0;
u8 tmp;
pci_bus_read_config_byte(dev->pdev->bus, PCI_DEVFN(0, 0),
I845_ESMRAMC, &tmp);
if (tmp & TSEG_ENABLE) {
switch (tmp & I845_TSEG_SIZE_MASK) {
case I845_TSEG_SIZE_512K:
tseg_size = KB(512);
break;
case I845_TSEG_SIZE_1M:
tseg_size = MB(1);
break;
}
}
/*
* FIXME is the graphics stolen memory region
* always at TOUD? Ie. is it always the last
* one to be allocated by the BIOS?
*/
pci_bus_read_config_word(dev->pdev->bus, PCI_DEVFN(0, 0),
I865_TOUD, &toud);
base = toud << 16;
base = (toud << 16) + tseg_size;
} else if (IS_I85X(dev)) {
u32 tseg_size = 0;
u32 tom;
......@@ -685,7 +696,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev,
if (gtt_offset == I915_GTT_OFFSET_NONE)
return obj;
vma = i915_gem_obj_lookup_or_create_vma(obj, &ggtt->base);
vma = i915_gem_obj_lookup_or_create_vma(obj, &ggtt->base, NULL);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
goto err;
......@@ -705,6 +716,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev,
goto err;
}
vma->pages = obj->pages;
vma->flags |= I915_VMA_GLOBAL_BIND;
__i915_vma_set_map_and_fenceable(vma);
list_move_tail(&vma->vm_link, &ggtt->base.inactive_list);
......
......@@ -542,8 +542,6 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
}
}
obj->userptr.work = ERR_PTR(ret);
if (ret)
__i915_gem_userptr_set_active(obj, false);
}
obj->userptr.workers--;
......@@ -628,15 +626,14 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
* to the vma (discard or cloning) which should prevent the more
* egregious cases from causing harm.
*/
if (IS_ERR(obj->userptr.work)) {
/* active flag will have been dropped already by the worker */
ret = PTR_ERR(obj->userptr.work);
obj->userptr.work = NULL;
return ret;
}
if (obj->userptr.work)
if (obj->userptr.work) {
/* active flag should still be held for the pending work */
return -EAGAIN;
if (IS_ERR(obj->userptr.work))
return PTR_ERR(obj->userptr.work);
else
return -EAGAIN;
}
/* Let the mmu-notifier know that we have begun and need cancellation */
ret = __i915_gem_userptr_set_active(obj, true);
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册