提交 2e161017 编写于 作者: D Dave Airlie

Merge tag 'drm-intel-next-2017-03-06' of git://anongit.freedesktop.org/git/drm-intel into drm-next

4 weeks worth of stuff since I was traveling&lazy:

- lspcon improvements (Imre)
- proper atomic state for cdclk handling (Ville)
- gpu reset improvements (Chris)
- lots and lots of polish around fences, requests, waiting and
  everything related all over (both gem and modeset code), from Chris
- atomic by default on gen5+ minus byt/bsw (Maarten did the patch to
  flip the default, really this is a massive joint team effort)
- moar power domains, now 64bit (Ander)
- big pile of in-kernel unit tests for various gem subsystems (Chris),
  including simple mock objects for i915 device and and the ggtt
  manager.
- i915_gpu_info in debugfs, for taking a snapshot of the current gpu
  state. Same thing as i915_error_state, but useful if the kernel didn't
  notice something is stick. From Chris.
- bxt dsi fixes (Umar Shankar)
- bxt w/a updates (Jani)
- no more struct_mutex for gem object unreference (Chris)
- some execlist refactoring (Tvrtko)
- color manager support for glk (Ander)
- improve the power-well sync code to better take over from the
  firmware (Imre)
- gem tracepoint polish (Tvrtko)
- lots of glk fixes all around (Ander)
- ctx switch improvements (Chris)
- glk dsi support&fixes (Deepak M)
- dsi fixes for vlv and clanups, lots of them (Hans de Goede)
- switch to i915.ko types in lots of our internal modeset code (Ander)
- byt/bsw atomic wm update code, yay (Ville)

* tag 'drm-intel-next-2017-03-06' of git://anongit.freedesktop.org/git/drm-intel: (432 commits)
  drm/i915: Update DRIVER_DATE to 20170306
  drm/i915: Don't use enums for hardware engine id
  drm/i915: Split breadcrumbs spinlock into two
  drm/i915: Refactor wakeup of the next breadcrumb waiter
  drm/i915: Take reference for signaling the request from hardirq
  drm/i915: Add FIFO underrun tracepoints
  drm/i915: Add cxsr toggle tracepoint
  drm/i915: Add VLV/CHV watermark/FIFO programming tracepoints
  drm/i915: Add plane update/disable tracepoints
  drm/i915: Kill level 0 wm hack for VLV/CHV
  drm/i915: Workaround VLV/CHV sprite1->sprite0 enable underrun
  drm/i915: Sanitize VLV/CHV watermarks properly
  drm/i915: Only use update_wm_{pre,post} for pre-ilk platforms
  drm/i915: Nuke crtc->wm.cxsr_allowed
  drm/i915: Compute proper intermediate wms for vlv/cvh
  drm/i915: Skip useless watermark/FIFO related work on VLV/CHV when not needed
  drm/i915: Compute vlv/chv wms the atomic way
  drm/i915: Compute VLV/CHV FIFO sizes based on the PM2 watermarks
  drm/i915: Plop vlv/chv fifo sizes into crtc state
  drm/i915: Plop vlv wm state into crtc_state
  ...
......@@ -222,6 +222,15 @@ Video BIOS Table (VBT)
.. kernel-doc:: drivers/gpu/drm/i915/intel_vbt_defs.h
:internal:
Display clocks
--------------
.. kernel-doc:: drivers/gpu/drm/i915/intel_cdclk.c
:doc: CDCLK / RAWCLK
.. kernel-doc:: drivers/gpu/drm/i915/intel_cdclk.c
:internal:
Display PLLs
------------
......
......@@ -526,6 +526,7 @@ static const struct pci_device_id intel_early_ids[] __initconst = {
INTEL_SKL_IDS(&gen9_early_ops),
INTEL_BXT_IDS(&gen9_early_ops),
INTEL_KBL_IDS(&gen9_early_ops),
INTEL_GLK_IDS(&gen9_early_ops),
};
static void __init
......
......@@ -332,14 +332,6 @@ static void i810_write_entry(dma_addr_t addr, unsigned int entry,
writel_relaxed(addr | pte_flags, intel_private.gtt + entry);
}
static const struct aper_size_info_fixed intel_fake_agp_sizes[] = {
{32, 8192, 3},
{64, 16384, 4},
{128, 32768, 5},
{256, 65536, 6},
{512, 131072, 7},
};
static unsigned int intel_gtt_stolen_size(void)
{
u16 gmch_ctrl;
......@@ -670,6 +662,14 @@ static int intel_gtt_init(void)
}
#if IS_ENABLED(CONFIG_AGP_INTEL)
static const struct aper_size_info_fixed intel_fake_agp_sizes[] = {
{32, 8192, 3},
{64, 16384, 4},
{128, 32768, 5},
{256, 65536, 6},
{512, 131072, 7},
};
static int intel_fake_agp_fetch_size(void)
{
int num_sizes = ARRAY_SIZE(intel_fake_agp_sizes);
......
......@@ -19,6 +19,7 @@ config DRM_I915
select INPUT if ACPI
select ACPI_VIDEO if ACPI
select ACPI_BUTTON if ACPI
select SYNC_FILE
help
Choose this option if you have a system that has "Intel Graphics
Media Accelerator" or "HD Graphics" integrated graphics,
......
......@@ -24,7 +24,9 @@ config DRM_I915_DEBUG
select X86_MSR # used by igt/pm_rpm
select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks)
select DRM_DEBUG_MM if DRM=y
select DRM_DEBUG_MM_SELFTEST
select DRM_I915_SW_FENCE_DEBUG_OBJECTS
select DRM_I915_SELFTEST
default n
help
Choose this option to turn on extra driver debugging that may affect
......@@ -58,3 +60,30 @@ config DRM_I915_SW_FENCE_DEBUG_OBJECTS
Recommended for driver developers only.
If in doubt, say "N".
config DRM_I915_SELFTEST
bool "Enable selftests upon driver load"
depends on DRM_I915
default n
select FAULT_INJECTION
select PRIME_NUMBERS
help
Choose this option to allow the driver to perform selftests upon
loading; also requires the i915.selftest=1 module parameter. To
exit the module after running the selftests (i.e. to prevent normal
module initialisation afterwards) use i915.selftest=-1.
Recommended for driver developers only.
If in doubt, say "N".
config DRM_I915_LOW_LEVEL_TRACEPOINTS
bool "Enable low level request tracing events"
depends on DRM_I915
default n
help
Choose this option to turn on low level request tracing events.
This provides the ability to precisely monitor engine utilisation
and also analyze the request dependency resolving timeline.
If in doubt, say "N".
......@@ -29,6 +29,7 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o
# GEM code
i915-y += i915_cmd_parser.o \
i915_gem_batch_pool.o \
i915_gem_clflush.o \
i915_gem_context.o \
i915_gem_dmabuf.o \
i915_gem_evict.o \
......@@ -72,6 +73,7 @@ i915-y += intel_audio.o \
intel_atomic.o \
intel_atomic_plane.o \
intel_bios.o \
intel_cdclk.o \
intel_color.o \
intel_display.o \
intel_dpio_phy.o \
......@@ -116,6 +118,9 @@ i915-y += dvo_ch7017.o \
# Post-mortem debug and GPU hang state capture
i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o
i915-$(CONFIG_DRM_I915_SELFTEST) += \
selftests/i915_random.o \
selftests/i915_selftest.o
# virtual gpu code
i915-y += i915_vgpu.o
......
......@@ -1530,7 +1530,7 @@ static int copy_gma_to_hva(struct intel_vgpu *vgpu, struct intel_vgpu_mm *mm,
len += copy_len;
gma += copy_len;
}
return 0;
return len;
}
......@@ -1644,7 +1644,7 @@ static int perform_bb_shadow(struct parser_exec_state *s)
ret = copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm,
gma, gma + bb_size,
dst);
if (ret) {
if (ret < 0) {
gvt_err("fail to copy guest ring buffer\n");
goto unmap_src;
}
......@@ -2608,11 +2608,8 @@ static int scan_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload)
{
struct intel_vgpu *vgpu = workload->vgpu;
int ring_id = workload->ring_id;
struct i915_gem_context *shadow_ctx = vgpu->shadow_ctx;
struct intel_ring *ring = shadow_ctx->engine[ring_id].ring;
unsigned long gma_head, gma_tail, gma_top, guest_rb_size;
unsigned int copy_len = 0;
u32 *cs;
int ret;
guest_rb_size = _RING_CTL_BUF_SIZE(workload->rb_ctl);
......@@ -2626,36 +2623,33 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload)
gma_top = workload->rb_start + guest_rb_size;
/* allocate shadow ring buffer */
ret = intel_ring_begin(workload->req, workload->rb_len / 4);
if (ret)
return ret;
cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32));
if (IS_ERR(cs))
return PTR_ERR(cs);
/* get shadow ring buffer va */
workload->shadow_ring_buffer_va = ring->vaddr + ring->tail;
workload->shadow_ring_buffer_va = cs;
/* head > tail --> copy head <-> top */
if (gma_head > gma_tail) {
ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm,
gma_head, gma_top,
workload->shadow_ring_buffer_va);
if (ret) {
gma_head, gma_top, cs);
if (ret < 0) {
gvt_err("fail to copy guest ring buffer\n");
return ret;
}
copy_len = gma_top - gma_head;
cs += ret / sizeof(u32);
gma_head = workload->rb_start;
}
/* copy head or start <-> tail */
ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm,
gma_head, gma_tail,
workload->shadow_ring_buffer_va + copy_len);
if (ret) {
ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, gma_head, gma_tail, cs);
if (ret < 0) {
gvt_err("fail to copy guest ring buffer\n");
return ret;
}
ring->tail += workload->rb_len;
intel_ring_advance(ring);
cs += ret / sizeof(u32);
intel_ring_advance(workload->req, cs);
return 0;
}
......@@ -2709,7 +2703,7 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx)
wa_ctx->workload->vgpu->gtt.ggtt_mm,
guest_gma, guest_gma + ctx_size,
map);
if (ret) {
if (ret < 0) {
gvt_err("fail to copy guest indirect ctx\n");
goto unmap_src;
}
......
此差异已折叠。
......@@ -43,6 +43,7 @@
#include <drm/drmP.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_atomic_helper.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
......@@ -248,6 +249,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
case I915_PARAM_IRQ_ACTIVE:
case I915_PARAM_ALLOW_BATCHBUFFER:
case I915_PARAM_LAST_DISPATCH:
case I915_PARAM_HAS_EXEC_CONSTANTS:
/* Reject all old ums/dri params. */
return -ENODEV;
case I915_PARAM_CHIPSET_ID:
......@@ -274,9 +276,6 @@ static int i915_getparam(struct drm_device *dev, void *data,
case I915_PARAM_HAS_BSD2:
value = !!dev_priv->engine[VCS2];
break;
case I915_PARAM_HAS_EXEC_CONSTANTS:
value = INTEL_GEN(dev_priv) >= 4;
break;
case I915_PARAM_HAS_LLC:
value = HAS_LLC(dev_priv);
break;
......@@ -318,10 +317,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
value = INTEL_INFO(dev_priv)->sseu.min_eu_in_pool;
break;
case I915_PARAM_HUC_STATUS:
/* The register is already force-woken. We dont need
* any rpm here
*/
intel_runtime_pm_get(dev_priv);
value = I915_READ(HUC_STATUS2) & HUC_FW_VERIFIED;
intel_runtime_pm_put(dev_priv);
break;
case I915_PARAM_MMAP_GTT_VERSION:
/* Though we've started our numbering from 1, and so class all
......@@ -350,6 +348,8 @@ static int i915_getparam(struct drm_device *dev, void *data,
case I915_PARAM_HAS_EXEC_HANDLE_LUT:
case I915_PARAM_HAS_COHERENT_PHYS_GTT:
case I915_PARAM_HAS_EXEC_SOFTPIN:
case I915_PARAM_HAS_EXEC_ASYNC:
case I915_PARAM_HAS_EXEC_FENCE:
/* For the time being all of these are always true;
* if some supported hardware does not have one of these
* features this value needs to be provided from
......@@ -756,6 +756,15 @@ static int i915_workqueues_init(struct drm_i915_private *dev_priv)
return -ENOMEM;
}
static void i915_engines_cleanup(struct drm_i915_private *i915)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
for_each_engine(engine, i915, id)
kfree(engine);
}
static void i915_workqueues_cleanup(struct drm_i915_private *dev_priv)
{
destroy_workqueue(dev_priv->hotplug.dp_wq);
......@@ -769,10 +778,17 @@ static void i915_workqueues_cleanup(struct drm_i915_private *dev_priv)
*/
static void intel_detect_preproduction_hw(struct drm_i915_private *dev_priv)
{
if (IS_HSW_EARLY_SDV(dev_priv) ||
IS_SKL_REVID(dev_priv, 0, SKL_REVID_F0))
bool pre = false;
pre |= IS_HSW_EARLY_SDV(dev_priv);
pre |= IS_SKL_REVID(dev_priv, 0, SKL_REVID_F0);
pre |= IS_BXT_REVID(dev_priv, 0, BXT_REVID_B_LAST);
if (pre) {
DRM_ERROR("This is a pre-production stepping. "
"It may not be fully functional.\n");
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_STILL_OK);
}
}
/**
......@@ -808,6 +824,7 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
spin_lock_init(&dev_priv->gpu_error.lock);
mutex_init(&dev_priv->backlight_lock);
spin_lock_init(&dev_priv->uncore.lock);
spin_lock_init(&dev_priv->mm.object_stat_lock);
spin_lock_init(&dev_priv->mmio_flip_lock);
spin_lock_init(&dev_priv->wm.dsparb_lock);
......@@ -818,12 +835,15 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
mutex_init(&dev_priv->pps_mutex);
intel_uc_init_early(dev_priv);
i915_memcpy_init_early(dev_priv);
ret = intel_engines_init_early(dev_priv);
if (ret)
return ret;
ret = i915_workqueues_init(dev_priv);
if (ret < 0)
return ret;
goto err_engines;
/* This must be called before any calls to HAS_PCH_* */
intel_detect_pch(dev_priv);
......@@ -852,6 +872,8 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
err_workqueues:
i915_workqueues_cleanup(dev_priv);
err_engines:
i915_engines_cleanup(dev_priv);
return ret;
}
......@@ -864,6 +886,7 @@ static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv)
i915_perf_fini(dev_priv);
i915_gem_load_cleanup(dev_priv);
i915_workqueues_cleanup(dev_priv);
i915_engines_cleanup(dev_priv);
}
static int i915_mmio_setup(struct drm_i915_private *dev_priv)
......@@ -930,6 +953,7 @@ static int i915_driver_init_mmio(struct drm_i915_private *dev_priv)
goto put_bridge;
intel_uncore_init(dev_priv);
i915_gem_init_mmio(dev_priv);
return 0;
......@@ -967,7 +991,7 @@ static void intel_sanitize_options(struct drm_i915_private *dev_priv)
DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt);
i915.semaphores = intel_sanitize_semaphores(dev_priv, i915.semaphores);
DRM_DEBUG_DRIVER("use GPU sempahores? %s\n", yesno(i915.semaphores));
DRM_DEBUG_DRIVER("use GPU semaphores? %s\n", yesno(i915.semaphores));
}
/**
......@@ -1185,11 +1209,15 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv)
*/
int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent)
{
const struct intel_device_info *match_info =
(struct intel_device_info *)ent->driver_data;
struct drm_i915_private *dev_priv;
int ret;
if (i915.nuclear_pageflip)
driver.driver_features |= DRIVER_ATOMIC;
/* Enable nuclear pageflip on ILK+, except vlv/chv */
if (!i915.nuclear_pageflip &&
(match_info->gen < 5 || match_info->has_gmch_display))
driver.driver_features &= ~DRIVER_ATOMIC;
ret = -ENOMEM;
dev_priv = kzalloc(sizeof(*dev_priv), GFP_KERNEL);
......@@ -1197,8 +1225,7 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent)
ret = drm_dev_init(&dev_priv->drm, &driver, &pdev->dev);
if (ret) {
DRM_DEV_ERROR(&pdev->dev, "allocation failed\n");
kfree(dev_priv);
return ret;
goto out_free;
}
dev_priv->drm.pdev = pdev;
......@@ -1206,7 +1233,7 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent)
ret = pci_enable_device(pdev);
if (ret)
goto out_free_priv;
goto out_fini;
pci_set_drvdata(pdev, &dev_priv->drm);
......@@ -1270,9 +1297,11 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent)
i915_driver_cleanup_early(dev_priv);
out_pci_disable:
pci_disable_device(pdev);
out_free_priv:
out_fini:
i915_load_error(dev_priv, "Device initialization failed (%d)\n", ret);
drm_dev_unref(&dev_priv->drm);
drm_dev_fini(&dev_priv->drm);
out_free:
kfree(dev_priv);
return ret;
}
......@@ -1280,6 +1309,8 @@ void i915_driver_unload(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = to_i915(dev);
struct pci_dev *pdev = dev_priv->drm.pdev;
struct drm_modeset_acquire_ctx ctx;
int ret;
intel_fbdev_fini(dev);
......@@ -1288,6 +1319,24 @@ void i915_driver_unload(struct drm_device *dev)
intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);
drm_modeset_acquire_init(&ctx, 0);
while (1) {
ret = drm_modeset_lock_all_ctx(dev, &ctx);
if (!ret)
ret = drm_atomic_helper_disable_all(dev, &ctx);
if (ret != -EDEADLK)
break;
drm_modeset_backoff(&ctx);
}
if (ret)
DRM_ERROR("Disabling all crtc's during unload failed with %i\n", ret);
drm_modeset_drop_locks(&ctx);
drm_modeset_acquire_fini(&ctx);
intel_gvt_cleanup(dev_priv);
i915_driver_unregister(dev_priv);
......@@ -1317,7 +1366,7 @@ void i915_driver_unload(struct drm_device *dev)
/* Free error state after interrupts are fully disabled. */
cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
i915_destroy_error_state(dev_priv);
i915_reset_error_state(dev_priv);
/* Flush any outstanding unpin_work. */
drain_workqueue(dev_priv->wq);
......@@ -1333,8 +1382,16 @@ void i915_driver_unload(struct drm_device *dev)
i915_driver_cleanup_mmio(dev_priv);
intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);
}
static void i915_driver_release(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = to_i915(dev);
i915_driver_cleanup_early(dev_priv);
drm_dev_fini(&dev_priv->drm);
kfree(dev_priv);
}
static int i915_driver_open(struct drm_device *dev, struct drm_file *file)
......@@ -1716,6 +1773,8 @@ static int i915_drm_resume_early(struct drm_device *dev)
!(dev_priv->suspended_to_idle && dev_priv->csr.dmc_payload))
intel_power_domains_init_hw(dev_priv, true);
i915_gem_sanitize(dev_priv);
enable_rpm_wakeref_asserts(dev_priv);
out:
......@@ -1787,7 +1846,7 @@ void i915_reset(struct drm_i915_private *dev_priv)
goto error;
}
i915_gem_reset_finish(dev_priv);
i915_gem_reset(dev_priv);
intel_overlay_reset(dev_priv);
/* Ok, now get things going again... */
......@@ -1813,6 +1872,7 @@ void i915_reset(struct drm_i915_private *dev_priv)
i915_queue_hangcheck(dev_priv);
wakeup:
i915_gem_reset_finish(dev_priv);
enable_irq(dev_priv->drm.irq);
wake_up_bit(&error->flags, I915_RESET_IN_PROGRESS);
return;
......@@ -2532,7 +2592,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
DRM_IOCTL_DEF_DRV(I915_HWS_ADDR, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(I915_GEM_INIT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER, i915_gem_execbuffer, DRM_AUTH),
DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2_WR, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GEM_PIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(I915_GEM_UNPIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
......@@ -2574,7 +2634,8 @@ static struct drm_driver driver = {
*/
.driver_features =
DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM | DRIVER_PRIME |
DRIVER_RENDER | DRIVER_MODESET,
DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC,
.release = i915_driver_release,
.open = i915_driver_open,
.lastclose = i915_driver_lastclose,
.preclose = i915_driver_preclose,
......@@ -2603,3 +2664,7 @@ static struct drm_driver driver = {
.minor = DRIVER_MINOR,
.patchlevel = DRIVER_PATCHLEVEL,
};
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_drm.c"
#endif
......@@ -79,8 +79,8 @@
#define DRIVER_NAME "i915"
#define DRIVER_DESC "Intel Graphics"
#define DRIVER_DATE "20170123"
#define DRIVER_TIMESTAMP 1485156432
#define DRIVER_DATE "20170306"
#define DRIVER_TIMESTAMP 1488785683
#undef WARN_ON
/* Many gcc seem to no see through this and fall over :( */
......@@ -293,6 +293,7 @@ enum plane_id {
PLANE_PRIMARY,
PLANE_SPRITE0,
PLANE_SPRITE1,
PLANE_SPRITE2,
PLANE_CURSOR,
I915_MAX_PLANES,
};
......@@ -343,6 +344,11 @@ enum intel_display_power_domain {
POWER_DOMAIN_PORT_DDI_C_LANES,
POWER_DOMAIN_PORT_DDI_D_LANES,
POWER_DOMAIN_PORT_DDI_E_LANES,
POWER_DOMAIN_PORT_DDI_A_IO,
POWER_DOMAIN_PORT_DDI_B_IO,
POWER_DOMAIN_PORT_DDI_C_IO,
POWER_DOMAIN_PORT_DDI_D_IO,
POWER_DOMAIN_PORT_DDI_E_IO,
POWER_DOMAIN_PORT_DSI,
POWER_DOMAIN_PORT_CRT,
POWER_DOMAIN_PORT_OTHER,
......@@ -384,6 +390,8 @@ enum hpd_pin {
#define for_each_hpd_pin(__pin) \
for ((__pin) = (HPD_NONE + 1); (__pin) < HPD_NUM_PINS; (__pin)++)
#define HPD_STORM_DEFAULT_THRESHOLD 5
struct i915_hotplug {
struct work_struct hotplug_work;
......@@ -407,6 +415,8 @@ struct i915_hotplug {
struct work_struct poll_init_work;
bool poll_enabled;
unsigned int hpd_storm_threshold;
/*
* if we get a HPD irq from DP and a HPD irq from non-DP
* the non-DP HPD could block the workqueue on a mode config
......@@ -494,7 +504,35 @@ struct i915_hotplug {
#define for_each_power_domain(domain, mask) \
for ((domain) = 0; (domain) < POWER_DOMAIN_NUM; (domain)++) \
for_each_if ((1 << (domain)) & (mask))
for_each_if (BIT_ULL(domain) & (mask))
#define for_each_power_well(__dev_priv, __power_well) \
for ((__power_well) = (__dev_priv)->power_domains.power_wells; \
(__power_well) - (__dev_priv)->power_domains.power_wells < \
(__dev_priv)->power_domains.power_well_count; \
(__power_well)++)
#define for_each_power_well_rev(__dev_priv, __power_well) \
for ((__power_well) = (__dev_priv)->power_domains.power_wells + \
(__dev_priv)->power_domains.power_well_count - 1; \
(__power_well) - (__dev_priv)->power_domains.power_wells >= 0; \
(__power_well)--)
#define for_each_power_domain_well(__dev_priv, __power_well, __domain_mask) \
for_each_power_well(__dev_priv, __power_well) \
for_each_if ((__power_well)->domains & (__domain_mask))
#define for_each_power_domain_well_rev(__dev_priv, __power_well, __domain_mask) \
for_each_power_well_rev(__dev_priv, __power_well) \
for_each_if ((__power_well)->domains & (__domain_mask))
#define for_each_intel_plane_in_state(__state, plane, plane_state, __i) \
for ((__i) = 0; \
(__i) < (__state)->base.dev->mode_config.num_total_plane && \
((plane) = to_intel_plane((__state)->base.planes[__i].ptr), \
(plane_state) = to_intel_plane_state((__state)->base.planes[__i].state), 1); \
(__i)++) \
for_each_if (plane_state)
struct drm_i915_private;
struct i915_mm_struct;
......@@ -600,9 +638,13 @@ struct intel_initial_plane_config;
struct intel_crtc;
struct intel_limit;
struct dpll;
struct intel_cdclk_state;
struct drm_i915_display_funcs {
int (*get_display_clock_speed)(struct drm_i915_private *dev_priv);
void (*get_cdclk)(struct drm_i915_private *dev_priv,
struct intel_cdclk_state *cdclk_state);
void (*set_cdclk)(struct drm_i915_private *dev_priv,
const struct intel_cdclk_state *cdclk_state);
int (*get_fifo_size)(struct drm_i915_private *dev_priv, int plane);
int (*compute_pipe_wm)(struct intel_crtc_state *cstate);
int (*compute_intermediate_wm)(struct drm_device *dev,
......@@ -617,7 +659,6 @@ struct drm_i915_display_funcs {
int (*compute_global_watermarks)(struct drm_atomic_state *state);
void (*update_wm)(struct intel_crtc *crtc);
int (*modeset_calc_cdclk)(struct drm_atomic_state *state);
void (*modeset_commit_cdclk)(struct drm_atomic_state *state);
/* Returns the active state of the crtc, and if the crtc is active,
* fills out the pipe-config with the hw state. */
bool (*get_pipe_config)(struct intel_crtc *,
......@@ -636,7 +677,8 @@ struct drm_i915_display_funcs {
struct intel_encoder *encoder,
const struct drm_display_mode *adjusted_mode);
void (*audio_codec_disable)(struct intel_encoder *encoder);
void (*fdi_link_train)(struct drm_crtc *crtc);
void (*fdi_link_train)(struct intel_crtc *crtc,
const struct intel_crtc_state *crtc_state);
void (*init_clock_gating)(struct drm_i915_private *dev_priv);
int (*queue_flip)(struct drm_device *dev, struct drm_crtc *crtc,
struct drm_framebuffer *fb,
......@@ -856,6 +898,7 @@ enum intel_platform {
INTEL_BROXTON,
INTEL_KABYLAKE,
INTEL_GEMINILAKE,
INTEL_MAX_PLATFORMS
};
struct intel_device_info {
......@@ -890,7 +933,7 @@ struct intel_device_info {
struct intel_display_error_state;
struct drm_i915_error_state {
struct i915_gpu_state {
struct kref ref;
struct timeval time;
struct timeval boottime;
......@@ -900,16 +943,20 @@ struct drm_i915_error_state {
char error_msg[128];
bool simulated;
bool awake;
bool wakelock;
bool suspended;
int iommu;
u32 reset_count;
u32 suspend_count;
struct intel_device_info device_info;
struct i915_params params;
/* Generic register state */
u32 eir;
u32 pgtbl_er;
u32 ier;
u32 gtier[4];
u32 gtier[4], ngtier;
u32 ccid;
u32 derrmr;
u32 forcewake;
......@@ -923,6 +970,7 @@ struct drm_i915_error_state {
u32 gab_ctl;
u32 gfx_mode;
u32 nfence;
u64 fence[I915_MAX_NUM_FENCES];
struct intel_overlay_error_state *overlay;
struct intel_display_error_state *display;
......@@ -970,6 +1018,16 @@ struct drm_i915_error_state {
u32 semaphore_mboxes[I915_NUM_ENGINES - 1];
struct intel_instdone instdone;
struct drm_i915_error_context {
char comm[TASK_COMM_LEN];
pid_t pid;
u32 handle;
u32 hw_id;
int ban_score;
int active;
int guilty;
} context;
struct drm_i915_error_object {
u64 gtt_offset;
u64 gtt_size;
......@@ -1003,10 +1061,6 @@ struct drm_i915_error_state {
u32 pp_dir_base;
};
} vm_info;
pid_t pid;
char comm[TASK_COMM_LEN];
int context_bans;
} engine[I915_NUM_ENGINES];
struct drm_i915_error_buffer {
......@@ -1395,7 +1449,7 @@ struct i915_power_well {
int count;
/* cached hw enabled state */
bool hw_enabled;
unsigned long domains;
u64 domains;
/* unique identifier for this power well */
unsigned long id;
/*
......@@ -1456,7 +1510,7 @@ struct i915_gem_mm {
struct work_struct free_work;
/** Usable portion of the GTT for GEM */
phys_addr_t stolen_base; /* limited to low memory (32-bit) */
dma_addr_t stolen_base; /* limited to low memory (32-bit) */
/** PPGTT used for aliasing the PPGTT with the GTT */
struct i915_hw_ppgtt *aliasing_ppgtt;
......@@ -1498,11 +1552,6 @@ struct drm_i915_error_state_buf {
loff_t pos;
};
struct i915_error_state_file_priv {
struct drm_i915_private *i915;
struct drm_i915_error_state *error;
};
#define I915_RESET_TIMEOUT (10 * HZ) /* 10s */
#define I915_FENCE_TIMEOUT (10 * HZ) /* 10s */
......@@ -1519,7 +1568,7 @@ struct i915_gpu_error {
/* For reset and error_state handling. */
spinlock_t lock;
/* Protected by the above dev->gpu_error.lock. */
struct drm_i915_error_state *first_error;
struct i915_gpu_state *first_error;
unsigned long missed_irq_rings;
......@@ -2053,6 +2102,10 @@ struct i915_oa_ops {
bool (*oa_buffer_is_empty)(struct drm_i915_private *dev_priv);
};
struct intel_cdclk_state {
unsigned int cdclk, vco, ref;
};
struct drm_i915_private {
struct drm_device drm;
......@@ -2063,8 +2116,6 @@ struct drm_i915_private {
const struct intel_device_info info;
int relative_constants_mode;
void __iomem *regs;
struct intel_uncore uncore;
......@@ -2157,13 +2208,7 @@ struct drm_i915_private {
unsigned int fsb_freq, mem_freq, is_ddr3;
unsigned int skl_preferred_vco_freq;
unsigned int cdclk_freq, max_cdclk_freq;
/*
* For reading holding any crtc lock is sufficient,
* for writing must hold all of them.
*/
unsigned int atomic_cdclk_freq;
unsigned int max_cdclk_freq;
unsigned int max_dotclk_freq;
unsigned int rawclk_freq;
......@@ -2171,8 +2216,22 @@ struct drm_i915_private {
unsigned int czclk_freq;
struct {
unsigned int vco, ref;
} cdclk_pll;
/*
* The current logical cdclk state.
* See intel_atomic_state.cdclk.logical
*
* For reading holding any crtc lock is sufficient,
* for writing must hold all of them.
*/
struct intel_cdclk_state logical;
/*
* The current actual cdclk state.
* See intel_atomic_state.cdclk.actual
*/
struct intel_cdclk_state actual;
/* The current hardware cdclk state */
struct intel_cdclk_state hw;
} cdclk;
/**
* wq - Driver workqueue for GEM.
......@@ -2752,6 +2811,12 @@ intel_info(const struct drm_i915_private *dev_priv)
#define IS_KBL_REVID(dev_priv, since, until) \
(IS_KABYLAKE(dev_priv) && IS_REVID(dev_priv, since, until))
#define GLK_REVID_A0 0x0
#define GLK_REVID_A1 0x1
#define IS_GLK_REVID(dev_priv, since, until) \
(IS_GEMINILAKE(dev_priv) && IS_REVID(dev_priv, since, until))
/*
* The genX designation typically refers to the render engine, so render
* capability related checks should use IS_GEN, while display and other checks
......@@ -2767,8 +2832,9 @@ intel_info(const struct drm_i915_private *dev_priv)
#define IS_GEN8(dev_priv) (!!((dev_priv)->info.gen_mask & BIT(7)))
#define IS_GEN9(dev_priv) (!!((dev_priv)->info.gen_mask & BIT(8)))
#define IS_GEN9_LP(dev_priv) (IS_GEN9(dev_priv) && INTEL_INFO(dev_priv)->is_lp)
#define IS_LP(dev_priv) (INTEL_INFO(dev_priv)->is_lp)
#define IS_GEN9_LP(dev_priv) (IS_GEN9(dev_priv) && IS_LP(dev_priv))
#define IS_GEN9_BC(dev_priv) (IS_GEN9(dev_priv) && !IS_LP(dev_priv))
#define ENGINE_MASK(id) BIT(id)
#define RENDER_RING ENGINE_MASK(RCS)
......@@ -2810,9 +2876,7 @@ intel_info(const struct drm_i915_private *dev_priv)
/* WaRsDisableCoarsePowerGating:skl,bxt */
#define NEEDS_WaRsDisableCoarsePowerGating(dev_priv) \
(IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1) || \
IS_SKL_GT3(dev_priv) || \
IS_SKL_GT4(dev_priv))
(IS_SKL_GT3(dev_priv) || IS_SKL_GT4(dev_priv))
/*
* dp aux and gmbus irq on gen4 seems to be able to generate legacy interrupts
......@@ -2952,6 +3016,9 @@ extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
int intel_engines_init_early(struct drm_i915_private *dev_priv);
int intel_engines_init(struct drm_i915_private *dev_priv);
/* intel_hotplug.c */
void intel_hpd_irq_handler(struct drm_i915_private *dev_priv,
u32 pin_mask, u32 long_mask);
......@@ -3129,6 +3196,7 @@ int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
void i915_gem_sanitize(struct drm_i915_private *i915);
int i915_gem_load_init(struct drm_i915_private *dev_priv);
void i915_gem_load_cleanup(struct drm_i915_private *dev_priv);
void i915_gem_load_init_fences(struct drm_i915_private *dev_priv);
......@@ -3341,15 +3409,17 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error)
}
int i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
void i915_gem_reset(struct drm_i915_private *dev_priv);
void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
void i915_gem_init_mmio(struct drm_i915_private *i915);
int __must_check i915_gem_init(struct drm_i915_private *dev_priv);
int __must_check i915_gem_init_hw(struct drm_i915_private *dev_priv);
void i915_gem_init_swizzling(struct drm_i915_private *dev_priv);
void i915_gem_cleanup_engines(struct drm_i915_private *dev_priv);
int __must_check i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
unsigned int flags);
int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
unsigned int flags);
int __must_check i915_gem_suspend(struct drm_i915_private *dev_priv);
void i915_gem_resume(struct drm_i915_private *dev_priv);
int i915_gem_fault(struct vm_fault *vmf);
......@@ -3543,7 +3613,7 @@ static inline void intel_display_crc_init(struct drm_i915_private *dev_priv) {}
__printf(2, 3)
void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
int i915_error_state_to_str(struct drm_i915_error_state_buf *estr,
const struct i915_error_state_file_priv *error);
const struct i915_gpu_state *gpu);
int i915_error_state_buf_init(struct drm_i915_error_state_buf *eb,
struct drm_i915_private *i915,
size_t count, loff_t pos);
......@@ -3552,13 +3622,28 @@ static inline void i915_error_state_buf_release(
{
kfree(eb->buf);
}
struct i915_gpu_state *i915_capture_gpu_state(struct drm_i915_private *i915);
void i915_capture_error_state(struct drm_i915_private *dev_priv,
u32 engine_mask,
const char *error_msg);
void i915_error_state_get(struct drm_device *dev,
struct i915_error_state_file_priv *error_priv);
void i915_error_state_put(struct i915_error_state_file_priv *error_priv);
void i915_destroy_error_state(struct drm_i915_private *dev_priv);
static inline struct i915_gpu_state *
i915_gpu_state_get(struct i915_gpu_state *gpu)
{
kref_get(&gpu->ref);
return gpu;
}
void __i915_gpu_state_free(struct kref *kref);
static inline void i915_gpu_state_put(struct i915_gpu_state *gpu)
{
if (gpu)
kref_put(&gpu->ref, __i915_gpu_state_free);
}
struct i915_gpu_state *i915_first_error_state(struct drm_i915_private *i915);
void i915_reset_error_state(struct drm_i915_private *i915);
#else
......@@ -3568,7 +3653,13 @@ static inline void i915_capture_error_state(struct drm_i915_private *dev_priv,
{
}
static inline void i915_destroy_error_state(struct drm_i915_private *dev_priv)
static inline struct i915_gpu_state *
i915_first_error_state(struct drm_i915_private *i915)
{
return NULL;
}
static inline void i915_reset_error_state(struct drm_i915_private *i915)
{
}
......@@ -3708,7 +3799,7 @@ extern void i915_redisable_vga(struct drm_i915_private *dev_priv);
extern void i915_redisable_vga_power_on(struct drm_i915_private *dev_priv);
extern bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val);
extern void intel_init_pch_refclk(struct drm_i915_private *dev_priv);
extern void intel_set_rps(struct drm_i915_private *dev_priv, u8 val);
extern int intel_set_rps(struct drm_i915_private *dev_priv, u8 val);
extern bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv,
bool enable);
......@@ -3724,7 +3815,6 @@ extern void intel_overlay_print_error_state(struct drm_i915_error_state_buf *e,
extern struct intel_display_error_state *
intel_display_capture_error_state(struct drm_i915_private *dev_priv);
extern void intel_display_print_error_state(struct drm_i915_error_state_buf *e,
struct drm_i915_private *dev_priv,
struct intel_display_error_state *error);
int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val);
......@@ -3734,7 +3824,7 @@ int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request,
/* intel_sideband.c */
u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr);
void vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val);
int vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val);
u32 vlv_nc_read(struct drm_i915_private *dev_priv, u8 addr);
u32 vlv_iosf_sb_read(struct drm_i915_private *dev_priv, u8 port, u32 reg);
void vlv_iosf_sb_write(struct drm_i915_private *dev_priv, u8 port, u32 reg, u32 val);
......@@ -3953,14 +4043,34 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
}
static inline bool
__i915_request_irq_complete(struct drm_i915_gem_request *req)
__i915_request_irq_complete(const struct drm_i915_gem_request *req)
{
struct intel_engine_cs *engine = req->engine;
u32 seqno;
/* Note that the engine may have wrapped around the seqno, and
* so our request->global_seqno will be ahead of the hardware,
* even though it completed the request before wrapping. We catch
* this by kicking all the waiters before resetting the seqno
* in hardware, and also signal the fence.
*/
if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &req->fence.flags))
return true;
/* The request was dequeued before we were awoken. We check after
* inspecting the hw to confirm that this was the same request
* that generated the HWS update. The memory barriers within
* the request execution are sufficient to ensure that a check
* after reading the value from hw matches this request.
*/
seqno = i915_gem_request_global_seqno(req);
if (!seqno)
return false;
/* Before we do the heavier coherent read of the seqno,
* check the value (hopefully) in the CPU cacheline.
*/
if (__i915_gem_request_completed(req))
if (__i915_gem_request_completed(req, seqno))
return true;
/* Ensure our read of the seqno is coherent so that we
......@@ -3975,9 +4085,9 @@ __i915_request_irq_complete(struct drm_i915_gem_request *req)
* is woken.
*/
if (engine->irq_seqno_barrier &&
rcu_access_pointer(engine->breadcrumbs.irq_seqno_bh) == current &&
cmpxchg_relaxed(&engine->breadcrumbs.irq_posted, 1, 0)) {
struct task_struct *tsk;
test_and_clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted)) {
struct intel_breadcrumbs *b = &engine->breadcrumbs;
unsigned long flags;
/* The ordering of irq_posted versus applying the barrier
* is crucial. The clearing of the current irq_posted must
......@@ -3999,19 +4109,18 @@ __i915_request_irq_complete(struct drm_i915_gem_request *req)
* the seqno before we believe it coherent since they see
* irq_posted == false but we are still running).
*/
rcu_read_lock();
tsk = rcu_dereference(engine->breadcrumbs.irq_seqno_bh);
if (tsk && tsk != current)
spin_lock_irqsave(&b->irq_lock, flags);
if (b->irq_wait && b->irq_wait->tsk != current)
/* Note that if the bottom-half is changed as we
* are sending the wake-up, the new bottom-half will
* be woken by whomever made the change. We only have
* to worry about when we steal the irq-posted for
* ourself.
*/
wake_up_process(tsk);
rcu_read_unlock();
wake_up_process(b->irq_wait->tsk);
spin_unlock_irqrestore(&b->irq_lock, flags);
if (__i915_gem_request_completed(req))
if (__i915_gem_request_completed(req, seqno))
return true;
}
......@@ -4042,4 +4151,10 @@ int remap_io_mapping(struct vm_area_struct *vma,
unsigned long addr, unsigned long pfn, unsigned long size,
struct io_mapping *iomap);
static inline bool i915_gem_object_is_coherent(struct drm_i915_gem_object *obj)
{
return (obj->cache_level != I915_CACHE_NONE ||
HAS_LLC(to_i915(obj->base.dev)));
}
#endif
此差异已折叠。
......@@ -28,9 +28,18 @@
#ifdef CONFIG_DRM_I915_DEBUG_GEM
#define GEM_BUG_ON(expr) BUG_ON(expr)
#define GEM_WARN_ON(expr) WARN_ON(expr)
#define GEM_DEBUG_DECL(var) var
#define GEM_DEBUG_EXEC(expr) expr
#define GEM_DEBUG_BUG_ON(expr) GEM_BUG_ON(expr)
#else
#define GEM_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr)
#define GEM_WARN_ON(expr) (BUILD_BUG_ON_INVALID(expr), 0)
#define GEM_DEBUG_DECL(var)
#define GEM_DEBUG_EXEC(expr) do { } while (0)
#define GEM_DEBUG_BUG_ON(expr)
#endif
#define I915_NUM_ENGINES 5
......
......@@ -122,9 +122,9 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
if (tmp->base.size >= size) {
/* Clear the set of shared fences early */
ww_mutex_lock(&tmp->resv->lock, NULL);
reservation_object_lock(tmp->resv, NULL);
reservation_object_add_excl_fence(tmp->resv, NULL);
ww_mutex_unlock(&tmp->resv->lock);
reservation_object_unlock(tmp->resv);
obj = tmp;
break;
......
/*
* Copyright © 2016 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
*/
#include "i915_drv.h"
#include "intel_frontbuffer.h"
#include "i915_gem_clflush.h"
static DEFINE_SPINLOCK(clflush_lock);
static u64 clflush_context;
struct clflush {
struct dma_fence dma; /* Must be first for dma_fence_free() */
struct i915_sw_fence wait;
struct work_struct work;
struct drm_i915_gem_object *obj;
};
static const char *i915_clflush_get_driver_name(struct dma_fence *fence)
{
return DRIVER_NAME;
}
static const char *i915_clflush_get_timeline_name(struct dma_fence *fence)
{
return "clflush";
}
static bool i915_clflush_enable_signaling(struct dma_fence *fence)
{
return true;
}
static void i915_clflush_release(struct dma_fence *fence)
{
struct clflush *clflush = container_of(fence, typeof(*clflush), dma);
i915_sw_fence_fini(&clflush->wait);
BUILD_BUG_ON(offsetof(typeof(*clflush), dma));
dma_fence_free(&clflush->dma);
}
static const struct dma_fence_ops i915_clflush_ops = {
.get_driver_name = i915_clflush_get_driver_name,
.get_timeline_name = i915_clflush_get_timeline_name,
.enable_signaling = i915_clflush_enable_signaling,
.wait = dma_fence_default_wait,
.release = i915_clflush_release,
};
static void __i915_do_clflush(struct drm_i915_gem_object *obj)
{
drm_clflush_sg(obj->mm.pages);
obj->cache_dirty = false;
intel_fb_obj_flush(obj, ORIGIN_CPU);
}
static void i915_clflush_work(struct work_struct *work)
{
struct clflush *clflush = container_of(work, typeof(*clflush), work);
struct drm_i915_gem_object *obj = clflush->obj;
if (!obj->cache_dirty)
goto out;
if (i915_gem_object_pin_pages(obj)) {
DRM_ERROR("Failed to acquire obj->pages for clflushing\n");
goto out;
}
__i915_do_clflush(obj);
i915_gem_object_unpin_pages(obj);
out:
i915_gem_object_put(obj);
dma_fence_signal(&clflush->dma);
dma_fence_put(&clflush->dma);
}
static int __i915_sw_fence_call
i915_clflush_notify(struct i915_sw_fence *fence,
enum i915_sw_fence_notify state)
{
struct clflush *clflush = container_of(fence, typeof(*clflush), wait);
switch (state) {
case FENCE_COMPLETE:
schedule_work(&clflush->work);
break;
case FENCE_FREE:
dma_fence_put(&clflush->dma);
break;
}
return NOTIFY_DONE;
}
void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
unsigned int flags)
{
struct clflush *clflush;
/*
* Stolen memory is always coherent with the GPU as it is explicitly
* marked as wc by the system, or the system is cache-coherent.
* Similarly, we only access struct pages through the CPU cache, so
* anything not backed by physical memory we consider to be always
* coherent and not need clflushing.
*/
if (!i915_gem_object_has_struct_page(obj))
return;
obj->cache_dirty = true;
/* If the GPU is snooping the contents of the CPU cache,
* we do not need to manually clear the CPU cache lines. However,
* the caches are only snooped when the render cache is
* flushed/invalidated. As we always have to emit invalidations
* and flushes when moving into and out of the RENDER domain, correct
* snooping behaviour occurs naturally as the result of our domain
* tracking.
*/
if (!(flags & I915_CLFLUSH_FORCE) && i915_gem_object_is_coherent(obj))
return;
trace_i915_gem_object_clflush(obj);
clflush = NULL;
if (!(flags & I915_CLFLUSH_SYNC))
clflush = kmalloc(sizeof(*clflush), GFP_KERNEL);
if (clflush) {
dma_fence_init(&clflush->dma,
&i915_clflush_ops,
&clflush_lock,
clflush_context,
0);
i915_sw_fence_init(&clflush->wait, i915_clflush_notify);
clflush->obj = i915_gem_object_get(obj);
INIT_WORK(&clflush->work, i915_clflush_work);
dma_fence_get(&clflush->dma);
i915_sw_fence_await_reservation(&clflush->wait,
obj->resv, NULL,
false, I915_FENCE_TIMEOUT,
GFP_KERNEL);
reservation_object_lock(obj->resv, NULL);
reservation_object_add_excl_fence(obj->resv, &clflush->dma);
reservation_object_unlock(obj->resv);
i915_sw_fence_commit(&clflush->wait);
} else if (obj->mm.pages) {
__i915_do_clflush(obj);
} else {
GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU);
}
}
void i915_gem_clflush_init(struct drm_i915_private *i915)
{
clflush_context = dma_fence_context_alloc(1);
}
/*
* Copyright © 2016 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
*/
#ifndef __I915_GEM_CLFLUSH_H__
#define __I915_GEM_CLFLUSH_H__
struct drm_i915_private;
struct drm_i915_gem_object;
void i915_gem_clflush_init(struct drm_i915_private *i915);
void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
unsigned int flags);
#define I915_CLFLUSH_FORCE BIT(0)
#define I915_CLFLUSH_SYNC BIT(1)
#endif /* __I915_GEM_CLFLUSH_H__ */
......@@ -92,21 +92,6 @@
#define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
/* This is a HW constraint. The value below is the largest known requirement
* I've seen in a spec to date, and that was a workaround for a non-shipping
* part. It should be safe to decrease this, but it's more future proof as is.
*/
#define GEN6_CONTEXT_ALIGN (64<<10)
#define GEN7_CONTEXT_ALIGN I915_GTT_MIN_ALIGNMENT
static size_t get_context_alignment(struct drm_i915_private *dev_priv)
{
if (IS_GEN6(dev_priv))
return GEN6_CONTEXT_ALIGN;
return GEN7_CONTEXT_ALIGN;
}
static int get_context_size(struct drm_i915_private *dev_priv)
{
int ret;
......@@ -236,6 +221,30 @@ static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out)
return 0;
}
static u32 default_desc_template(const struct drm_i915_private *i915,
const struct i915_hw_ppgtt *ppgtt)
{
u32 address_mode;
u32 desc;
desc = GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
address_mode = INTEL_LEGACY_32B_CONTEXT;
if (ppgtt && i915_vm_is_48bit(&ppgtt->base))
address_mode = INTEL_LEGACY_64B_CONTEXT;
desc |= address_mode << GEN8_CTX_ADDRESSING_MODE_SHIFT;
if (IS_GEN8(i915))
desc |= GEN8_CTX_L3LLC_COHERENT;
/* TODO: WaDisableLiteRestore when we start using semaphore
* signalling between Command Streamers
* ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE;
*/
return desc;
}
static struct i915_gem_context *
__create_hw_context(struct drm_i915_private *dev_priv,
struct drm_i915_file_private *file_priv)
......@@ -257,8 +266,6 @@ __create_hw_context(struct drm_i915_private *dev_priv,
list_add_tail(&ctx->link, &dev_priv->context_list);
ctx->i915 = dev_priv;
ctx->ggtt_alignment = get_context_alignment(dev_priv);
if (dev_priv->hw_context_size) {
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
......@@ -309,8 +316,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
i915_gem_context_set_bannable(ctx);
ctx->ring_size = 4 * PAGE_SIZE;
ctx->desc_template = GEN8_CTX_ADDRESSING_MODE(dev_priv) <<
GEN8_CTX_ADDRESSING_MODE_SHIFT;
ctx->desc_template =
default_desc_template(dev_priv, dev_priv->mm.aliasing_ppgtt);
ATOMIC_INIT_NOTIFIER_HEAD(&ctx->status_notifier);
/* GuC requires the ring to be placed above GUC_WOPCM_TOP. If GuC is not
......@@ -332,6 +339,13 @@ __create_hw_context(struct drm_i915_private *dev_priv,
return ERR_PTR(ret);
}
static void __destroy_hw_context(struct i915_gem_context *ctx,
struct drm_i915_file_private *file_priv)
{
idr_remove(&file_priv->context_idr, ctx->user_handle);
context_close(ctx);
}
/**
* The default context needs to exist per ring that uses contexts. It stores the
* context state of the GPU for applications that don't utilize HW contexts, as
......@@ -356,12 +370,12 @@ i915_gem_create_context(struct drm_i915_private *dev_priv,
if (IS_ERR(ppgtt)) {
DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n",
PTR_ERR(ppgtt));
idr_remove(&file_priv->context_idr, ctx->user_handle);
context_close(ctx);
__destroy_hw_context(ctx, file_priv);
return ERR_CAST(ppgtt);
}
ctx->ppgtt = ppgtt;
ctx->desc_template = default_desc_template(dev_priv, ppgtt);
}
trace_i915_context_create(ctx);
......@@ -400,7 +414,8 @@ i915_gem_context_create_gvt(struct drm_device *dev)
i915_gem_context_set_closed(ctx); /* not user accessible */
i915_gem_context_clear_bannable(ctx);
i915_gem_context_set_force_single_submission(ctx);
ctx->ring_size = 512 * PAGE_SIZE; /* Max ring buffer size */
if (!i915.enable_guc_submission)
ctx->ring_size = 512 * PAGE_SIZE; /* Max ring buffer size */
GEM_BUG_ON(i915_gem_context_is_kernel(ctx));
out:
......@@ -451,6 +466,11 @@ int i915_gem_context_init(struct drm_i915_private *dev_priv)
return PTR_ERR(ctx);
}
/* For easy recognisablity, we want the kernel context to be 0 and then
* all user contexts will have non-zero hw_id.
*/
GEM_BUG_ON(ctx->hw_id);
i915_gem_context_clear_bannable(ctx);
ctx->priority = I915_PRIORITY_MIN; /* lowest priority; idle task */
dev_priv->kernel_context = ctx;
......@@ -560,27 +580,15 @@ static inline int
mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
{
struct drm_i915_private *dev_priv = req->i915;
struct intel_ring *ring = req->ring;
struct intel_engine_cs *engine = req->engine;
enum intel_engine_id id;
u32 flags = hw_flags | MI_MM_SPACE_GTT;
u32 *cs, flags = hw_flags | MI_MM_SPACE_GTT;
const int num_rings =
/* Use an extended w/a on ivb+ if signalling from other rings */
i915.semaphores ?
INTEL_INFO(dev_priv)->num_rings - 1 :
0;
int len, ret;
/* w/a: If Flush TLB Invalidation Mode is enabled, driver must do a TLB
* invalidation prior to MI_SET_CONTEXT. On GEN6 we don't set the value
* explicitly, so we rely on the value at ring init, stored in
* itlb_before_ctx_switch.
*/
if (IS_GEN6(dev_priv)) {
ret = engine->emit_flush(req, EMIT_INVALIDATE);
if (ret)
return ret;
}
int len;
/* These flags are for resource streamer on HSW+ */
if (IS_HASWELL(dev_priv) || INTEL_GEN(dev_priv) >= 8)
......@@ -593,99 +601,92 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
if (INTEL_GEN(dev_priv) >= 7)
len += 2 + (num_rings ? 4*num_rings + 6 : 0);
ret = intel_ring_begin(req, len);
if (ret)
return ret;
cs = intel_ring_begin(req, len);
if (IS_ERR(cs))
return PTR_ERR(cs);
/* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
if (INTEL_GEN(dev_priv) >= 7) {
intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_DISABLE);
*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
if (num_rings) {
struct intel_engine_cs *signaller;
intel_ring_emit(ring,
MI_LOAD_REGISTER_IMM(num_rings));
*cs++ = MI_LOAD_REGISTER_IMM(num_rings);
for_each_engine(signaller, dev_priv, id) {
if (signaller == engine)
continue;
intel_ring_emit_reg(ring,
RING_PSMI_CTL(signaller->mmio_base));
intel_ring_emit(ring,
_MASKED_BIT_ENABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
*cs++ = i915_mmio_reg_offset(
RING_PSMI_CTL(signaller->mmio_base));
*cs++ = _MASKED_BIT_ENABLE(
GEN6_PSMI_SLEEP_MSG_DISABLE);
}
}
}
intel_ring_emit(ring, MI_NOOP);
intel_ring_emit(ring, MI_SET_CONTEXT);
intel_ring_emit(ring,
i915_ggtt_offset(req->ctx->engine[RCS].state) | flags);
*cs++ = MI_NOOP;
*cs++ = MI_SET_CONTEXT;
*cs++ = i915_ggtt_offset(req->ctx->engine[RCS].state) | flags;
/*
* w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
* WaMiSetContext_Hang:snb,ivb,vlv
*/
intel_ring_emit(ring, MI_NOOP);
*cs++ = MI_NOOP;
if (INTEL_GEN(dev_priv) >= 7) {
if (num_rings) {
struct intel_engine_cs *signaller;
i915_reg_t last_reg = {}; /* keep gcc quiet */
intel_ring_emit(ring,
MI_LOAD_REGISTER_IMM(num_rings));
*cs++ = MI_LOAD_REGISTER_IMM(num_rings);
for_each_engine(signaller, dev_priv, id) {
if (signaller == engine)
continue;
last_reg = RING_PSMI_CTL(signaller->mmio_base);
intel_ring_emit_reg(ring, last_reg);
intel_ring_emit(ring,
_MASKED_BIT_DISABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
*cs++ = i915_mmio_reg_offset(last_reg);
*cs++ = _MASKED_BIT_DISABLE(
GEN6_PSMI_SLEEP_MSG_DISABLE);
}
/* Insert a delay before the next switch! */
intel_ring_emit(ring,
MI_STORE_REGISTER_MEM |
MI_SRM_LRM_GLOBAL_GTT);
intel_ring_emit_reg(ring, last_reg);
intel_ring_emit(ring,
i915_ggtt_offset(engine->scratch));
intel_ring_emit(ring, MI_NOOP);
*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
*cs++ = i915_mmio_reg_offset(last_reg);
*cs++ = i915_ggtt_offset(engine->scratch);
*cs++ = MI_NOOP;
}
intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_ENABLE);
*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
}
intel_ring_advance(ring);
intel_ring_advance(req, cs);
return ret;
return 0;
}
static int remap_l3(struct drm_i915_gem_request *req, int slice)
{
u32 *remap_info = req->i915->l3_parity.remap_info[slice];
struct intel_ring *ring = req->ring;
int i, ret;
u32 *cs, *remap_info = req->i915->l3_parity.remap_info[slice];
int i;
if (!remap_info)
return 0;
ret = intel_ring_begin(req, GEN7_L3LOG_SIZE/4 * 2 + 2);
if (ret)
return ret;
cs = intel_ring_begin(req, GEN7_L3LOG_SIZE/4 * 2 + 2);
if (IS_ERR(cs))
return PTR_ERR(cs);
/*
* Note: We do not worry about the concurrent register cacheline hang
* here because no other code should access these registers other than
* at initialization time.
*/
intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4));
*cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4);
for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) {
intel_ring_emit_reg(ring, GEN7_L3LOG(slice, i));
intel_ring_emit(ring, remap_info[i]);
*cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i));
*cs++ = remap_info[i];
}
intel_ring_emit(ring, MI_NOOP);
intel_ring_advance(ring);
*cs++ = MI_NOOP;
intel_ring_advance(req, cs);
return 0;
}
......@@ -1014,8 +1015,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
return PTR_ERR(ctx);
}
idr_remove(&file_priv->context_idr, ctx->user_handle);
context_close(ctx);
__destroy_hw_context(ctx, file_priv);
mutex_unlock(&dev->struct_mutex);
DRM_DEBUG("HW context %d destroyed\n", args->ctx_id);
......@@ -1164,3 +1164,8 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev,
return 0;
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_context.c"
#include "selftests/i915_gem_context.c"
#endif
......@@ -140,8 +140,6 @@ struct i915_gem_context {
*/
int priority;
/** ggtt_alignment: alignment restriction for context objects */
u32 ggtt_alignment;
/** ggtt_offset_bias: placement restriction for context objects */
u32 ggtt_offset_bias;
......
......@@ -307,3 +307,8 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
return ERR_PTR(ret);
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_dmabuf.c"
#include "selftests/i915_gem_dmabuf.c"
#endif
......@@ -258,6 +258,9 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
int ret = 0;
lockdep_assert_held(&vm->i915->drm.struct_mutex);
GEM_BUG_ON(!IS_ALIGNED(start, I915_GTT_PAGE_SIZE));
GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
trace_i915_gem_evict_node(vm, target, flags);
/* Retire before we search the active list. Although we have
......@@ -271,11 +274,13 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
check_color = vm->mm.color_adjust;
if (check_color) {
/* Expand search to cover neighbouring guard pages (or lack!) */
if (start > vm->start)
if (start)
start -= I915_GTT_PAGE_SIZE;
if (end < vm->start + vm->total)
end += I915_GTT_PAGE_SIZE;
/* Always look at the page afterwards to avoid the end-of-GTT */
end += I915_GTT_PAGE_SIZE;
}
GEM_BUG_ON(start >= end);
drm_mm_for_each_node_in_range(node, &vm->mm, start, end) {
/* If we find any non-objects (!vma), we cannot evict them */
......@@ -284,6 +289,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
break;
}
GEM_BUG_ON(!node->allocated);
vma = container_of(node, typeof(*vma), node);
/* If we are using coloring to insert guard pages between
......@@ -387,3 +393,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle)
return 0;
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_gem_evict.c"
#endif
......@@ -28,12 +28,14 @@
#include <linux/dma_remapping.h>
#include <linux/reservation.h>
#include <linux/sync_file.h>
#include <linux/uaccess.h>
#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include "intel_frontbuffer.h"
......@@ -1110,13 +1112,18 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
list_for_each_entry(vma, vmas, exec_list) {
struct drm_i915_gem_object *obj = vma->obj;
if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC)
continue;
if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) {
i915_gem_clflush_object(obj, 0);
obj->base.write_domain = 0;
}
ret = i915_gem_request_await_object
(req, obj, obj->base.pending_write_domain);
if (ret)
return ret;
if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
i915_gem_clflush_object(obj, false);
}
/* Unconditionally flush any chipset caches (for streaming writes). */
......@@ -1297,12 +1304,12 @@ static void eb_export_fence(struct drm_i915_gem_object *obj,
* handle an error right now. Worst case should be missed
* synchronisation leading to rendering corruption.
*/
ww_mutex_lock(&resv->lock, NULL);
reservation_object_lock(resv, NULL);
if (flags & EXEC_OBJECT_WRITE)
reservation_object_add_excl_fence(resv, &req->fence);
else if (reservation_object_reserve_shared(resv) == 0)
reservation_object_add_shared_fence(resv, &req->fence);
ww_mutex_unlock(&resv->lock);
reservation_object_unlock(resv);
}
static void
......@@ -1313,8 +1320,6 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
list_for_each_entry(vma, vmas, exec_list) {
struct drm_i915_gem_object *obj = vma->obj;
u32 old_read = obj->base.read_domains;
u32 old_write = obj->base.write_domain;
obj->base.write_domain = obj->base.pending_write_domain;
if (obj->base.write_domain)
......@@ -1325,32 +1330,31 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
i915_vma_move_to_active(vma, req, vma->exec_entry->flags);
eb_export_fence(obj, req, vma->exec_entry->flags);
trace_i915_gem_object_change_domain(obj, old_read, old_write);
}
}
static int
i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
{
struct intel_ring *ring = req->ring;
int ret, i;
u32 *cs;
int i;
if (!IS_GEN7(req->i915) || req->engine->id != RCS) {
DRM_DEBUG("sol reset is gen7/rcs only\n");
return -EINVAL;
}
ret = intel_ring_begin(req, 4 * 3);
if (ret)
return ret;
cs = intel_ring_begin(req, 4 * 3);
if (IS_ERR(cs))
return PTR_ERR(cs);
for (i = 0; i < 4; i++) {
intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
intel_ring_emit_reg(ring, GEN7_SO_WRITE_OFFSET(i));
intel_ring_emit(ring, 0);
*cs++ = MI_LOAD_REGISTER_IMM(1);
*cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i));
*cs++ = 0;
}
intel_ring_advance(ring);
intel_ring_advance(req, cs);
return 0;
}
......@@ -1403,15 +1407,20 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *engine,
return vma;
}
static void
add_to_client(struct drm_i915_gem_request *req,
struct drm_file *file)
{
req->file_priv = file->driver_priv;
list_add_tail(&req->client_link, &req->file_priv->mm.request_list);
}
static int
execbuf_submit(struct i915_execbuffer_params *params,
struct drm_i915_gem_execbuffer2 *args,
struct list_head *vmas)
{
struct drm_i915_private *dev_priv = params->request->i915;
u64 exec_start, exec_len;
int instp_mode;
u32 instp_mask;
int ret;
ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas);
......@@ -1422,56 +1431,11 @@ execbuf_submit(struct i915_execbuffer_params *params,
if (ret)
return ret;
instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
instp_mask = I915_EXEC_CONSTANTS_MASK;
switch (instp_mode) {
case I915_EXEC_CONSTANTS_REL_GENERAL:
case I915_EXEC_CONSTANTS_ABSOLUTE:
case I915_EXEC_CONSTANTS_REL_SURFACE:
if (instp_mode != 0 && params->engine->id != RCS) {
DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
return -EINVAL;
}
if (instp_mode != dev_priv->relative_constants_mode) {
if (INTEL_INFO(dev_priv)->gen < 4) {
DRM_DEBUG("no rel constants on pre-gen4\n");
return -EINVAL;
}
if (INTEL_INFO(dev_priv)->gen > 5 &&
instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
return -EINVAL;
}
/* The HW changed the meaning on this bit on gen6 */
if (INTEL_INFO(dev_priv)->gen >= 6)
instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
}
break;
default:
DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode);
if (args->flags & I915_EXEC_CONSTANTS_MASK) {
DRM_DEBUG("I915_EXEC_CONSTANTS_* unsupported\n");
return -EINVAL;
}
if (params->engine->id == RCS &&
instp_mode != dev_priv->relative_constants_mode) {
struct intel_ring *ring = params->request->ring;
ret = intel_ring_begin(params->request, 4);
if (ret)
return ret;
intel_ring_emit(ring, MI_NOOP);
intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
intel_ring_emit_reg(ring, INSTPM);
intel_ring_emit(ring, instp_mask << 16 | instp_mode);
intel_ring_advance(ring);
dev_priv->relative_constants_mode = instp_mode;
}
if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
ret = i915_reset_gen7_sol_offsets(params->request);
if (ret)
......@@ -1491,8 +1455,6 @@ execbuf_submit(struct i915_execbuffer_params *params,
if (ret)
return ret;
trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
i915_gem_execbuffer_move_to_active(vmas, params->request);
return 0;
......@@ -1591,6 +1553,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
struct i915_execbuffer_params *params = &params_master;
const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
u32 dispatch_flags;
struct dma_fence *in_fence = NULL;
struct sync_file *out_fence = NULL;
int out_fence_fd = -1;
int ret;
bool need_relocs;
......@@ -1634,6 +1599,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
dispatch_flags |= I915_DISPATCH_RS;
}
if (args->flags & I915_EXEC_FENCE_IN) {
in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
if (!in_fence)
return -EINVAL;
}
if (args->flags & I915_EXEC_FENCE_OUT) {
out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
if (out_fence_fd < 0) {
ret = out_fence_fd;
goto err_in_fence;
}
}
/* Take a local wakeref for preparing to dispatch the execbuf as
* we expect to access the hardware fairly frequently in the
* process. Upon first dispatch, we acquire another prolonged
......@@ -1778,6 +1757,21 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
goto err_batch_unpin;
}
if (in_fence) {
ret = i915_gem_request_await_dma_fence(params->request,
in_fence);
if (ret < 0)
goto err_request;
}
if (out_fence_fd != -1) {
out_fence = sync_file_create(&params->request->fence);
if (!out_fence) {
ret = -ENOMEM;
goto err_request;
}
}
/* Whilst this request exists, batch_obj will be on the
* active_list, and so will hold the active reference. Only when this
* request is retired will the the batch_obj be moved onto the
......@@ -1786,10 +1780,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
*/
params->request->batch = params->batch;
ret = i915_gem_request_add_to_client(params->request, file);
if (ret)
goto err_request;
/*
* Save assorted stuff away to pass through to *_submission().
* NB: This data should be 'persistent' and not local as it will
......@@ -1802,9 +1792,23 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
params->dispatch_flags = dispatch_flags;
params->ctx = ctx;
trace_i915_gem_request_queue(params->request, dispatch_flags);
ret = execbuf_submit(params, args, &eb->vmas);
err_request:
__i915_add_request(params->request, ret == 0);
add_to_client(params->request, file);
if (out_fence) {
if (ret == 0) {
fd_install(out_fence_fd, out_fence->file);
args->rsvd2 &= GENMASK_ULL(0, 31); /* keep in-fence */
args->rsvd2 |= (u64)out_fence_fd << 32;
out_fence_fd = -1;
} else {
fput(out_fence->file);
}
}
err_batch_unpin:
/*
......@@ -1826,6 +1830,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
/* intel_gpu_busy should also get a ref, so it will free when the device
* is really idle. */
intel_runtime_pm_put(dev_priv);
if (out_fence_fd != -1)
put_unused_fd(out_fence_fd);
err_in_fence:
dma_fence_put(in_fence);
return ret;
}
......@@ -1933,11 +1941,6 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data,
return -EINVAL;
}
if (args->rsvd2 != 0) {
DRM_DEBUG("dirty rvsd2 field\n");
return -EINVAL;
}
exec2_list = drm_malloc_gfp(args->buffer_count,
sizeof(*exec2_list),
GFP_TEMPORARY);
......
此差异已折叠。
......@@ -36,9 +36,11 @@
#include <linux/io-mapping.h>
#include <linux/mm.h>
#include <linux/pagevec.h>
#include "i915_gem_timeline.h"
#include "i915_gem_request.h"
#include "i915_selftest.h"
#define I915_GTT_PAGE_SIZE 4096UL
#define I915_GTT_MIN_ALIGNMENT I915_GTT_PAGE_SIZE
......@@ -51,11 +53,11 @@
struct drm_i915_file_private;
struct drm_i915_fence_reg;
typedef uint32_t gen6_pte_t;
typedef uint64_t gen8_pte_t;
typedef uint64_t gen8_pde_t;
typedef uint64_t gen8_ppgtt_pdpe_t;
typedef uint64_t gen8_ppgtt_pml4e_t;
typedef u32 gen6_pte_t;
typedef u64 gen8_pte_t;
typedef u64 gen8_pde_t;
typedef u64 gen8_ppgtt_pdpe_t;
typedef u64 gen8_ppgtt_pml4e_t;
#define ggtt_total_entries(ggtt) ((ggtt)->base.total >> PAGE_SHIFT)
......@@ -67,7 +69,7 @@ typedef uint64_t gen8_ppgtt_pml4e_t;
#define GEN6_PTE_UNCACHED (1 << 1)
#define GEN6_PTE_VALID (1 << 0)
#define I915_PTES(pte_len) (PAGE_SIZE / (pte_len))
#define I915_PTES(pte_len) ((unsigned int)(PAGE_SIZE / (pte_len)))
#define I915_PTE_MASK(pte_len) (I915_PTES(pte_len) - 1)
#define I915_PDES 512
#define I915_PDE_MASK (I915_PDES - 1)
......@@ -99,13 +101,20 @@ typedef uint64_t gen8_ppgtt_pml4e_t;
#define HSW_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0x7f0))
#define HSW_PTE_ADDR_ENCODE(addr) HSW_GTT_ADDR_ENCODE(addr)
/* GEN8 legacy style address is defined as a 3 level page table:
/* GEN8 32b style address is defined as a 3 level page table:
* 31:30 | 29:21 | 20:12 | 11:0
* PDPE | PDE | PTE | offset
* The difference as compared to normal x86 3 level page table is the PDPEs are
* programmed via register.
*
* GEN8 48b legacy style address is defined as a 4 level page table:
*/
#define GEN8_3LVL_PDPES 4
#define GEN8_PDE_SHIFT 21
#define GEN8_PDE_MASK 0x1ff
#define GEN8_PTE_SHIFT 12
#define GEN8_PTE_MASK 0x1ff
#define GEN8_PTES I915_PTES(sizeof(gen8_pte_t))
/* GEN8 48b style address is defined as a 4 level page table:
* 47:39 | 38:30 | 29:21 | 20:12 | 11:0
* PML4E | PDPE | PDE | PTE | offset
*/
......@@ -116,15 +125,6 @@ typedef uint64_t gen8_ppgtt_pml4e_t;
/* NB: GEN8_PDPE_MASK is untrue for 32b platforms, but it has no impact on 32b page
* tables */
#define GEN8_PDPE_MASK 0x1ff
#define GEN8_PDE_SHIFT 21
#define GEN8_PDE_MASK 0x1ff
#define GEN8_PTE_SHIFT 12
#define GEN8_PTE_MASK 0x1ff
#define GEN8_LEGACY_PDPES 4
#define GEN8_PTES I915_PTES(sizeof(gen8_pte_t))
#define I915_PDPES_PER_PDP(dev_priv) (USES_FULL_48BIT_PPGTT(dev_priv) ?\
GEN8_PML4ES_PER_PML4 : GEN8_LEGACY_PDPES)
#define PPAT_UNCACHED_INDEX (_PAGE_PWT | _PAGE_PCD)
#define PPAT_CACHED_PDE_INDEX 0 /* WB LLC */
......@@ -141,7 +141,7 @@ typedef uint64_t gen8_ppgtt_pml4e_t;
#define GEN8_PPAT_WC (1<<0)
#define GEN8_PPAT_UC (0<<0)
#define GEN8_PPAT_ELLC_OVERRIDE (0<<2)
#define GEN8_PPAT(i, x) ((uint64_t) (x) << ((i) * 8))
#define GEN8_PPAT(i, x) ((u64)(x) << ((i) * 8))
struct sg_table;
......@@ -208,7 +208,7 @@ struct i915_page_dma {
/* For gen6/gen7 only. This is the offset in the GGTT
* where the page directory entries for PPGTT begin
*/
uint32_t ggtt_offset;
u32 ggtt_offset;
};
};
......@@ -218,28 +218,24 @@ struct i915_page_dma {
struct i915_page_table {
struct i915_page_dma base;
unsigned long *used_ptes;
unsigned int used_ptes;
};
struct i915_page_directory {
struct i915_page_dma base;
unsigned long *used_pdes;
struct i915_page_table *page_table[I915_PDES]; /* PDEs */
unsigned int used_pdes;
};
struct i915_page_directory_pointer {
struct i915_page_dma base;
unsigned long *used_pdpes;
struct i915_page_directory **page_directory;
unsigned int used_pdpes;
};
struct i915_pml4 {
struct i915_page_dma base;
DECLARE_BITMAP(used_pml4es, GEN8_PML4ES_PER_PML4);
struct i915_page_directory_pointer *pdps[GEN8_PML4ES_PER_PML4];
};
......@@ -247,6 +243,7 @@ struct i915_address_space {
struct drm_mm mm;
struct i915_gem_timeline timeline;
struct drm_i915_private *i915;
struct device *dma;
/* Every address space belongs to a struct file - except for the global
* GTT that is owned by the driver (and so @file is set to NULL). In
* principle, no information should leak from one context to another
......@@ -257,7 +254,6 @@ struct i915_address_space {
*/
struct drm_i915_file_private *file;
struct list_head global_link;
u64 start; /* Start offset always 0 for dri2 */
u64 total; /* size addr space maps (ex. 2GB for ggtt) */
bool closed;
......@@ -297,6 +293,9 @@ struct i915_address_space {
*/
struct list_head unbound_list;
struct pagevec free_pages;
bool pt_kmap_wc;
/* FIXME: Need a more generic return type */
gen6_pte_t (*pte_encode)(dma_addr_t addr,
enum i915_cache_level level,
......@@ -304,20 +303,19 @@ struct i915_address_space {
/* flags for pte_encode */
#define PTE_READ_ONLY (1<<0)
int (*allocate_va_range)(struct i915_address_space *vm,
uint64_t start,
uint64_t length);
u64 start, u64 length);
void (*clear_range)(struct i915_address_space *vm,
uint64_t start,
uint64_t length);
u64 start, u64 length);
void (*insert_page)(struct i915_address_space *vm,
dma_addr_t addr,
uint64_t offset,
u64 offset,
enum i915_cache_level cache_level,
u32 flags);
void (*insert_entries)(struct i915_address_space *vm,
struct sg_table *st,
uint64_t start,
enum i915_cache_level cache_level, u32 flags);
u64 start,
enum i915_cache_level cache_level,
u32 flags);
void (*cleanup)(struct i915_address_space *vm);
/** Unmap an object from an address space. This usually consists of
* setting the valid PTE entries to a reserved scratch page. */
......@@ -326,10 +324,18 @@ struct i915_address_space {
int (*bind_vma)(struct i915_vma *vma,
enum i915_cache_level cache_level,
u32 flags);
I915_SELFTEST_DECLARE(struct fault_attr fault_attr);
};
#define i915_is_ggtt(V) (!(V)->file)
static inline bool
i915_vm_is_48bit(const struct i915_address_space *vm)
{
return (vm->total - 1) >> 32;
}
/* The Graphics Translation Table is the way in which GEN hardware translates a
* Graphics Virtual Address into a Physical Address. In addition to the normal
* collateral associated with any va->pa translations GEN hardware also has a
......@@ -381,7 +387,6 @@ struct i915_hw_ppgtt {
gen6_pte_t __iomem *pd_addr;
int (*enable)(struct i915_hw_ppgtt *ppgtt);
int (*switch_mm)(struct i915_hw_ppgtt *ppgtt,
struct drm_i915_gem_request *req);
void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m);
......@@ -409,9 +414,9 @@ struct i915_hw_ppgtt {
(pt = (pd)->page_table[iter], true); \
++iter)
static inline uint32_t i915_pte_index(uint64_t address, uint32_t pde_shift)
static inline u32 i915_pte_index(u64 address, unsigned int pde_shift)
{
const uint32_t mask = NUM_PTE(pde_shift) - 1;
const u32 mask = NUM_PTE(pde_shift) - 1;
return (address >> PAGE_SHIFT) & mask;
}
......@@ -420,11 +425,10 @@ static inline uint32_t i915_pte_index(uint64_t address, uint32_t pde_shift)
* does not cross a page table boundary, so the max value would be
* GEN6_PTES for GEN6, and GEN8_PTES for GEN8.
*/
static inline uint32_t i915_pte_count(uint64_t addr, size_t length,
uint32_t pde_shift)
static inline u32 i915_pte_count(u64 addr, u64 length, unsigned int pde_shift)
{
const uint64_t mask = ~((1ULL << pde_shift) - 1);
uint64_t end;
const u64 mask = ~((1ULL << pde_shift) - 1);
u64 end;
WARN_ON(length == 0);
WARN_ON(offset_in_page(addr|length));
......@@ -437,26 +441,35 @@ static inline uint32_t i915_pte_count(uint64_t addr, size_t length,
return i915_pte_index(end, pde_shift) - i915_pte_index(addr, pde_shift);
}
static inline uint32_t i915_pde_index(uint64_t addr, uint32_t shift)
static inline u32 i915_pde_index(u64 addr, u32 shift)
{
return (addr >> shift) & I915_PDE_MASK;
}
static inline uint32_t gen6_pte_index(uint32_t addr)
static inline u32 gen6_pte_index(u32 addr)
{
return i915_pte_index(addr, GEN6_PDE_SHIFT);
}
static inline size_t gen6_pte_count(uint32_t addr, uint32_t length)
static inline u32 gen6_pte_count(u32 addr, u32 length)
{
return i915_pte_count(addr, length, GEN6_PDE_SHIFT);
}
static inline uint32_t gen6_pde_index(uint32_t addr)
static inline u32 gen6_pde_index(u32 addr)
{
return i915_pde_index(addr, GEN6_PDE_SHIFT);
}
static inline unsigned int
i915_pdpes_per_pdp(const struct i915_address_space *vm)
{
if (i915_vm_is_48bit(vm))
return GEN8_PML4ES_PER_PML4;
return GEN8_3LVL_PDPES;
}
/* Equivalent to the gen6 version, For each pde iterates over every pde
* between from start until start + length. On gen8+ it simply iterates
* over every page directory entry in a page directory.
......@@ -471,7 +484,7 @@ static inline uint32_t gen6_pde_index(uint32_t addr)
#define gen8_for_each_pdpe(pd, pdp, start, length, iter) \
for (iter = gen8_pdpe_index(start); \
length > 0 && iter < I915_PDPES_PER_PDP(dev) && \
length > 0 && iter < i915_pdpes_per_pdp(vm) && \
(pd = (pdp)->page_directory[iter], true); \
({ u64 temp = ALIGN(start+1, 1 << GEN8_PDPE_SHIFT); \
temp = min(temp - start, length); \
......@@ -485,27 +498,27 @@ static inline uint32_t gen6_pde_index(uint32_t addr)
temp = min(temp - start, length); \
start += temp, length -= temp; }), ++iter)
static inline uint32_t gen8_pte_index(uint64_t address)
static inline u32 gen8_pte_index(u64 address)
{
return i915_pte_index(address, GEN8_PDE_SHIFT);
}
static inline uint32_t gen8_pde_index(uint64_t address)
static inline u32 gen8_pde_index(u64 address)
{
return i915_pde_index(address, GEN8_PDE_SHIFT);
}
static inline uint32_t gen8_pdpe_index(uint64_t address)
static inline u32 gen8_pdpe_index(u64 address)
{
return (address >> GEN8_PDPE_SHIFT) & GEN8_PDPE_MASK;
}
static inline uint32_t gen8_pml4e_index(uint64_t address)
static inline u32 gen8_pml4e_index(u64 address)
{
return (address >> GEN8_PML4E_SHIFT) & GEN8_PML4E_MASK;
}
static inline size_t gen8_pte_count(uint64_t address, uint64_t length)
static inline u64 gen8_pte_count(u64 address, u64 length)
{
return i915_pte_count(address, length, GEN8_PDE_SHIFT);
}
......@@ -513,9 +526,7 @@ static inline size_t gen8_pte_count(uint64_t address, uint64_t length)
static inline dma_addr_t
i915_page_dir_dma_addr(const struct i915_hw_ppgtt *ppgtt, const unsigned n)
{
return test_bit(n, ppgtt->pdp.used_pdpes) ?
px_dma(ppgtt->pdp.page_directory[n]) :
px_dma(ppgtt->base.scratch_pd);
return px_dma(ppgtt->pdp.page_directory[n]);
}
static inline struct i915_ggtt *
......@@ -525,6 +536,9 @@ i915_vm_to_ggtt(struct i915_address_space *vm)
return container_of(vm, struct i915_ggtt, base);
}
int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915);
void i915_gem_fini_aliasing_ppgtt(struct drm_i915_private *i915);
int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv);
int i915_ggtt_init_hw(struct drm_i915_private *dev_priv);
int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv);
......
......@@ -35,8 +35,10 @@ static void internal_free_pages(struct sg_table *st)
{
struct scatterlist *sg;
for (sg = st->sgl; sg; sg = __sg_next(sg))
__free_pages(sg_page(sg), get_order(sg->length));
for (sg = st->sgl; sg; sg = __sg_next(sg)) {
if (sg_page(sg))
__free_pages(sg_page(sg), get_order(sg->length));
}
sg_free_table(st);
kfree(st);
......@@ -133,6 +135,7 @@ i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
return st;
err:
sg_set_page(sg, NULL, 0, 0);
sg_mark_end(sg);
internal_free_pages(st);
return ERR_PTR(-ENOMEM);
......
......@@ -33,6 +33,8 @@
#include <drm/i915_drm.h>
#include "i915_selftest.h"
struct drm_i915_gem_object_ops {
unsigned int flags;
#define I915_GEM_OBJECT_HAS_STRUCT_PAGE 0x1
......@@ -84,6 +86,7 @@ struct drm_i915_gem_object {
struct list_head obj_exec_link;
struct list_head batch_pool_link;
I915_SELFTEST_DECLARE(struct list_head st_link);
unsigned long flags;
......@@ -162,19 +165,23 @@ struct drm_i915_gem_object {
struct reservation_object *resv;
/** References from framebuffers, locks out tiling changes. */
unsigned long framebuffer_references;
unsigned int framebuffer_references;
/** Record of address bit 17 of each page at last unbind. */
unsigned long *bit_17;
struct i915_gem_userptr {
uintptr_t ptr;
unsigned read_only :1;
union {
struct i915_gem_userptr {
uintptr_t ptr;
unsigned read_only :1;
struct i915_mm_struct *mm;
struct i915_mmu_object *mmu_object;
struct work_struct *work;
} userptr;
struct i915_mm_struct *mm;
struct i915_mmu_object *mmu_object;
struct work_struct *work;
} userptr;
unsigned long scratch;
};
/** for phys allocated objects */
struct drm_dma_handle *phys_handle;
......@@ -253,6 +260,16 @@ extern void drm_gem_object_unreference(struct drm_gem_object *);
__deprecated
extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *);
static inline void i915_gem_object_lock(struct drm_i915_gem_object *obj)
{
reservation_object_lock(obj->resv, NULL);
}
static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj)
{
reservation_object_unlock(obj->resv);
}
static inline bool
i915_gem_object_is_dead(const struct drm_i915_gem_object *obj)
{
......@@ -299,6 +316,12 @@ i915_gem_object_clear_active_reference(struct drm_i915_gem_object *obj)
void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj);
static inline bool
i915_gem_object_is_framebuffer(const struct drm_i915_gem_object *obj)
{
return READ_ONCE(obj->framebuffer_references);
}
static inline unsigned int
i915_gem_object_get_tiling(struct drm_i915_gem_object *obj)
{
......@@ -357,5 +380,7 @@ i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
return engine;
}
void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
#endif
......@@ -32,10 +32,12 @@
struct drm_file;
struct drm_i915_gem_object;
struct drm_i915_gem_request;
struct intel_wait {
struct rb_node node;
struct task_struct *tsk;
struct drm_i915_gem_request *request;
u32 seqno;
};
......@@ -119,18 +121,10 @@ struct drm_i915_gem_request {
* The submit fence is used to await upon all of the request's
* dependencies. When it is signaled, the request is ready to run.
* It is used by the driver to then queue the request for execution.
*
* The execute fence is used to signal when the request has been
* sent to hardware.
*
* It is illegal for the submit fence of one request to wait upon the
* execute fence of an earlier request. It should be sufficient to
* wait upon the submit fence of the earlier request.
*/
struct i915_sw_fence submit;
struct i915_sw_fence execute;
wait_queue_t submitq;
wait_queue_t execq;
wait_queue_head_t execute;
/* A list of everyone we wait upon, and everyone who waits upon us.
* Even though we will not be submitted to the hardware before the
......@@ -143,13 +137,12 @@ struct drm_i915_gem_request {
struct i915_priotree priotree;
struct i915_dependency dep;
u32 global_seqno;
/** GEM sequence number associated with the previous request,
* when the HWS breadcrumb is equal to this the GPU is processing
* this request.
/** GEM sequence number associated with this request on the
* global execution timeline. It is zero when the request is not
* on the HW queue (i.e. not on the engine timeline list).
* Its value is guarded by the timeline spinlock.
*/
u32 previous_seqno;
u32 global_seqno;
/** Position in the ring of the start of the request */
u32 head;
......@@ -187,7 +180,7 @@ struct drm_i915_gem_request {
struct drm_i915_file_private *file_priv;
/** file_priv list entry for this request */
struct list_head client_list;
struct list_head client_link;
};
extern const struct dma_fence_ops i915_fence_ops;
......@@ -200,8 +193,6 @@ static inline bool dma_fence_is_i915(const struct dma_fence *fence)
struct drm_i915_gem_request * __must_check
i915_gem_request_alloc(struct intel_engine_cs *engine,
struct i915_gem_context *ctx);
int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
struct drm_file *file);
void i915_gem_request_retire_upto(struct drm_i915_gem_request *req);
static inline struct drm_i915_gem_request *
......@@ -243,6 +234,30 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
*pdst = src;
}
/**
* i915_gem_request_global_seqno - report the current global seqno
* @request - the request
*
* A request is assigned a global seqno only when it is on the hardware
* execution queue. The global seqno can be used to maintain a list of
* requests on the same engine in retirement order, for example for
* constructing a priority queue for waiting. Prior to its execution, or
* if it is subsequently removed in the event of preemption, its global
* seqno is zero. As both insertion and removal from the execution queue
* may operate in IRQ context, it is not guarded by the usual struct_mutex
* BKL. Instead those relying on the global seqno must be prepared for its
* value to change between reads. Only when the request is complete can
* the global seqno be stable (due to the memory barriers on submitting
* the commands to the hardware to write the breadcrumb, if the HWS shows
* that it has passed the global seqno and the global seqno is unchanged
* after the read, it is indeed complete).
*/
static u32
i915_gem_request_global_seqno(const struct drm_i915_gem_request *request)
{
return READ_ONCE(request->global_seqno);
}
int
i915_gem_request_await_object(struct drm_i915_gem_request *to,
struct drm_i915_gem_object *obj,
......@@ -259,6 +274,9 @@ void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches);
void __i915_gem_request_submit(struct drm_i915_gem_request *request);
void i915_gem_request_submit(struct drm_i915_gem_request *request);
void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request);
void i915_gem_request_unsubmit(struct drm_i915_gem_request *request);
struct intel_rps_client;
#define NO_WAITBOOST ERR_PTR(-1)
#define IS_RPS_CLIENT(p) (!IS_ERR(p))
......@@ -283,46 +301,55 @@ static inline bool i915_seqno_passed(u32 seq1, u32 seq2)
}
static inline bool
__i915_gem_request_started(const struct drm_i915_gem_request *req)
__i915_gem_request_started(const struct drm_i915_gem_request *req, u32 seqno)
{
GEM_BUG_ON(!req->global_seqno);
GEM_BUG_ON(!seqno);
return i915_seqno_passed(intel_engine_get_seqno(req->engine),
req->previous_seqno);
seqno - 1);
}
static inline bool
i915_gem_request_started(const struct drm_i915_gem_request *req)
{
if (!req->global_seqno)
u32 seqno;
seqno = i915_gem_request_global_seqno(req);
if (!seqno)
return false;
return __i915_gem_request_started(req);
return __i915_gem_request_started(req, seqno);
}
static inline bool
__i915_gem_request_completed(const struct drm_i915_gem_request *req)
__i915_gem_request_completed(const struct drm_i915_gem_request *req, u32 seqno)
{
GEM_BUG_ON(!req->global_seqno);
return i915_seqno_passed(intel_engine_get_seqno(req->engine),
req->global_seqno);
GEM_BUG_ON(!seqno);
return i915_seqno_passed(intel_engine_get_seqno(req->engine), seqno) &&
seqno == i915_gem_request_global_seqno(req);
}
static inline bool
i915_gem_request_completed(const struct drm_i915_gem_request *req)
{
if (!req->global_seqno)
u32 seqno;
seqno = i915_gem_request_global_seqno(req);
if (!seqno)
return false;
return __i915_gem_request_completed(req);
return __i915_gem_request_completed(req, seqno);
}
bool __i915_spin_request(const struct drm_i915_gem_request *request,
int state, unsigned long timeout_us);
u32 seqno, int state, unsigned long timeout_us);
static inline bool i915_spin_request(const struct drm_i915_gem_request *request,
int state, unsigned long timeout_us)
{
return (__i915_gem_request_started(request) &&
__i915_spin_request(request, state, timeout_us));
u32 seqno;
seqno = i915_gem_request_global_seqno(request);
return (__i915_gem_request_started(request, seqno) &&
__i915_spin_request(request, seqno, state, timeout_us));
}
/* We treat requests as fences. This is not be to confused with our
......
......@@ -207,7 +207,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
if (!(flags & I915_SHRINK_ACTIVE) &&
(i915_gem_object_is_active(obj) ||
obj->framebuffer_references))
i915_gem_object_is_framebuffer(obj)))
continue;
if (!can_release_pages(obj))
......@@ -259,10 +259,13 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv)
{
unsigned long freed;
intel_runtime_pm_get(dev_priv);
freed = i915_gem_shrink(dev_priv, -1UL,
I915_SHRINK_BOUND |
I915_SHRINK_UNBOUND |
I915_SHRINK_ACTIVE);
intel_runtime_pm_put(dev_priv);
rcu_barrier(); /* wait until our RCU delayed slab frees are completed */
return freed;
......@@ -380,9 +383,7 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
if (!i915_gem_shrinker_lock_uninterruptible(dev_priv, &slu, 5000))
return NOTIFY_DONE;
intel_runtime_pm_get(dev_priv);
freed_pages = i915_gem_shrink_all(dev_priv);
intel_runtime_pm_put(dev_priv);
/* Because we may be allocating inside our own driver, we cannot
* assert that there are no objects with pinned pages that are not
......
......@@ -79,12 +79,12 @@ void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv,
mutex_unlock(&dev_priv->mm.stolen_lock);
}
static unsigned long i915_stolen_to_physical(struct drm_i915_private *dev_priv)
static dma_addr_t i915_stolen_to_dma(struct drm_i915_private *dev_priv)
{
struct pci_dev *pdev = dev_priv->drm.pdev;
struct i915_ggtt *ggtt = &dev_priv->ggtt;
struct resource *r;
u32 base;
dma_addr_t base;
/* Almost universally we can find the Graphics Base of Stolen Memory
* at register BSM (0x5c) in the igfx configuration space. On a few
......@@ -189,14 +189,14 @@ static unsigned long i915_stolen_to_physical(struct drm_i915_private *dev_priv)
base = tom - tseg_size - ggtt->stolen_size;
}
if (base == 0)
if (base == 0 || add_overflows(base, ggtt->stolen_size))
return 0;
/* make sure we don't clobber the GTT if it's within stolen memory */
if (INTEL_GEN(dev_priv) <= 4 &&
!IS_G33(dev_priv) && !IS_PINEVIEW(dev_priv) && !IS_G4X(dev_priv)) {
struct {
u32 start, end;
dma_addr_t start, end;
} stolen[2] = {
{ .start = base, .end = base + ggtt->stolen_size, },
{ .start = base, .end = base + ggtt->stolen_size, },
......@@ -228,11 +228,13 @@ static unsigned long i915_stolen_to_physical(struct drm_i915_private *dev_priv)
if (stolen[0].start != stolen[1].start ||
stolen[0].end != stolen[1].end) {
dma_addr_t end = base + ggtt->stolen_size - 1;
DRM_DEBUG_KMS("GTT within stolen memory at 0x%llx-0x%llx\n",
(unsigned long long)ggtt_start,
(unsigned long long)ggtt_end - 1);
DRM_DEBUG_KMS("Stolen memory adjusted to 0x%x-0x%x\n",
base, base + (u32)ggtt->stolen_size - 1);
DRM_DEBUG_KMS("Stolen memory adjusted to %pad - %pad\n",
&base, &end);
}
}
......@@ -261,8 +263,10 @@ static unsigned long i915_stolen_to_physical(struct drm_i915_private *dev_priv)
* range. Apparently this works.
*/
if (r == NULL && !IS_GEN3(dev_priv)) {
DRM_ERROR("conflict detected with stolen region: [0x%08x - 0x%08x]\n",
base, base + (uint32_t)ggtt->stolen_size);
dma_addr_t end = base + ggtt->stolen_size;
DRM_ERROR("conflict detected with stolen region: [%pad - %pad]\n",
&base, &end);
base = 0;
}
}
......@@ -281,13 +285,13 @@ void i915_gem_cleanup_stolen(struct drm_device *dev)
}
static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv,
phys_addr_t *base, u32 *size)
dma_addr_t *base, u32 *size)
{
struct i915_ggtt *ggtt = &dev_priv->ggtt;
uint32_t reg_val = I915_READ(IS_GM45(dev_priv) ?
CTG_STOLEN_RESERVED :
ELK_STOLEN_RESERVED);
phys_addr_t stolen_top = dev_priv->mm.stolen_base + ggtt->stolen_size;
dma_addr_t stolen_top = dev_priv->mm.stolen_base + ggtt->stolen_size;
*base = (reg_val & G4X_STOLEN_RESERVED_ADDR2_MASK) << 16;
......@@ -304,7 +308,7 @@ static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv,
}
static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv,
phys_addr_t *base, u32 *size)
dma_addr_t *base, u32 *size)
{
uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED);
......@@ -330,7 +334,7 @@ static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv,
}
static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv,
phys_addr_t *base, u32 *size)
dma_addr_t *base, u32 *size)
{
uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED);
......@@ -350,7 +354,7 @@ static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv,
}
static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv,
phys_addr_t *base, u32 *size)
dma_addr_t *base, u32 *size)
{
uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED);
......@@ -376,11 +380,11 @@ static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv,
}
static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv,
phys_addr_t *base, u32 *size)
dma_addr_t *base, u32 *size)
{
struct i915_ggtt *ggtt = &dev_priv->ggtt;
uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED);
phys_addr_t stolen_top;
dma_addr_t stolen_top;
stolen_top = dev_priv->mm.stolen_base + ggtt->stolen_size;
......@@ -399,7 +403,7 @@ static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv,
int i915_gem_init_stolen(struct drm_i915_private *dev_priv)
{
struct i915_ggtt *ggtt = &dev_priv->ggtt;
phys_addr_t reserved_base, stolen_top;
dma_addr_t reserved_base, stolen_top;
u32 reserved_total, reserved_size;
u32 stolen_usable_start;
......@@ -420,7 +424,7 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv)
if (ggtt->stolen_size == 0)
return 0;
dev_priv->mm.stolen_base = i915_stolen_to_physical(dev_priv);
dev_priv->mm.stolen_base = i915_stolen_to_dma(dev_priv);
if (dev_priv->mm.stolen_base == 0)
return 0;
......@@ -469,8 +473,8 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv)
if (reserved_base < dev_priv->mm.stolen_base ||
reserved_base + reserved_size > stolen_top) {
phys_addr_t reserved_top = reserved_base + reserved_size;
DRM_DEBUG_KMS("Stolen reserved area [%pa - %pa] outside stolen memory [%pa - %pa]\n",
dma_addr_t reserved_top = reserved_base + reserved_size;
DRM_DEBUG_KMS("Stolen reserved area [%pad - %pad] outside stolen memory [%pad - %pad]\n",
&reserved_base, &reserved_top,
&dev_priv->mm.stolen_base, &stolen_top);
return 0;
......
......@@ -158,13 +158,8 @@ i915_tiling_ok(struct drm_i915_gem_object *obj,
if (stride > 8192)
return false;
if (IS_GEN3(i915)) {
if (obj->base.size > I830_FENCE_MAX_SIZE_VAL << 20)
return false;
} else {
if (obj->base.size > I830_FENCE_MAX_SIZE_VAL << 19)
return false;
}
if (!is_power_of_2(stride))
return false;
}
if (IS_GEN2(i915) ||
......@@ -176,12 +171,7 @@ i915_tiling_ok(struct drm_i915_gem_object *obj,
if (!stride || !IS_ALIGNED(stride, tile_width))
return false;
/* 965+ just needs multiples of tile width */
if (INTEL_GEN(i915) >= 4)
return true;
/* Pre-965 needs power of two tile widths */
return is_power_of_2(stride);
return true;
}
static bool i915_vma_fence_prepare(struct i915_vma *vma,
......@@ -248,7 +238,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
if ((tiling | stride) == obj->tiling_and_stride)
return 0;
if (obj->framebuffer_references)
if (i915_gem_object_is_framebuffer(obj))
return -EBUSY;
/* We need to rebind the object if its current allocation
......@@ -268,6 +258,12 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
if (err)
return err;
i915_gem_object_lock(obj);
if (i915_gem_object_is_framebuffer(obj)) {
i915_gem_object_unlock(obj);
return -EBUSY;
}
/* If the memory has unknown (i.e. varying) swizzling, we pin the
* pages to prevent them being swapped out and causing corruption
* due to the change in swizzling.
......@@ -304,6 +300,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
}
obj->tiling_and_stride = tiling | stride;
i915_gem_object_unlock(obj);
/* Force the fence to be reacquired for GTT access */
i915_gem_release_mmap(obj);
......
......@@ -33,7 +33,13 @@ struct i915_gem_timeline;
struct intel_timeline {
u64 fence_context;
u32 last_submitted_seqno;
u32 seqno;
/**
* Count of outstanding requests, from the time they are constructed
* to the moment they are retired. Loosely coupled to hardware.
*/
u32 inflight_seqnos;
spinlock_t lock;
......@@ -56,7 +62,6 @@ struct intel_timeline {
struct i915_gem_timeline {
struct list_head link;
atomic_t seqno;
struct drm_i915_private *i915;
const char *name;
......
......@@ -348,7 +348,7 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request)
u32 freespace;
int ret;
spin_lock(&client->wq_lock);
spin_lock_irq(&client->wq_lock);
freespace = CIRC_SPACE(client->wq_tail, desc->head, client->wq_size);
freespace -= client->wq_rsvd;
if (likely(freespace >= wqi_size)) {
......@@ -358,21 +358,27 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request)
client->no_wq_space++;
ret = -EAGAIN;
}
spin_unlock(&client->wq_lock);
spin_unlock_irq(&client->wq_lock);
return ret;
}
static void guc_client_update_wq_rsvd(struct i915_guc_client *client, int size)
{
unsigned long flags;
spin_lock_irqsave(&client->wq_lock, flags);
client->wq_rsvd += size;
spin_unlock_irqrestore(&client->wq_lock, flags);
}
void i915_guc_wq_unreserve(struct drm_i915_gem_request *request)
{
const size_t wqi_size = sizeof(struct guc_wq_item);
const int wqi_size = sizeof(struct guc_wq_item);
struct i915_guc_client *client = request->i915->guc.execbuf_client;
GEM_BUG_ON(READ_ONCE(client->wq_rsvd) < wqi_size);
spin_lock(&client->wq_lock);
client->wq_rsvd -= wqi_size;
spin_unlock(&client->wq_lock);
guc_client_update_wq_rsvd(client, -wqi_size);
}
/* Construct a Work Item and append it to the GuC's Work Queue */
......@@ -509,15 +515,18 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq)
unsigned int engine_id = engine->id;
struct intel_guc *guc = &rq->i915->guc;
struct i915_guc_client *client = guc->execbuf_client;
unsigned long flags;
int b_ret;
spin_lock(&client->wq_lock);
guc_wq_item_append(client, rq);
/* WA to flush out the pending GMADR writes to ring buffer. */
if (i915_vma_is_map_and_fenceable(rq->ring->vma))
POSTING_READ_FW(GUC_STATUS);
trace_i915_gem_request_in(rq, 0);
spin_lock_irqsave(&client->wq_lock, flags);
guc_wq_item_append(client, rq);
b_ret = guc_ring_doorbell(client);
client->submissions[engine_id] += 1;
......@@ -527,7 +536,8 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq)
guc->submissions[engine_id] += 1;
guc->last_seqno[engine_id] = rq->global_seqno;
spin_unlock(&client->wq_lock);
spin_unlock_irqrestore(&client->wq_lock, flags);
}
static void i915_guc_submit(struct drm_i915_gem_request *rq)
......@@ -943,16 +953,19 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
/* Take over from manual control of ELSP (execlists) */
for_each_engine(engine, dev_priv, id) {
const int wqi_size = sizeof(struct guc_wq_item);
struct drm_i915_gem_request *rq;
engine->submit_request = i915_guc_submit;
engine->schedule = NULL;
/* Replay the current set of previously submitted requests */
spin_lock_irq(&engine->timeline->lock);
list_for_each_entry(rq, &engine->timeline->requests, link) {
client->wq_rsvd += sizeof(struct guc_wq_item);
guc_client_update_wq_rsvd(client, wqi_size);
__i915_guc_submit(rq);
}
spin_unlock_irq(&engine->timeline->lock);
}
return 0;
......
此差异已折叠。
......@@ -145,7 +145,7 @@ MODULE_PARM_DESC(enable_psr, "Enable PSR "
"(0=disabled, 1=enabled - link mode chosen per-platform, 2=force link-standby mode, 3=force link-off mode) "
"Default: -1 (use per-chip default)");
module_param_named_unsafe(alpha_support, i915.alpha_support, int, 0400);
module_param_named_unsafe(alpha_support, i915.alpha_support, bool, 0400);
MODULE_PARM_DESC(alpha_support,
"Enable alpha quality driver support for latest hardware. "
"See also CONFIG_DRM_I915_ALPHA_SUPPORT.");
......@@ -205,9 +205,9 @@ module_param_named(verbose_state_checks, i915.verbose_state_checks, bool, 0600);
MODULE_PARM_DESC(verbose_state_checks,
"Enable verbose logs (ie. WARN_ON()) in case of unexpected hw state conditions.");
module_param_named_unsafe(nuclear_pageflip, i915.nuclear_pageflip, bool, 0600);
module_param_named_unsafe(nuclear_pageflip, i915.nuclear_pageflip, bool, 0400);
MODULE_PARM_DESC(nuclear_pageflip,
"Force atomic modeset functionality; asynchronous mode is not yet supported. (default: false).");
"Force enable atomic functionality on platforms that don't have full support yet.");
/* WA to get away with the default setting in VBT for early platforms.Will be removed */
module_param_named_unsafe(edp_vswing, i915.edp_vswing, int, 0400);
......
此差异已折叠。
此差异已折叠。
......@@ -1008,7 +1008,7 @@ static void hsw_disable_metric_set(struct drm_i915_private *dev_priv)
static void gen7_update_oacontrol_locked(struct drm_i915_private *dev_priv)
{
assert_spin_locked(&dev_priv->perf.hook_lock);
lockdep_assert_held(&dev_priv->perf.hook_lock);
if (dev_priv->perf.oa.exclusive_stream->enabled) {
struct i915_gem_context *ctx =
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册