Commit b81a6179 authored by Dave Airlie

Merge tag 'drm-intel-next-2016-09-19' of git://anongit.freedesktop.org/drm-intel into drm-next

- refactor the sseu code (Imre)
- refine guc dmesg output (Dave Gordon)
- more vgpu work
- more skl wm fixes (Lyude)
- refactor dpll code in prep for upfront link training (Jim Bride et al)
- consolidate all platform feature checks into intel_device_info (Carlos Santa)
- refactor elsp/execlist submission as prep for re-submission after hang
  recovery and eventually scheduling (Chris Wilson)
- allow synchronous gpu reset handling, to remove tricky/impossible/fragile
  error recovery code (Chris Wilson)
- prep work for nonblocking (execlist) submission, using fences to track
  dependencies and drive elsp submission (Chris Wilson)
- partial error recovery/resubmission of non-guilty batches after hangs (Chris Wilson)
- full dma-buf implicit fencing support (Chris Wilson)
- dp link training fixes (Jim, Dhinkaran, Navare, ...)
- obey dp branch device pixel rate/bpc/clock limits (Mika Kahola), needed for
  many vga dongles
- bunch of small cleanups and polish all over, as usual

[airlied: printing macros collided]

* tag 'drm-intel-next-2016-09-19' of git://anongit.freedesktop.org/drm-intel: (163 commits)
  drm/i915: Update DRIVER_DATE to 20160919
  drm: Fix DisplayPort branch device ID kernel-doc
  drm/i915: use NULL for NULL pointers
  drm/i915: do not use 'false' as a NULL pointer
  drm/i915: make intel_dp_compute_bpp static
  drm: Add DP branch device info on debugfs
  drm/i915: Update bits per component for display info
  drm/i915: Check pixel rate for DP to VGA dongle
  drm/i915: Read DP branch device SW revision
  drm/i915: Read DP branch device HW revision
  drm/i915: Cleanup DisplayPort AUX channel initialization
  drm: Read DP branch device id
  drm: Helper to read max bits per component
  drm: Helper to read max clock rate
  drm: Drop VGA from bpc definitions
  drm: Add missing DP downstream port types
  drm/i915: Add ddb size field to device info structure
  drm/i915/guc: general tidying up (submission)
  drm/i915/guc: general tidying up (loader)
  drm/i915: clarify PMINTRMSK/pm_intr_keep usage
  ...
@@ -507,8 +507,7 @@ static const struct pci_device_id intel_early_ids[] __initconst = {
INTEL_I915GM_IDS(&gen3_early_ops),
INTEL_I945G_IDS(&gen3_early_ops),
INTEL_I945GM_IDS(&gen3_early_ops),
-INTEL_VLV_M_IDS(&gen6_early_ops),
-INTEL_VLV_D_IDS(&gen6_early_ops),
+INTEL_VLV_IDS(&gen6_early_ops),
INTEL_PINEVIEW_IDS(&gen3_early_ops),
INTEL_I965G_IDS(&gen3_early_ops),
INTEL_G33_IDS(&gen3_early_ops),
@@ -521,10 +520,8 @@ static const struct pci_device_id intel_early_ids[] __initconst = {
INTEL_SNB_M_IDS(&gen6_early_ops),
INTEL_IVB_M_IDS(&gen6_early_ops),
INTEL_IVB_D_IDS(&gen6_early_ops),
-INTEL_HSW_D_IDS(&gen6_early_ops),
-INTEL_HSW_M_IDS(&gen6_early_ops),
-INTEL_BDW_M_IDS(&gen8_early_ops),
-INTEL_BDW_D_IDS(&gen8_early_ops),
+INTEL_HSW_IDS(&gen6_early_ops),
+INTEL_BDW_IDS(&gen8_early_ops),
INTEL_CHV_IDS(&chv_early_ops),
INTEL_SKL_IDS(&gen9_early_ops),
INTEL_BXT_IDS(&gen9_early_ops),
...
@@ -439,6 +439,179 @@ int drm_dp_link_configure(struct drm_dp_aux *aux, struct drm_dp_link *link)
}
EXPORT_SYMBOL(drm_dp_link_configure);
/**
* drm_dp_downstream_max_clock() - extract branch device max
* pixel rate for legacy VGA
* converter or max TMDS clock
* rate for others
* @dpcd: DisplayPort configuration data
* @port_cap: port capabilities
*
* Returns max clock in kHz on success or 0 if max clock not defined
*/
int drm_dp_downstream_max_clock(const u8 dpcd[DP_RECEIVER_CAP_SIZE],
const u8 port_cap[4])
{
int type = port_cap[0] & DP_DS_PORT_TYPE_MASK;
bool detailed_cap_info = dpcd[DP_DOWNSTREAMPORT_PRESENT] &
DP_DETAILED_CAP_INFO_AVAILABLE;
if (!detailed_cap_info)
return 0;
switch (type) {
case DP_DS_PORT_TYPE_VGA:
return port_cap[1] * 8 * 1000;
case DP_DS_PORT_TYPE_DVI:
case DP_DS_PORT_TYPE_HDMI:
case DP_DS_PORT_TYPE_DP_DUALMODE:
return port_cap[1] * 2500;
default:
return 0;
}
}
EXPORT_SYMBOL(drm_dp_downstream_max_clock);
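As an illustration (not part of this patch), a source driver could use the helper above to filter modes against the dongle's reported limit. The caller below, its name, and the intel_dp fields it touches are assumptions for the sketch:

    /* Hypothetical mode filter: dpcd[] and downstream_ports[] stand for the
     * cached DPCD and downstream port caps a DP source driver already keeps. */
    static enum drm_mode_status
    intel_dp_check_downstream_clock(struct intel_dp *intel_dp,
                                    const struct drm_display_mode *mode)
    {
            int max_clock = drm_dp_downstream_max_clock(intel_dp->dpcd,
                                                        intel_dp->downstream_ports);

            /* 0 means the branch device did not report a limit */
            if (max_clock && mode->clock > max_clock)       /* both in kHz */
                    return MODE_CLOCK_HIGH;
            return MODE_OK;
    }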
/**
* drm_dp_downstream_max_bpc() - extract branch device max
* bits per component
* @dpcd: DisplayPort configuration data
* @port_cap: port capabilities
*
* Returns max bpc on success or 0 if max bpc not defined
*/
int drm_dp_downstream_max_bpc(const u8 dpcd[DP_RECEIVER_CAP_SIZE],
const u8 port_cap[4])
{
int type = port_cap[0] & DP_DS_PORT_TYPE_MASK;
bool detailed_cap_info = dpcd[DP_DOWNSTREAMPORT_PRESENT] &
DP_DETAILED_CAP_INFO_AVAILABLE;
int bpc;
if (!detailed_cap_info)
return 0;
switch (type) {
case DP_DS_PORT_TYPE_VGA:
case DP_DS_PORT_TYPE_DVI:
case DP_DS_PORT_TYPE_HDMI:
case DP_DS_PORT_TYPE_DP_DUALMODE:
bpc = port_cap[2] & DP_DS_MAX_BPC_MASK;
switch (bpc) {
case DP_DS_8BPC:
return 8;
case DP_DS_10BPC:
return 10;
case DP_DS_12BPC:
return 12;
case DP_DS_16BPC:
return 16;
}
default:
return 0;
}
}
EXPORT_SYMBOL(drm_dp_downstream_max_bpc);
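A similarly hedged sketch of a consumer for the bpc helper; the function name and the three-components-per-pixel assumption are illustrative only:

    /* Sketch: cap a pipe bpp value by what the branch device advertises. */
    static int clamp_bpp_for_branch_device(struct intel_dp *intel_dp, int bpp)
    {
            int max_bpc = drm_dp_downstream_max_bpc(intel_dp->dpcd,
                                                    intel_dp->downstream_ports);

            if (max_bpc > 0)
                    bpp = min(bpp, 3 * max_bpc);    /* three components per pixel */
            return bpp;
    }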
/**
* drm_dp_downstream_id() - identify branch device
* @aux: DisplayPort AUX channel
* @id: DisplayPort branch device id
*
* Returns branch device id on success or NULL on failure
*/
int drm_dp_downstream_id(struct drm_dp_aux *aux, char id[6])
{
return drm_dp_dpcd_read(aux, DP_BRANCH_ID, id, 6);
}
EXPORT_SYMBOL(drm_dp_downstream_id);
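Note that the ID returned above is six raw bytes with no terminating NUL, so a caller should bound the print width. Roughly (the debug message itself is illustrative):

    char id[6];

    if (drm_dp_downstream_id(aux, id) == sizeof(id))
            DRM_DEBUG_KMS("DP branch device ID: %.6s\n", id);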
/**
* drm_dp_downstream_debug() - debug DP branch devices
* @m: pointer for debugfs file
* @dpcd: DisplayPort configuration data
* @port_cap: port capabilities
* @aux: DisplayPort AUX channel
*
*/
void drm_dp_downstream_debug(struct seq_file *m,
const u8 dpcd[DP_RECEIVER_CAP_SIZE],
const u8 port_cap[4], struct drm_dp_aux *aux)
{
bool detailed_cap_info = dpcd[DP_DOWNSTREAMPORT_PRESENT] &
DP_DETAILED_CAP_INFO_AVAILABLE;
int clk;
int bpc;
char id[6];
int len;
uint8_t rev[2];
int type = port_cap[0] & DP_DS_PORT_TYPE_MASK;
bool branch_device = dpcd[DP_DOWNSTREAMPORT_PRESENT] &
DP_DWN_STRM_PORT_PRESENT;
seq_printf(m, "\tDP branch device present: %s\n",
branch_device ? "yes" : "no");
if (!branch_device)
return;
switch (type) {
case DP_DS_PORT_TYPE_DP:
seq_puts(m, "\t\tType: DisplayPort\n");
break;
case DP_DS_PORT_TYPE_VGA:
seq_puts(m, "\t\tType: VGA\n");
break;
case DP_DS_PORT_TYPE_DVI:
seq_puts(m, "\t\tType: DVI\n");
break;
case DP_DS_PORT_TYPE_HDMI:
seq_puts(m, "\t\tType: HDMI\n");
break;
case DP_DS_PORT_TYPE_NON_EDID:
seq_puts(m, "\t\tType: others without EDID support\n");
break;
case DP_DS_PORT_TYPE_DP_DUALMODE:
seq_puts(m, "\t\tType: DP++\n");
break;
case DP_DS_PORT_TYPE_WIRELESS:
seq_puts(m, "\t\tType: Wireless\n");
break;
default:
seq_puts(m, "\t\tType: N/A\n");
}
drm_dp_downstream_id(aux, id);
seq_printf(m, "\t\tID: %s\n", id);
len = drm_dp_dpcd_read(aux, DP_BRANCH_HW_REV, &rev[0], 1);
if (len > 0)
seq_printf(m, "\t\tHW: %d.%d\n",
(rev[0] & 0xf0) >> 4, rev[0] & 0xf);
len = drm_dp_dpcd_read(aux, DP_BRANCH_SW_REV, &rev, 2);
if (len > 0)
seq_printf(m, "\t\tSW: %d.%d\n", rev[0], rev[1]);
if (detailed_cap_info) {
clk = drm_dp_downstream_max_clock(dpcd, port_cap);
if (clk > 0) {
if (type == DP_DS_PORT_TYPE_VGA)
seq_printf(m, "\t\tMax dot clock: %d kHz\n", clk);
else
seq_printf(m, "\t\tMax TMDS clock: %d kHz\n", clk);
}
bpc = drm_dp_downstream_max_bpc(dpcd, port_cap);
if (bpc > 0)
seq_printf(m, "\t\tMax bpc: %d\n", bpc);
}
}
EXPORT_SYMBOL(drm_dp_downstream_debug);
/*
 * I2C-over-AUX implementation
 */
...
@@ -16,6 +16,7 @@ i915-y := i915_drv.o \
i915_params.o \
i915_pci.o \
i915_suspend.o \
+i915_sw_fence.o \
i915_sysfs.o \
intel_csr.o \
intel_device_info.o \
...
@@ -984,7 +984,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
src = ERR_PTR(-ENODEV);
if (src_needs_clflush &&
-i915_memcpy_from_wc((void *)(uintptr_t)batch_start_offset, 0, 0)) {
+i915_memcpy_from_wc((void *)(uintptr_t)batch_start_offset, NULL, 0)) {
src = i915_gem_object_pin_map(src_obj, I915_MAP_WC);
if (!IS_ERR(src)) {
i915_memcpy_from_wc(dst,
...
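The hunk above only swaps a literal 0 for NULL, but the call itself is worth a note: as I read i915_memcpy_from_wc(), a zero-length call acts as a pure capability/alignment probe, returning false when the SSE4.1 movntdqa fast path cannot be used for the given pointers. A sketch of the idiom (variable names are illustrative):

    /* Probe first: no bytes are copied when len == 0, the call only reports
     * whether an accelerated WC read starting at 'offset' would be possible. */
    if (i915_memcpy_from_wc((void *)(uintptr_t)offset, NULL, 0))
            use_wc_mapping = true;          /* stream-read via I915_MAP_WC */
    else
            use_wc_mapping = false;         /* fall back to a cached mapping */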
This diff is collapsed.
This diff is collapsed.
@@ -70,7 +70,7 @@
#define DRIVER_NAME "i915"
#define DRIVER_DESC "Intel Graphics"
-#define DRIVER_DATE "20160822"
+#define DRIVER_DATE "20160919"
#undef WARN_ON
/* Many gcc seem to no see through this and fall over :( */
@@ -510,8 +510,12 @@ struct drm_i915_display_funcs {
struct intel_initial_plane_config *);
int (*crtc_compute_clock)(struct intel_crtc *crtc,
struct intel_crtc_state *crtc_state);
-void (*crtc_enable)(struct drm_crtc *crtc);
-void (*crtc_disable)(struct drm_crtc *crtc);
+void (*crtc_enable)(struct intel_crtc_state *pipe_config,
+struct drm_atomic_state *old_state);
+void (*crtc_disable)(struct intel_crtc_state *old_crtc_state,
+struct drm_atomic_state *old_state);
+void (*update_crtcs)(struct drm_atomic_state *state,
+unsigned int *crtc_vblank_mask);
void (*audio_codec_enable)(struct drm_connector *connector,
struct intel_encoder *encoder,
const struct drm_display_mode *adjusted_mode);
@@ -575,8 +579,6 @@ struct intel_uncore_funcs {
uint16_t val, bool trace);
void (*mmio_writel)(struct drm_i915_private *dev_priv, i915_reg_t r,
uint32_t val, bool trace);
-void (*mmio_writeq)(struct drm_i915_private *dev_priv, i915_reg_t r,
-uint64_t val, bool trace);
};
struct intel_uncore {
@@ -637,7 +639,7 @@ struct intel_csr {
func(is_i915g) sep \
func(is_i945gm) sep \
func(is_g33) sep \
-func(need_gfx_hws) sep \
+func(hws_needs_physical) sep \
func(is_g4x) sep \
func(is_pineview) sep \
func(is_broadwater) sep \
@@ -652,6 +654,19 @@ struct intel_csr {
func(is_kabylake) sep \
func(is_preliminary) sep \
func(has_fbc) sep \
func(has_psr) sep \
func(has_runtime_pm) sep \
func(has_csr) sep \
func(has_resource_streamer) sep \
func(has_rc6) sep \
func(has_rc6p) sep \
func(has_dp_mst) sep \
func(has_gmbus_irq) sep \
func(has_hw_contexts) sep \
func(has_logical_ring_contexts) sep \
func(has_l3_dpf) sep \
func(has_gmch_display) sep \
func(has_guc) sep \
func(has_pipe_cxsr) sep \
func(has_hotplug) sep \
func(cursor_needs_physical) sep \
@@ -667,6 +682,24 @@
#define DEFINE_FLAG(name) u8 name:1
#define SEP_SEMICOLON ;
struct sseu_dev_info {
u8 slice_mask;
u8 subslice_mask;
u8 eu_total;
u8 eu_per_subslice;
u8 min_eu_in_pool;
/* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */
u8 subslice_7eu[3];
u8 has_slice_pg:1;
u8 has_subslice_pg:1;
u8 has_eu_pg:1;
};
static inline unsigned int sseu_subslice_total(const struct sseu_dev_info *sseu)
{
return hweight8(sseu->slice_mask) * hweight8(sseu->subslice_mask);
}
struct intel_device_info {
u32 display_mmio_offset;
u16 device_id;
@@ -677,6 +710,7 @@ struct intel_device_info {
u8 ring_mask; /* Rings supported by the HW */
u8 num_rings;
DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG, SEP_SEMICOLON);
u16 ddb_size; /* in blocks */
/* Register offsets for the various display pipes and transcoders */
int pipe_offsets[I915_MAX_TRANSCODERS];
int trans_offsets[I915_MAX_TRANSCODERS];
@@ -684,17 +718,7 @@
int cursor_offsets[I915_MAX_PIPES];
/* Slice/subslice/EU info */
-u8 slice_total;
-u8 subslice_total;
-u8 subslice_per_slice;
-u8 eu_total;
-u8 eu_per_subslice;
-u8 min_eu_in_pool;
-/* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */
-u8 subslice_7eu[3];
-u8 has_slice_pg:1;
-u8 has_subslice_pg:1;
-u8 has_eu_pg:1;
+struct sseu_dev_info sseu;
struct color_luts {
u16 degamma_lut_size;
@@ -1161,6 +1185,7 @@ struct intel_gen6_power_mgmt {
bool interrupts_enabled;
u32 pm_iir;
+/* PM interrupt bits that should never be masked */
u32 pm_intr_keep;
/* Frequencies are stored in potentially platform dependent multiples.
@@ -1334,7 +1359,7 @@ struct i915_gem_mm {
bool interruptible;
/* the indicator for dispatch video commands on two BSD rings */
-unsigned int bsd_engine_dispatch_index;
+atomic_t bsd_engine_dispatch_index;
/** Bit 6 swizzling required for X tiling */
uint32_t bit_6_swizzle_x;
@@ -1382,9 +1407,10 @@ struct i915_gpu_error {
* State variable controlling the reset flow and count
*
* This is a counter which gets incremented when reset is triggered,
-* and again when reset has been handled. So odd values (lowest bit set)
-* means that reset is in progress and even values that
-* (reset_counter >> 1):th reset was successfully completed.
+*
+* Before the reset commences, the I915_RESET_IN_PROGRESS bit is set
+* meaning that any waiters holding onto the struct_mutex should
+* relinquish the lock immediately in order for the reset to start.
*
* If reset is not completed succesfully, the I915_WEDGE bit is
* set meaning that hardware is terminally sour and there is no
@@ -1399,10 +1425,11 @@
* naturally enforces the correct ordering between the bail-out of the
* waiter and the gpu reset work code.
*/
-atomic_t reset_counter;
-#define I915_RESET_IN_PROGRESS_FLAG 1
-#define I915_WEDGED (1 << 31)
+unsigned long reset_count;
+unsigned long flags;
+#define I915_RESET_IN_PROGRESS 0
+#define I915_WEDGED (BITS_PER_LONG - 1)
/**
* Waitqueue to signal when a hang is detected. Used to for waiters
@@ -1956,6 +1983,13 @@ struct drm_i915_private {
struct i915_suspend_saved_registers regfile;
struct vlv_s0ix_state vlv_s0ix_state;
enum {
I915_SKL_SAGV_UNKNOWN = 0,
I915_SKL_SAGV_DISABLED,
I915_SKL_SAGV_ENABLED,
I915_SKL_SAGV_NOT_CONTROLLED
} skl_sagv_status;
struct {
/*
* Raw watermark latency values:
@@ -2010,6 +2044,7 @@
/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
struct {
void (*resume)(struct drm_i915_private *);
void (*cleanup_engine)(struct intel_engine_cs *engine);
/**
@@ -2057,9 +2092,9 @@ static inline struct drm_i915_private *to_i915(const struct drm_device *dev)
return container_of(dev, struct drm_i915_private, drm);
}
-static inline struct drm_i915_private *dev_to_i915(struct device *dev)
+static inline struct drm_i915_private *kdev_to_i915(struct device *kdev)
{
-return to_i915(dev_get_drvdata(dev));
+return to_i915(dev_get_drvdata(kdev));
}
static inline struct drm_i915_private *guc_to_i915(struct intel_guc *guc)
@@ -2082,13 +2117,16 @@ static inline struct drm_i915_private *guc_to_i915(struct intel_guc *guc)
for_each_if (((id__) = (engine__)->id, \
intel_engine_initialized(engine__)))
#define __mask_next_bit(mask) ({ \
int __idx = ffs(mask) - 1; \
mask &= ~BIT(__idx); \
__idx; \
})
/* Iterator over subset of engines selected by mask */
-#define for_each_engine_masked(engine__, dev_priv__, mask__) \
-for ((engine__) = &(dev_priv__)->engine[0]; \
-(engine__) < &(dev_priv__)->engine[I915_NUM_ENGINES]; \
-(engine__)++) \
-for_each_if (((mask__) & intel_engine_flag(engine__)) && \
-intel_engine_initialized(engine__))
+#define for_each_engine_masked(engine__, dev_priv__, mask__, tmp__) \
+for (tmp__ = mask__ & INTEL_INFO(dev_priv__)->ring_mask; \
+tmp__ ? (engine__ = &(dev_priv__)->engine[__mask_next_bit(tmp__)]), 1 : 0; )
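The rewritten iterator leans on __mask_next_bit() defined just above, which peels engines off the mask lowest-bit-first via ffs(). A standalone sketch of the same bit-peeling loop (plain userspace C, names invented for illustration):

    #include <stdio.h>
    #include <strings.h>        /* ffs() */

    int main(void)
    {
            unsigned int mask = 0x15;       /* pretend engines 0, 2 and 4 are selected */

            while (mask) {
                    int idx = ffs(mask) - 1;        /* index of lowest set bit */
                    mask &= ~(1u << idx);           /* clear it, as __mask_next_bit() does */
                    printf("visit engine %d\n", idx);
            }
            return 0;
    }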
enum hdmi_force_audio {
HDMI_AUDIO_OFF_DVI = -2, /* no aux data for HDMI-DVI converter */
@@ -2716,10 +2754,10 @@ struct drm_i915_cmd_table {
#define HAS_EDRAM(dev) (!!(__I915__(dev)->edram_cap & EDRAM_ENABLED))
#define HAS_WT(dev) ((IS_HASWELL(dev) || IS_BROADWELL(dev)) && \
HAS_EDRAM(dev))
-#define I915_NEED_GFX_HWS(dev) (INTEL_INFO(dev)->need_gfx_hws)
-#define HAS_HW_CONTEXTS(dev) (INTEL_INFO(dev)->gen >= 6)
-#define HAS_LOGICAL_RING_CONTEXTS(dev) (INTEL_INFO(dev)->gen >= 8)
+#define HWS_NEEDS_PHYSICAL(dev) (INTEL_INFO(dev)->hws_needs_physical)
+#define HAS_HW_CONTEXTS(dev) (INTEL_INFO(dev)->has_hw_contexts)
+#define HAS_LOGICAL_RING_CONTEXTS(dev) (INTEL_INFO(dev)->has_logical_ring_contexts)
#define USES_PPGTT(dev) (i915.enable_ppgtt)
#define USES_FULL_PPGTT(dev) (i915.enable_ppgtt >= 2)
#define USES_FULL_48BIT_PPGTT(dev) (i915.enable_ppgtt == 3)
@@ -2743,7 +2781,7 @@ struct drm_i915_cmd_table {
* interrupt source and so prevents the other device from working properly.
*/
#define HAS_AUX_IRQ(dev) (INTEL_INFO(dev)->gen >= 5)
-#define HAS_GMBUS_IRQ(dev) (INTEL_INFO(dev)->gen >= 5)
+#define HAS_GMBUS_IRQ(dev) (INTEL_INFO(dev)->has_gmbus_irq)
/* With the 945 and later, Y tiling got adjusted so that it was 32 128-byte
* rows, which changed the alignment requirements and fence programming.
@@ -2759,38 +2797,27 @@
#define HAS_IPS(dev) (IS_HSW_ULT(dev) || IS_BROADWELL(dev))
-#define HAS_DP_MST(dev) (IS_HASWELL(dev) || IS_BROADWELL(dev) || \
-INTEL_INFO(dev)->gen >= 9)
+#define HAS_DP_MST(dev) (INTEL_INFO(dev)->has_dp_mst)
#define HAS_DDI(dev) (INTEL_INFO(dev)->has_ddi)
#define HAS_FPGA_DBG_UNCLAIMED(dev) (INTEL_INFO(dev)->has_fpga_dbg)
-#define HAS_PSR(dev) (IS_HASWELL(dev) || IS_BROADWELL(dev) || \
-IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev) || \
-IS_SKYLAKE(dev) || IS_KABYLAKE(dev))
-#define HAS_RUNTIME_PM(dev) (IS_GEN6(dev) || IS_HASWELL(dev) || \
-IS_BROADWELL(dev) || IS_VALLEYVIEW(dev) || \
-IS_CHERRYVIEW(dev) || IS_SKYLAKE(dev) || \
-IS_KABYLAKE(dev) || IS_BROXTON(dev))
-#define HAS_RC6(dev) (INTEL_INFO(dev)->gen >= 6)
-#define HAS_RC6p(dev) (IS_GEN6(dev) || IS_IVYBRIDGE(dev))
-#define HAS_CSR(dev) (IS_GEN9(dev))
+#define HAS_PSR(dev) (INTEL_INFO(dev)->has_psr)
+#define HAS_RUNTIME_PM(dev) (INTEL_INFO(dev)->has_runtime_pm)
+#define HAS_RC6(dev) (INTEL_INFO(dev)->has_rc6)
+#define HAS_RC6p(dev) (INTEL_INFO(dev)->has_rc6p)
+#define HAS_CSR(dev) (INTEL_INFO(dev)->has_csr)
/*
* For now, anything with a GuC requires uCode loading, and then supports
* command submission once loaded. But these are logically independent
* properties, so we have separate macros to test them.
*/
-#define HAS_GUC(dev) (IS_GEN9(dev))
+#define HAS_GUC(dev) (INTEL_INFO(dev)->has_guc)
#define HAS_GUC_UCODE(dev) (HAS_GUC(dev))
#define HAS_GUC_SCHED(dev) (HAS_GUC(dev))
-#define HAS_RESOURCE_STREAMER(dev) (IS_HASWELL(dev) || \
-INTEL_INFO(dev)->gen >= 8)
-#define HAS_CORE_RING_FREQ(dev) (INTEL_INFO(dev)->gen >= 6 && \
-!IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev) && \
-!IS_BROXTON(dev))
+#define HAS_RESOURCE_STREAMER(dev) (INTEL_INFO(dev)->has_resource_streamer)
#define HAS_POOLED_EU(dev) (INTEL_INFO(dev)->has_pooled_eu)
@@ -2818,11 +2845,10 @@ struct drm_i915_cmd_table {
#define HAS_PCH_NOP(dev) (INTEL_PCH_TYPE(dev) == PCH_NOP)
#define HAS_PCH_SPLIT(dev) (INTEL_PCH_TYPE(dev) != PCH_NONE)
-#define HAS_GMCH_DISPLAY(dev) (INTEL_INFO(dev)->gen < 5 || \
-IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev))
+#define HAS_GMCH_DISPLAY(dev) (INTEL_INFO(dev)->has_gmch_display)
/* DPF == dynamic parity feature */
-#define HAS_L3_DPF(dev) (IS_IVYBRIDGE(dev) || IS_HASWELL(dev))
+#define HAS_L3_DPF(dev) (INTEL_INFO(dev)->has_l3_dpf)
#define NUM_L3_SLICES(dev) (IS_HSW_GT3(dev) ? 2 : HAS_L3_DPF(dev))
#define GT_FREQUENCY_MULTIPLIER 50
@@ -2861,7 +2887,7 @@ extern long i915_compat_ioctl(struct file *filp, unsigned int cmd,
#endif
extern int intel_gpu_reset(struct drm_i915_private *dev_priv, u32 engine_mask);
extern bool intel_has_gpu_reset(struct drm_i915_private *dev_priv);
-extern int i915_reset(struct drm_i915_private *dev_priv);
+extern void i915_reset(struct drm_i915_private *dev_priv);
extern int intel_guc_reset(struct drm_i915_private *dev_priv);
extern void intel_engine_init_hangcheck(struct intel_engine_cs *engine);
extern unsigned long i915_chipset_val(struct drm_i915_private *dev_priv);
@@ -3197,8 +3223,6 @@ i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
}
int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
-int i915_gem_object_sync(struct drm_i915_gem_object *obj,
-struct drm_i915_gem_request *to);
void i915_vma_move_to_active(struct i915_vma *vma,
struct drm_i915_gem_request *req,
unsigned int flags);
@@ -3207,6 +3231,7 @@ int i915_gem_dumb_create(struct drm_file *file_priv,
struct drm_mode_create_dumb *args);
int i915_gem_mmap_gtt(struct drm_file *file_priv, struct drm_device *dev,
uint32_t handle, uint64_t *offset);
+int i915_gem_mmap_gtt_version(void);
void i915_gem_track_fb(struct drm_i915_gem_object *old,
struct drm_i915_gem_object *new,
@@ -3219,54 +3244,35 @@ i915_gem_find_active_request(struct intel_engine_cs *engine);
void i915_gem_retire_requests(struct drm_i915_private *dev_priv);
-static inline u32 i915_reset_counter(struct i915_gpu_error *error)
-{
-return atomic_read(&error->reset_counter);
-}
-static inline bool __i915_reset_in_progress(u32 reset)
-{
-return unlikely(reset & I915_RESET_IN_PROGRESS_FLAG);
-}
-static inline bool __i915_reset_in_progress_or_wedged(u32 reset)
-{
-return unlikely(reset & (I915_RESET_IN_PROGRESS_FLAG | I915_WEDGED));
-}
-static inline bool __i915_terminally_wedged(u32 reset)
-{
-return unlikely(reset & I915_WEDGED);
-}
static inline bool i915_reset_in_progress(struct i915_gpu_error *error)
{
-return __i915_reset_in_progress(i915_reset_counter(error));
+return unlikely(test_bit(I915_RESET_IN_PROGRESS, &error->flags));
}
-static inline bool i915_reset_in_progress_or_wedged(struct i915_gpu_error *error)
+static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
{
-return __i915_reset_in_progress_or_wedged(i915_reset_counter(error));
+return unlikely(test_bit(I915_WEDGED, &error->flags));
}
-static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
+static inline bool i915_reset_in_progress_or_wedged(struct i915_gpu_error *error)
{
-return __i915_terminally_wedged(i915_reset_counter(error));
+return i915_reset_in_progress(error) | i915_terminally_wedged(error);
}
static inline u32 i915_reset_count(struct i915_gpu_error *error)
{
-return ((i915_reset_counter(error) & ~I915_WEDGED) + 1) / 2;
+return READ_ONCE(error->reset_count);
}
-void i915_gem_reset(struct drm_device *dev);
+void i915_gem_reset(struct drm_i915_private *dev_priv);
+void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
int __must_check i915_gem_init(struct drm_device *dev);
int __must_check i915_gem_init_hw(struct drm_device *dev);
void i915_gem_init_swizzling(struct drm_device *dev);
void i915_gem_cleanup_engines(struct drm_device *dev);
int __must_check i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
-bool interruptible);
+unsigned int flags);
int __must_check i915_gem_suspend(struct drm_device *dev);
void i915_gem_resume(struct drm_device *dev);
int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
@@ -3388,7 +3394,6 @@ void i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj);
int __must_check i915_gem_context_init(struct drm_device *dev);
void i915_gem_context_lost(struct drm_i915_private *dev_priv);
void i915_gem_context_fini(struct drm_device *dev);
-void i915_gem_context_reset(struct drm_device *dev);
int i915_gem_context_open(struct drm_device *dev, struct drm_file *file);
void i915_gem_context_close(struct drm_device *dev, struct drm_file *file);
int i915_switch_context(struct drm_i915_gem_request *req);
@@ -3507,13 +3512,13 @@ static inline bool i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_objec
int i915_debugfs_register(struct drm_i915_private *dev_priv);
void i915_debugfs_unregister(struct drm_i915_private *dev_priv);
int i915_debugfs_connector_add(struct drm_connector *connector);
-void intel_display_crc_init(struct drm_device *dev);
+void intel_display_crc_init(struct drm_i915_private *dev_priv);
#else
static inline int i915_debugfs_register(struct drm_i915_private *dev_priv) {return 0;}
static inline void i915_debugfs_unregister(struct drm_i915_private *dev_priv) {}
static inline int i915_debugfs_connector_add(struct drm_connector *connector)
{ return 0; }
-static inline void intel_display_crc_init(struct drm_device *dev) {}
+static inline void intel_display_crc_init(struct drm_i915_private *dev_priv) {}
#endif
/* i915_gpu_error.c */
@@ -3557,8 +3562,8 @@ extern int i915_save_state(struct drm_device *dev);
extern int i915_restore_state(struct drm_device *dev);
/* i915_sysfs.c */
-void i915_setup_sysfs(struct drm_device *dev_priv);
-void i915_teardown_sysfs(struct drm_device *dev_priv);
+void i915_setup_sysfs(struct drm_i915_private *dev_priv);
+void i915_teardown_sysfs(struct drm_i915_private *dev_priv);
/* intel_i2c.c */
extern int intel_setup_gmbus(struct drm_device *dev);
@@ -3735,9 +3740,16 @@ int intel_freq_opcode(struct drm_i915_private *dev_priv, int val);
* will be implemented using 2 32-bit writes in an arbitrary order with
* an arbitrary delay between them. This can cause the hardware to
* act upon the intermediate value, possibly leading to corruption and
-* machine death. You have been warned.
+* machine death. For this reason we do not support I915_WRITE64, or
+* dev_priv->uncore.funcs.mmio_writeq.
+*
+* When reading a 64-bit value as two 32-bit values, the delay may cause
+* the two reads to mismatch, e.g. a timestamp overflowing. Also note that
+* occasionally a 64-bit register does not actualy support a full readq
+* and must be read using two 32-bit reads.
+*
+* You have been warned.
*/
-#define I915_WRITE64(reg, val) dev_priv->uncore.funcs.mmio_writeq(dev_priv, (reg), (val), true)
#define I915_READ64(reg) dev_priv->uncore.funcs.mmio_readq(dev_priv, (reg), true)
#define I915_READ64_2x32(lower_reg, upper_reg) ({ \
@@ -3780,7 +3792,7 @@ __raw_write(64, q)
#undef __raw_write
/* These are untraced mmio-accessors that are only valid to be used inside
-* criticial sections inside IRQ handlers where forcewake is explicitly
+* critical sections inside IRQ handlers where forcewake is explicitly
* controlled.
* Think twice, and think again, before using these.
* Note: Should only be used between intel_uncore_forcewake_irqlock() and
@@ -3852,7 +3864,9 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
schedule_timeout_uninterruptible(remaining_jiffies);
}
}
-static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
+static inline bool
+__i915_request_irq_complete(struct drm_i915_gem_request *req)
{
struct intel_engine_cs *engine = req->engine;
@@ -3914,17 +3928,6 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
return true;
}
-/* We need to check whether any gpu reset happened in between
-* the request being submitted and now. If a reset has occurred,
-* the seqno will have been advance past ours and our request
-* is complete. If we are in the process of handling a reset,
-* the request is effectively complete as the rendering will
-* be discarded, but we need to return in order to drop the
-* struct_mutex.
-*/
-if (i915_reset_in_progress(&req->i915->gpu_error))
-return true;
return false;
}
...
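The comment above the I915_READ64_2x32() macro describes the hazard of reading a 64-bit register as two 32-bit halves. A standalone sketch of the usual re-read dance for such split reads (this is the general idiom, not a copy of the macro):

    #include <stdint.h>

    /* rd() stands in for a 32-bit MMIO read of the given register offset. */
    static uint64_t read64_2x32(uint32_t (*rd)(uint32_t), uint32_t lower_reg,
                                uint32_t upper_reg)
    {
            uint32_t upper, lower, old_upper;
            int loop = 0;

            upper = rd(upper_reg);
            do {
                    old_upper = upper;
                    lower = rd(lower_reg);
                    upper = rd(upper_reg);  /* re-read: did the high half roll over? */
            } while (upper != old_upper && loop++ < 2);

            return ((uint64_t)upper << 32) | lower;
    }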
@@ -386,7 +386,8 @@ __unsafe_wait_rendering(struct drm_i915_gem_object *obj,
int ret;
ret = i915_gem_active_wait_unlocked(&active[idx],
-true, NULL, rps);
+I915_WAIT_INTERRUPTIBLE,
+NULL, rps);
if (ret)
return ret;
}
@@ -1679,6 +1680,56 @@ static unsigned int tile_row_pages(struct drm_i915_gem_object *obj)
return size >> PAGE_SHIFT;
}
/**
* i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
*
* A history of the GTT mmap interface:
*
* 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
* aligned and suitable for fencing, and still fit into the available
* mappable space left by the pinned display objects. A classic problem
* we called the page-fault-of-doom where we would ping-pong between
* two objects that could not fit inside the GTT and so the memcpy
* would page one object in at the expense of the other between every
* single byte.
*
* 1 - Objects can be any size, and have any compatible fencing (X Y, or none
* as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
* object is too large for the available space (or simply too large
* for the mappable aperture!), a view is created instead and faulted
* into userspace. (This view is aligned and sized appropriately for
* fenced access.)
*
* Restrictions:
*
* * snoopable objects cannot be accessed via the GTT. It can cause machine
* hangs on some architectures, corruption on others. An attempt to service
* a GTT page fault from a snoopable object will generate a SIGBUS.
*
* * the object must be able to fit into RAM (physical memory, though no
* limited to the mappable aperture).
*
*
* Caveats:
*
* * a new GTT page fault will synchronize rendering from the GPU and flush
* all data to system memory. Subsequent access will not be synchronized.
*
* * all mappings are revoked on runtime device suspend.
*
* * there are only 8, 16 or 32 fence registers to share between all users
* (older machines require fence register for display and blitter access
* as well). Contention of the fence registers will cause the previous users
* to be unmapped and any new access will generate new page faults.
*
* * running out of memory while servicing a fault may generate a SIGBUS,
* rather than the expected SIGSEGV.
*/
int i915_gem_mmap_gtt_version(void)
{
return 1;
}
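For reference, userspace can query this number through the GETPARAM ioctl; the sketch below assumes the libdrm headers and the I915_PARAM_MMAP_GTT_VERSION parameter that is wired up to this function elsewhere in the series:

    #include <fcntl.h>
    #include <stdio.h>
    #include <xf86drm.h>
    #include <i915_drm.h>

    int main(void)
    {
            int fd = open("/dev/dri/card0", O_RDWR);
            int value = 0;
            struct drm_i915_getparam gp = {
                    .param = I915_PARAM_MMAP_GTT_VERSION,
                    .value = &value,
            };

            if (fd >= 0 && drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0)
                    printf("GTT mmap version: %d\n", value);
            return 0;
    }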
/**
* i915_gem_fault - fault a page into the GTT
* @area: CPU VMA in question
@@ -1694,6 +1745,9 @@ static unsigned int tile_row_pages(struct drm_i915_gem_object *obj)
* from the GTT and/or fence registers to make room. So performance may
* suffer if the GTT working set is large or there are few fence registers
* left.
+*
+* The current feature set supported by i915_gem_fault() and thus GTT mmaps
+* is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
*/
int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
{
@@ -1973,7 +2027,7 @@ static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
* to claim that space for ourselves, we need to take the big
* struct_mutex to free the requests+objects and allocate our slot.
*/
-err = i915_gem_wait_for_idle(dev_priv, true);
+err = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE);
if (err)
return err;
@@ -2495,32 +2549,94 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
if (i915_gem_request_completed(request))
continue;
+if (!i915_sw_fence_done(&request->submit))
+break;
return request;
}
return NULL;
}
-static void i915_gem_reset_engine_status(struct intel_engine_cs *engine)
+static void reset_request(struct drm_i915_gem_request *request)
{
void *vaddr = request->ring->vaddr;
u32 head;
/* As this request likely depends on state from the lost
* context, clear out all the user operations leaving the
* breadcrumb at the end (so we get the fence notifications).
*/
head = request->head;
if (request->postfix < head) {
memset(vaddr + head, 0, request->ring->size - head);
head = 0;
}
memset(vaddr + head, 0, request->postfix - head);
}
static void i915_gem_reset_engine(struct intel_engine_cs *engine)
{
struct drm_i915_gem_request *request;
+struct i915_gem_context *incomplete_ctx;
bool ring_hung;
/* Ensure irq handler finishes, and not run again. */
tasklet_kill(&engine->irq_tasklet);
if (engine->irq_seqno_barrier)
engine->irq_seqno_barrier(engine);
request = i915_gem_find_active_request(engine);
-if (request == NULL)
+if (!request)
return;
ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
i915_set_reset_status(request->ctx, ring_hung);
if (!ring_hung)
return;
DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n",
engine->name, request->fence.seqno);
/* Setup the CS to resume from the breadcrumb of the hung request */
engine->reset_hw(engine, request);
/* Users of the default context do not rely on logical state
* preserved between batches. They have to emit full state on
* every batch and so it is safe to execute queued requests following
* the hang.
*
* Other contexts preserve state, now corrupt. We want to skip all
* queued requests that reference the corrupt context.
*/
incomplete_ctx = request->ctx;
if (i915_gem_context_is_default(incomplete_ctx))
return;
list_for_each_entry_continue(request, &engine->request_list, link)
-i915_set_reset_status(request->ctx, false);
+if (request->ctx == incomplete_ctx)
+reset_request(request);
}
-static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
+void i915_gem_reset(struct drm_i915_private *dev_priv)
{
-struct drm_i915_gem_request *request;
-struct intel_ring *ring;
+struct intel_engine_cs *engine;
i915_gem_retire_requests(dev_priv);
for_each_engine(engine, dev_priv)
i915_gem_reset_engine(engine);
i915_gem_restore_fences(&dev_priv->drm);
}
static void nop_submit_request(struct drm_i915_gem_request *request)
{
}
static void i915_gem_cleanup_engine(struct intel_engine_cs *engine)
{
engine->submit_request = nop_submit_request;
/* Mark all pending requests as complete so that any concurrent
* (lockless) lookup doesn't try and wait upon the request as we
@@ -2535,60 +2651,30 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
*/
if (i915.enable_execlists) {
-/* Ensure irq handler finishes or is cancelled. */
-tasklet_kill(&engine->irq_tasklet);
-intel_execlists_cancel_requests(engine);
-}
-/*
-* We must free the requests after all the corresponding objects have
-* been moved off active lists. Which is the same order as the normal
-* retire_requests function does. This is important if object hold
-* implicit references on things like e.g. ppgtt address spaces through
-* the request.
-*/
-request = i915_gem_active_raw(&engine->last_request,
-&engine->i915->drm.struct_mutex);
-if (request)
-i915_gem_request_retire_upto(request);
-GEM_BUG_ON(intel_engine_is_active(engine));
-/* Having flushed all requests from all queues, we know that all
-* ringbuffers must now be empty. However, since we do not reclaim
-* all space when retiring the request (to prevent HEADs colliding
-* with rapid ringbuffer wraparound) the amount of available space
-* upon reset is less than when we start. Do one more pass over
-* all the ringbuffers to reset last_retired_head.
-*/
-list_for_each_entry(ring, &engine->buffers, link) {
-ring->last_retired_head = ring->tail;
-intel_ring_update_space(ring);
-}
+spin_lock(&engine->execlist_lock);
+INIT_LIST_HEAD(&engine->execlist_queue);
+i915_gem_request_put(engine->execlist_port[0].request);
+i915_gem_request_put(engine->execlist_port[1].request);
+memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
+spin_unlock(&engine->execlist_lock);
}
engine->i915->gt.active_engines &= ~intel_engine_flag(engine);
}
-void i915_gem_reset(struct drm_device *dev)
+void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
{
-struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_engine_cs *engine;
-/*
-* Before we free the objects from the requests, we need to inspect
-* them for finding the guilty party. As the requests only borrow
-* their reference to the objects, the inspection must be done first.
-*/
-for_each_engine(engine, dev_priv)
-i915_gem_reset_engine_status(engine);
+lockdep_assert_held(&dev_priv->drm.struct_mutex);
+set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
+i915_gem_context_lost(dev_priv);
for_each_engine(engine, dev_priv)
-i915_gem_reset_engine_cleanup(engine);
+i915_gem_cleanup_engine(engine);
mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
-i915_gem_context_reset(dev);
-i915_gem_restore_fences(dev);
+i915_gem_retire_requests(dev_priv);
}
static void
@@ -2721,7 +2807,8 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
active = __I915_BO_ACTIVE(obj);
for_each_active(active, idx) {
s64 *timeout = args->timeout_ns >= 0 ? &args->timeout_ns : NULL;
-ret = i915_gem_active_wait_unlocked(&obj->last_read[idx], true,
+ret = i915_gem_active_wait_unlocked(&obj->last_read[idx],
+I915_WAIT_INTERRUPTIBLE,
timeout, rps);
if (ret)
break;
@@ -2731,96 +2818,6 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
return ret;
}
static int
__i915_gem_object_sync(struct drm_i915_gem_request *to,
struct drm_i915_gem_request *from)
{
int ret;
if (to->engine == from->engine)
return 0;
if (!i915.semaphores) {
ret = i915_wait_request(from,
from->i915->mm.interruptible,
NULL,
NO_WAITBOOST);
if (ret)
return ret;
} else {
int idx = intel_engine_sync_index(from->engine, to->engine);
if (from->fence.seqno <= from->engine->semaphore.sync_seqno[idx])
return 0;
trace_i915_gem_ring_sync_to(to, from);
ret = to->engine->semaphore.sync_to(to, from);
if (ret)
return ret;
from->engine->semaphore.sync_seqno[idx] = from->fence.seqno;
}
return 0;
}
/**
* i915_gem_object_sync - sync an object to a ring.
*
* @obj: object which may be in use on another ring.
* @to: request we are wishing to use
*
* This code is meant to abstract object synchronization with the GPU.
* Conceptually we serialise writes between engines inside the GPU.
* We only allow one engine to write into a buffer at any time, but
* multiple readers. To ensure each has a coherent view of memory, we must:
*
* - If there is an outstanding write request to the object, the new
* request must wait for it to complete (either CPU or in hw, requests
* on the same ring will be naturally ordered).
*
* - If we are a write request (pending_write_domain is set), the new
* request must wait for outstanding read requests to complete.
*
* Returns 0 if successful, else propagates up the lower layer error.
*/
int
i915_gem_object_sync(struct drm_i915_gem_object *obj,
struct drm_i915_gem_request *to)
{
struct i915_gem_active *active;
unsigned long active_mask;
int idx;
lockdep_assert_held(&obj->base.dev->struct_mutex);
active_mask = i915_gem_object_get_active(obj);
if (!active_mask)
return 0;
if (obj->base.pending_write_domain) {
active = obj->last_read;
} else {
active_mask = 1;
active = &obj->last_write;
}
for_each_active(active_mask, idx) {
struct drm_i915_gem_request *request;
int ret;
request = i915_gem_active_peek(&active[idx],
&obj->base.dev->struct_mutex);
if (!request)
continue;
ret = __i915_gem_object_sync(to, request);
if (ret)
return ret;
}
return 0;
}
static void __i915_vma_iounmap(struct i915_vma *vma)
{
GEM_BUG_ON(i915_vma_is_pinned(vma));
@@ -2924,7 +2921,7 @@ int i915_vma_unbind(struct i915_vma *vma)
}
int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
-bool interruptible)
+unsigned int flags)
{
struct intel_engine_cs *engine;
int ret;
@@ -2933,7 +2930,7 @@ int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
if (engine->last_context == NULL)
continue;
-ret = intel_engine_idle(engine, interruptible);
+ret = intel_engine_idle(engine, flags);
if (ret)
return ret;
}
@@ -3688,7 +3685,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
if (target == NULL)
return 0;
-ret = i915_wait_request(target, true, NULL, NULL);
+ret = i915_wait_request(target, I915_WAIT_INTERRUPTIBLE, NULL, NULL);
i915_gem_request_put(target);
return ret;
@@ -4244,7 +4241,9 @@ int i915_gem_suspend(struct drm_device *dev)
if (ret)
goto err;
-ret = i915_gem_wait_for_idle(dev_priv, true);
+ret = i915_gem_wait_for_idle(dev_priv,
+I915_WAIT_INTERRUPTIBLE |
+I915_WAIT_LOCKED);
if (ret)
goto err;
@@ -4280,8 +4279,7 @@ void i915_gem_resume(struct drm_device *dev)
* guarantee that the context image is complete. So let's just reset
* it and start again.
*/
-if (i915.enable_execlists)
-intel_lr_context_reset(dev_priv, dev_priv->kernel_context);
+dev_priv->gt.resume(dev_priv);
mutex_unlock(&dev->struct_mutex);
}
@@ -4433,8 +4431,10 @@ int i915_gem_init(struct drm_device *dev)
mutex_lock(&dev->struct_mutex);
if (!i915.enable_execlists) {
+dev_priv->gt.resume = intel_legacy_submission_resume;
dev_priv->gt.cleanup_engine = intel_engine_cleanup;
} else {
+dev_priv->gt.resume = intel_lr_context_resume;
dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
}
@@ -4467,7 +4467,7 @@ int i915_gem_init(struct drm_device *dev)
* for all other failure, such as an allocation failure, bail.
*/
DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
-atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
+i915_gem_set_wedged(dev_priv);
ret = 0;
}
@@ -4569,6 +4569,8 @@ i915_gem_load_init(struct drm_device *dev)
dev_priv->mm.interruptible = true;
+atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
spin_lock_init(&dev_priv->fb_tracking.lock);
}
@@ -4587,6 +4589,11 @@ void i915_gem_load_cleanup(struct drm_device *dev)
int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
{
struct drm_i915_gem_object *obj;
+struct list_head *phases[] = {
+&dev_priv->mm.unbound_list,
+&dev_priv->mm.bound_list,
+NULL
+}, **p;
/* Called just before we write the hibernation image.
*
@@ -4597,17 +4604,19 @@ int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
*
* To make sure the hibernation image contains the latest state,
* we update that state just before writing out the image.
+*
+* To try and reduce the hibernation image, we manually shrink
+* the objects as well.
*/
-list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) {
-obj->base.read_domains = I915_GEM_DOMAIN_CPU;
-obj->base.write_domain = I915_GEM_DOMAIN_CPU;
-}
-list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
+i915_gem_shrink_all(dev_priv);
+for (p = phases; *p; p++) {
+list_for_each_entry(obj, *p, global_list) {
obj->base.read_domains = I915_GEM_DOMAIN_CPU;
obj->base.write_domain = I915_GEM_DOMAIN_CPU;
}
+}
return 0;
}
...
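The phases[] construct above is simply a NULL-terminated array of list heads walked with a pointer-to-pointer cursor, so both lists share one loop body. The same shape in a standalone toy (strings stand in for the list heads):

    #include <stdio.h>

    int main(void)
    {
            const char *phases[] = { "unbound_list", "bound_list", NULL }, **p;

            for (p = phases; *p; p++)
                    printf("forcing objects on %s into the CPU domain\n", *p);
            return 0;
    }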
@@ -420,22 +420,6 @@ static void i915_gem_context_unpin(struct i915_gem_context *ctx,
}
}
-void i915_gem_context_reset(struct drm_device *dev)
-{
-struct drm_i915_private *dev_priv = to_i915(dev);
-lockdep_assert_held(&dev->struct_mutex);
-if (i915.enable_execlists) {
-struct i915_gem_context *ctx;
-list_for_each_entry(ctx, &dev_priv->context_list, link)
-intel_lr_context_reset(dev_priv, ctx);
-}
-i915_gem_context_lost(dev_priv);
-}
int i915_gem_context_init(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = to_i915(dev);
...
@@ -170,7 +170,9 @@ i915_gem_evict_something(struct i915_address_space *vm,
if (ret)
return ret;
-ret = i915_gem_wait_for_idle(dev_priv, true);
+ret = i915_gem_wait_for_idle(dev_priv,
+I915_WAIT_INTERRUPTIBLE |
+I915_WAIT_LOCKED);
if (ret)
return ret;
@@ -275,7 +277,9 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle)
return ret;
}
-ret = i915_gem_wait_for_idle(dev_priv, true);
+ret = i915_gem_wait_for_idle(dev_priv,
+I915_WAIT_INTERRUPTIBLE |
+I915_WAIT_LOCKED);
if (ret)
return ret;
...
...@@ -1131,13 +1131,25 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, ...@@ -1131,13 +1131,25 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
list_for_each_entry(vma, vmas, exec_list) { list_for_each_entry(vma, vmas, exec_list) {
struct drm_i915_gem_object *obj = vma->obj; struct drm_i915_gem_object *obj = vma->obj;
struct reservation_object *resv;
if (obj->flags & other_rings) { if (obj->flags & other_rings) {
ret = i915_gem_object_sync(obj, req); ret = i915_gem_request_await_object
(req, obj, obj->base.pending_write_domain);
if (ret) if (ret)
return ret; return ret;
} }
resv = i915_gem_object_get_dmabuf_resv(obj);
if (resv) {
ret = i915_sw_fence_await_reservation
(&req->submit, resv, &i915_fence_ops,
obj->base.pending_write_domain, 10*HZ,
GFP_KERNEL | __GFP_NOWARN);
if (ret < 0)
return ret;
}
if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
i915_gem_clflush_object(obj, false); i915_gem_clflush_object(obj, false);
} }
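The execbuffer hunk above is part of the implicit dma-buf fencing work: before a request is emitted, it now awaits the other rings via i915_gem_request_await_object() and, for dma-buf-backed objects, asynchronously awaits the attached reservation object through i915_sw_fence_await_reservation(), passing pending_write_domain to decide whether all fences or only the exclusive (writer) fence matter. As a rough synchronous illustration of the same idea, not the driver's asynchronous path, a 4.8-era caller could block on the reservation object directly; treat this helper and its error mapping as a sketch only:

	#include <linux/reservation.h>
	#include <linux/jiffies.h>
	#include <linux/errno.h>

	/*
	 * Block until the implicit fences attached to a shared buffer have
	 * signalled.  write == true waits for readers and writers (we are
	 * about to write); write == false waits only for the last writer.
	 * This is the synchronous cousin of the asynchronous
	 * i915_sw_fence_await_reservation() call in the hunk above.
	 */
	static int wait_for_implicit_fences(struct reservation_object *resv,
					    bool write)
	{
		long ret;

		ret = reservation_object_wait_timeout_rcu(resv, write, true,
							  10 * HZ);
		if (ret < 0)
			return ret;		/* interrupted */
		if (ret == 0)
			return -ETIMEDOUT;	/* timed out */

		return 0;
	}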
...@@ -1253,12 +1265,9 @@ static struct i915_gem_context * ...@@ -1253,12 +1265,9 @@ static struct i915_gem_context *
i915_gem_validate_context(struct drm_device *dev, struct drm_file *file, i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
struct intel_engine_cs *engine, const u32 ctx_id) struct intel_engine_cs *engine, const u32 ctx_id)
{ {
struct i915_gem_context *ctx = NULL; struct i915_gem_context *ctx;
struct i915_ctx_hang_stats *hs; struct i915_ctx_hang_stats *hs;
if (engine->id != RCS && ctx_id != DEFAULT_CONTEXT_HANDLE)
return ERR_PTR(-EINVAL);
ctx = i915_gem_context_lookup(file->driver_priv, ctx_id); ctx = i915_gem_context_lookup(file->driver_priv, ctx_id);
if (IS_ERR(ctx)) if (IS_ERR(ctx))
return ctx; return ctx;
...@@ -1538,13 +1547,9 @@ gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv, ...@@ -1538,13 +1547,9 @@ gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
struct drm_i915_file_private *file_priv = file->driver_priv; struct drm_i915_file_private *file_priv = file->driver_priv;
/* Check whether the file_priv has already selected one ring. */ /* Check whether the file_priv has already selected one ring. */
if ((int)file_priv->bsd_engine < 0) { if ((int)file_priv->bsd_engine < 0)
/* If not, use the ping-pong mechanism to select one. */ file_priv->bsd_engine = atomic_fetch_xor(1,
mutex_lock(&dev_priv->drm.struct_mutex); &dev_priv->mm.bsd_engine_dispatch_index);
file_priv->bsd_engine = dev_priv->mm.bsd_engine_dispatch_index;
dev_priv->mm.bsd_engine_dispatch_index ^= 1;
mutex_unlock(&dev_priv->drm.struct_mutex);
}
return file_priv->bsd_engine; return file_priv->bsd_engine;
} }
......
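gen8_dispatch_bsd_engine() no longer takes struct_mutex just to flip a ping-pong index between the two BSD rings: atomic_fetch_xor(1, ...) returns the previous value and toggles it in one atomic step (the new mm.bsd_engine_dispatch_index is initialised with atomic_set() in i915_gem_load_init() further up). The same idiom in portable C11, with hypothetical names:

	#include <stdatomic.h>
	#include <stdio.h>

	/* Shared round-robin selector for two engines (e.g. VCS1/VCS2). */
	static atomic_uint bsd_dispatch_index;

	/*
	 * Return 0 or 1, alternating on every call, without a lock:
	 * atomic_fetch_xor() hands back the old value and flips the stored
	 * one in a single atomic operation.
	 */
	static unsigned int pick_bsd_engine(void)
	{
		return atomic_fetch_xor(&bsd_dispatch_index, 1u);
	}

	int main(void)
	{
		for (int i = 0; i < 4; i++)
			printf("request %d -> engine %u\n", i, pick_bsd_engine());
		return 0;
	}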
...@@ -32,6 +32,8 @@ ...@@ -32,6 +32,8 @@
#include "i915_trace.h" #include "i915_trace.h"
#include "intel_drv.h" #include "intel_drv.h"
#define I915_GFP_DMA (GFP_KERNEL | __GFP_HIGHMEM)
/** /**
* DOC: Global GTT views * DOC: Global GTT views
* *
...@@ -122,8 +124,11 @@ int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, ...@@ -122,8 +124,11 @@ int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
has_full_48bit_ppgtt = has_full_48bit_ppgtt =
IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9; IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9;
if (intel_vgpu_active(dev_priv)) if (intel_vgpu_active(dev_priv)) {
has_full_ppgtt = false; /* emulation is too hard */ /* emulation is too hard */
has_full_ppgtt = false;
has_full_48bit_ppgtt = false;
}
if (!has_aliasing_ppgtt) if (!has_aliasing_ppgtt)
return 0; return 0;
...@@ -158,7 +163,7 @@ int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, ...@@ -158,7 +163,7 @@ int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
return 0; return 0;
} }
if (INTEL_GEN(dev_priv) >= 8 && i915.enable_execlists) if (INTEL_GEN(dev_priv) >= 8 && i915.enable_execlists && has_full_ppgtt)
return has_full_48bit_ppgtt ? 3 : 2; return has_full_48bit_ppgtt ? 3 : 2;
else else
return has_aliasing_ppgtt ? 1 : 0; return has_aliasing_ppgtt ? 1 : 0;
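intel_sanitize_enable_ppgtt() now folds the vGPU case into both the full and 48-bit flags and only reports full PPGTT on gen8+ execlists when the platform actually supports it. The resulting decision, written out as a small standalone helper (illustrative only; the real function also consults the enable_ppgtt module parameter and other checks elided by this hunk):

	#include <stdbool.h>
	#include <stdio.h>

	/*
	 * 0 = no PPGTT, 1 = aliasing, 2 = full, 3 = full 48-bit.
	 * Mirrors the tail of intel_sanitize_enable_ppgtt() as shown above.
	 */
	static int ppgtt_mode(bool gen8_or_later, bool enable_execlists,
			      bool has_aliasing, bool has_full,
			      bool has_full_48bit, bool vgpu_active)
	{
		if (vgpu_active) {
			/* emulation is too hard */
			has_full = false;
			has_full_48bit = false;
		}

		if (!has_aliasing)
			return 0;

		if (gen8_or_later && enable_execlists && has_full)
			return has_full_48bit ? 3 : 2;

		return has_aliasing ? 1 : 0;
	}

	int main(void)
	{
		/* gen9, execlists on, everything supported, no vGPU -> 3 */
		printf("mode=%d\n",
		       ppgtt_mode(true, true, true, true, true, false));
		return 0;
	}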
...@@ -326,16 +331,16 @@ static gen6_pte_t iris_pte_encode(dma_addr_t addr, ...@@ -326,16 +331,16 @@ static gen6_pte_t iris_pte_encode(dma_addr_t addr,
static int __setup_page_dma(struct drm_device *dev, static int __setup_page_dma(struct drm_device *dev,
struct i915_page_dma *p, gfp_t flags) struct i915_page_dma *p, gfp_t flags)
{ {
struct device *device = &dev->pdev->dev; struct device *kdev = &dev->pdev->dev;
p->page = alloc_page(flags); p->page = alloc_page(flags);
if (!p->page) if (!p->page)
return -ENOMEM; return -ENOMEM;
p->daddr = dma_map_page(device, p->daddr = dma_map_page(kdev,
p->page, 0, 4096, PCI_DMA_BIDIRECTIONAL); p->page, 0, 4096, PCI_DMA_BIDIRECTIONAL);
if (dma_mapping_error(device, p->daddr)) { if (dma_mapping_error(kdev, p->daddr)) {
__free_page(p->page); __free_page(p->page);
return -EINVAL; return -EINVAL;
} }
...@@ -345,15 +350,17 @@ static int __setup_page_dma(struct drm_device *dev, ...@@ -345,15 +350,17 @@ static int __setup_page_dma(struct drm_device *dev,
static int setup_page_dma(struct drm_device *dev, struct i915_page_dma *p) static int setup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
{ {
return __setup_page_dma(dev, p, GFP_KERNEL); return __setup_page_dma(dev, p, I915_GFP_DMA);
} }
static void cleanup_page_dma(struct drm_device *dev, struct i915_page_dma *p) static void cleanup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
{ {
struct pci_dev *pdev = dev->pdev;
if (WARN_ON(!p->page)) if (WARN_ON(!p->page))
return; return;
dma_unmap_page(&dev->pdev->dev, p->daddr, 4096, PCI_DMA_BIDIRECTIONAL); dma_unmap_page(&pdev->dev, p->daddr, 4096, PCI_DMA_BIDIRECTIONAL);
__free_page(p->page); __free_page(p->page);
memset(p, 0, sizeof(*p)); memset(p, 0, sizeof(*p));
} }
...@@ -407,33 +414,18 @@ static void fill_page_dma_32(struct drm_device *dev, struct i915_page_dma *p, ...@@ -407,33 +414,18 @@ static void fill_page_dma_32(struct drm_device *dev, struct i915_page_dma *p,
fill_page_dma(dev, p, v); fill_page_dma(dev, p, v);
} }
static struct i915_page_scratch *alloc_scratch_page(struct drm_device *dev) static int
setup_scratch_page(struct drm_device *dev,
struct i915_page_dma *scratch,
gfp_t gfp)
{ {
struct i915_page_scratch *sp; return __setup_page_dma(dev, scratch, gfp | __GFP_ZERO);
int ret;
sp = kzalloc(sizeof(*sp), GFP_KERNEL);
if (sp == NULL)
return ERR_PTR(-ENOMEM);
ret = __setup_page_dma(dev, px_base(sp), GFP_DMA32 | __GFP_ZERO);
if (ret) {
kfree(sp);
return ERR_PTR(ret);
}
set_pages_uc(px_page(sp), 1);
return sp;
} }
static void free_scratch_page(struct drm_device *dev, static void cleanup_scratch_page(struct drm_device *dev,
struct i915_page_scratch *sp) struct i915_page_dma *scratch)
{ {
set_pages_wb(px_page(sp), 1); cleanup_page_dma(dev, scratch);
cleanup_px(dev, sp);
kfree(sp);
} }
static struct i915_page_table *alloc_pt(struct drm_device *dev) static struct i915_page_table *alloc_pt(struct drm_device *dev)
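The scratch-page rework above drops the i915_page_scratch wrapper and the set_pages_uc()/set_pages_wb() attribute flips: the scratch page is now a plain i915_page_dma filled in by __setup_page_dma() with __GFP_ZERO (and GFP_DMA32 for the GGTT probe later in this patch). The core allocate-and-map step, restated as a hedged standalone helper using the same kernel APIs and a hypothetical struct name:

	#include <linux/gfp.h>
	#include <linux/dma-mapping.h>
	#include <linux/errno.h>

	struct page_dma {
		struct page *page;
		dma_addr_t daddr;
	};

	/*
	 * Allocate one zeroed page and map it for bidirectional DMA,
	 * mirroring __setup_page_dma()/setup_scratch_page() above.
	 */
	static int setup_dma_page(struct device *kdev, struct page_dma *p,
				  gfp_t gfp)
	{
		p->page = alloc_page(gfp | __GFP_ZERO);
		if (!p->page)
			return -ENOMEM;

		p->daddr = dma_map_page(kdev, p->page, 0, PAGE_SIZE,
					DMA_BIDIRECTIONAL);
		if (dma_mapping_error(kdev, p->daddr)) {
			__free_page(p->page);
			return -EINVAL;
		}

		return 0;
	}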
...@@ -479,7 +471,7 @@ static void gen8_initialize_pt(struct i915_address_space *vm, ...@@ -479,7 +471,7 @@ static void gen8_initialize_pt(struct i915_address_space *vm,
{ {
gen8_pte_t scratch_pte; gen8_pte_t scratch_pte;
scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page), scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
I915_CACHE_LLC, true); I915_CACHE_LLC, true);
fill_px(vm->dev, pt, scratch_pte); fill_px(vm->dev, pt, scratch_pte);
...@@ -490,9 +482,9 @@ static void gen6_initialize_pt(struct i915_address_space *vm, ...@@ -490,9 +482,9 @@ static void gen6_initialize_pt(struct i915_address_space *vm,
{ {
gen6_pte_t scratch_pte; gen6_pte_t scratch_pte;
WARN_ON(px_dma(vm->scratch_page) == 0); WARN_ON(vm->scratch_page.daddr == 0);
scratch_pte = vm->pte_encode(px_dma(vm->scratch_page), scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
I915_CACHE_LLC, true, 0); I915_CACHE_LLC, true, 0);
fill32_px(vm->dev, pt, scratch_pte); fill32_px(vm->dev, pt, scratch_pte);
...@@ -776,7 +768,7 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm, ...@@ -776,7 +768,7 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
bool use_scratch) bool use_scratch)
{ {
struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page), gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
I915_CACHE_LLC, use_scratch); I915_CACHE_LLC, use_scratch);
if (!USES_FULL_48BIT_PPGTT(vm->dev)) { if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
...@@ -882,9 +874,9 @@ static int gen8_init_scratch(struct i915_address_space *vm) ...@@ -882,9 +874,9 @@ static int gen8_init_scratch(struct i915_address_space *vm)
struct drm_device *dev = vm->dev; struct drm_device *dev = vm->dev;
int ret; int ret;
vm->scratch_page = alloc_scratch_page(dev); ret = setup_scratch_page(dev, &vm->scratch_page, I915_GFP_DMA);
if (IS_ERR(vm->scratch_page)) if (ret)
return PTR_ERR(vm->scratch_page); return ret;
vm->scratch_pt = alloc_pt(dev); vm->scratch_pt = alloc_pt(dev);
if (IS_ERR(vm->scratch_pt)) { if (IS_ERR(vm->scratch_pt)) {
...@@ -918,7 +910,7 @@ static int gen8_init_scratch(struct i915_address_space *vm) ...@@ -918,7 +910,7 @@ static int gen8_init_scratch(struct i915_address_space *vm)
free_pt: free_pt:
free_pt(dev, vm->scratch_pt); free_pt(dev, vm->scratch_pt);
free_scratch_page: free_scratch_page:
free_scratch_page(dev, vm->scratch_page); cleanup_scratch_page(dev, &vm->scratch_page);
return ret; return ret;
} }
...@@ -962,7 +954,7 @@ static void gen8_free_scratch(struct i915_address_space *vm) ...@@ -962,7 +954,7 @@ static void gen8_free_scratch(struct i915_address_space *vm)
free_pdp(dev, vm->scratch_pdp); free_pdp(dev, vm->scratch_pdp);
free_pd(dev, vm->scratch_pd); free_pd(dev, vm->scratch_pd);
free_pt(dev, vm->scratch_pt); free_pt(dev, vm->scratch_pt);
free_scratch_page(dev, vm->scratch_page); cleanup_scratch_page(dev, &vm->scratch_page);
} }
static void gen8_ppgtt_cleanup_3lvl(struct drm_device *dev, static void gen8_ppgtt_cleanup_3lvl(struct drm_device *dev,
...@@ -1459,7 +1451,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) ...@@ -1459,7 +1451,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
struct i915_address_space *vm = &ppgtt->base; struct i915_address_space *vm = &ppgtt->base;
uint64_t start = ppgtt->base.start; uint64_t start = ppgtt->base.start;
uint64_t length = ppgtt->base.total; uint64_t length = ppgtt->base.total;
gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page), gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
I915_CACHE_LLC, true); I915_CACHE_LLC, true);
if (!USES_FULL_48BIT_PPGTT(vm->dev)) { if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
...@@ -1576,7 +1568,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) ...@@ -1576,7 +1568,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
uint32_t pte, pde; uint32_t pte, pde;
uint32_t start = ppgtt->base.start, length = ppgtt->base.total; uint32_t start = ppgtt->base.start, length = ppgtt->base.total;
scratch_pte = vm->pte_encode(px_dma(vm->scratch_page), scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
I915_CACHE_LLC, true, 0); I915_CACHE_LLC, true, 0);
gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) { gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) {
...@@ -1801,7 +1793,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, ...@@ -1801,7 +1793,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
unsigned first_pte = first_entry % GEN6_PTES; unsigned first_pte = first_entry % GEN6_PTES;
unsigned last_pte, i; unsigned last_pte, i;
scratch_pte = vm->pte_encode(px_dma(vm->scratch_page), scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
I915_CACHE_LLC, true, 0); I915_CACHE_LLC, true, 0);
while (num_entries) { while (num_entries) {
...@@ -1947,14 +1939,15 @@ static int gen6_alloc_va_range(struct i915_address_space *vm, ...@@ -1947,14 +1939,15 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
static int gen6_init_scratch(struct i915_address_space *vm) static int gen6_init_scratch(struct i915_address_space *vm)
{ {
struct drm_device *dev = vm->dev; struct drm_device *dev = vm->dev;
int ret;
vm->scratch_page = alloc_scratch_page(dev); ret = setup_scratch_page(dev, &vm->scratch_page, I915_GFP_DMA);
if (IS_ERR(vm->scratch_page)) if (ret)
return PTR_ERR(vm->scratch_page); return ret;
vm->scratch_pt = alloc_pt(dev); vm->scratch_pt = alloc_pt(dev);
if (IS_ERR(vm->scratch_pt)) { if (IS_ERR(vm->scratch_pt)) {
free_scratch_page(dev, vm->scratch_page); cleanup_scratch_page(dev, &vm->scratch_page);
return PTR_ERR(vm->scratch_pt); return PTR_ERR(vm->scratch_pt);
} }
...@@ -1968,7 +1961,7 @@ static void gen6_free_scratch(struct i915_address_space *vm) ...@@ -1968,7 +1961,7 @@ static void gen6_free_scratch(struct i915_address_space *vm)
struct drm_device *dev = vm->dev; struct drm_device *dev = vm->dev;
free_pt(dev, vm->scratch_pt); free_pt(dev, vm->scratch_pt);
free_scratch_page(dev, vm->scratch_page); cleanup_scratch_page(dev, &vm->scratch_page);
} }
static void gen6_ppgtt_cleanup(struct i915_address_space *vm) static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
...@@ -2311,12 +2304,7 @@ int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj) ...@@ -2311,12 +2304,7 @@ int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
{ {
#ifdef writeq
writeq(pte, addr); writeq(pte, addr);
#else
iowrite32((u32)pte, addr);
iowrite32(pte >> 32, addr + 4);
#endif
} }
static void gen8_ggtt_insert_page(struct i915_address_space *vm, static void gen8_ggtt_insert_page(struct i915_address_space *vm,
...@@ -2509,7 +2497,7 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm, ...@@ -2509,7 +2497,7 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
first_entry, num_entries, max_entries)) first_entry, num_entries, max_entries))
num_entries = max_entries; num_entries = max_entries;
scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page), scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
I915_CACHE_LLC, I915_CACHE_LLC,
use_scratch); use_scratch);
for (i = 0; i < num_entries; i++) for (i = 0; i < num_entries; i++)
...@@ -2541,7 +2529,7 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm, ...@@ -2541,7 +2529,7 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm,
first_entry, num_entries, max_entries)) first_entry, num_entries, max_entries))
num_entries = max_entries; num_entries = max_entries;
scratch_pte = vm->pte_encode(px_dma(vm->scratch_page), scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
I915_CACHE_LLC, use_scratch, 0); I915_CACHE_LLC, use_scratch, 0);
for (i = 0; i < num_entries; i++) for (i = 0; i < num_entries; i++)
...@@ -2685,19 +2673,19 @@ static void ggtt_unbind_vma(struct i915_vma *vma) ...@@ -2685,19 +2673,19 @@ static void ggtt_unbind_vma(struct i915_vma *vma)
void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj) void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
{ {
struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
struct drm_i915_private *dev_priv = to_i915(dev); struct device *kdev = &dev_priv->drm.pdev->dev;
struct i915_ggtt *ggtt = &dev_priv->ggtt; struct i915_ggtt *ggtt = &dev_priv->ggtt;
if (unlikely(ggtt->do_idle_maps)) { if (unlikely(ggtt->do_idle_maps)) {
if (i915_gem_wait_for_idle(dev_priv, false)) { if (i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED)) {
DRM_ERROR("Failed to wait for idle; VT'd may hang.\n"); DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
/* Wait a bit, in hopes it avoids the hang */ /* Wait a bit, in hopes it avoids the hang */
udelay(10); udelay(10);
} }
} }
dma_unmap_sg(&dev->pdev->dev, obj->pages->sgl, obj->pages->nents, dma_unmap_sg(kdev, obj->pages->sgl, obj->pages->nents,
PCI_DMA_BIDIRECTIONAL); PCI_DMA_BIDIRECTIONAL);
} }
...@@ -2894,8 +2882,8 @@ static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl) ...@@ -2894,8 +2882,8 @@ static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl)
static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
{ {
struct pci_dev *pdev = ggtt->base.dev->pdev; struct pci_dev *pdev = ggtt->base.dev->pdev;
struct i915_page_scratch *scratch_page;
phys_addr_t phys_addr; phys_addr_t phys_addr;
int ret;
/* For Modern GENs the PTEs and register space are split in the BAR */ /* For Modern GENs the PTEs and register space are split in the BAR */
phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2; phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2;
...@@ -2916,16 +2904,16 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) ...@@ -2916,16 +2904,16 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
return -ENOMEM; return -ENOMEM;
} }
scratch_page = alloc_scratch_page(ggtt->base.dev); ret = setup_scratch_page(ggtt->base.dev,
if (IS_ERR(scratch_page)) { &ggtt->base.scratch_page,
GFP_DMA32);
if (ret) {
DRM_ERROR("Scratch setup failed\n"); DRM_ERROR("Scratch setup failed\n");
/* iounmap will also get called at remove, but meh */ /* iounmap will also get called at remove, but meh */
iounmap(ggtt->gsm); iounmap(ggtt->gsm);
return PTR_ERR(scratch_page); return ret;
} }
ggtt->base.scratch_page = scratch_page;
return 0; return 0;
} }
...@@ -3007,7 +2995,7 @@ static void gen6_gmch_remove(struct i915_address_space *vm) ...@@ -3007,7 +2995,7 @@ static void gen6_gmch_remove(struct i915_address_space *vm)
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
iounmap(ggtt->gsm); iounmap(ggtt->gsm);
free_scratch_page(vm->dev, vm->scratch_page); cleanup_scratch_page(vm->dev, &vm->scratch_page);
} }
static int gen8_gmch_probe(struct i915_ggtt *ggtt) static int gen8_gmch_probe(struct i915_ggtt *ggtt)
...@@ -3244,8 +3232,7 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) ...@@ -3244,8 +3232,7 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
{ {
struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_private *dev_priv = to_i915(dev);
struct i915_ggtt *ggtt = &dev_priv->ggtt; struct i915_ggtt *ggtt = &dev_priv->ggtt;
struct drm_i915_gem_object *obj; struct drm_i915_gem_object *obj, *on;
struct i915_vma *vma;
i915_check_and_clear_faults(dev_priv); i915_check_and_clear_faults(dev_priv);
...@@ -3253,20 +3240,32 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) ...@@ -3253,20 +3240,32 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total, ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total,
true); true);
/* Cache flush objects bound into GGTT and rebind them. */ ggtt->base.closed = true; /* skip rewriting PTE on VMA unbind */
list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
/* clflush objects bound into the GGTT and rebind them. */
list_for_each_entry_safe(obj, on,
&dev_priv->mm.bound_list, global_list) {
bool ggtt_bound = false;
struct i915_vma *vma;
list_for_each_entry(vma, &obj->vma_list, obj_link) { list_for_each_entry(vma, &obj->vma_list, obj_link) {
if (vma->vm != &ggtt->base) if (vma->vm != &ggtt->base)
continue; continue;
if (!i915_vma_unbind(vma))
continue;
WARN_ON(i915_vma_bind(vma, obj->cache_level, WARN_ON(i915_vma_bind(vma, obj->cache_level,
PIN_UPDATE)); PIN_UPDATE));
ggtt_bound = true;
} }
if (obj->pin_display) if (ggtt_bound)
WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false)); WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
} }
ggtt->base.closed = false;
if (INTEL_INFO(dev)->gen >= 8) { if (INTEL_INFO(dev)->gen >= 8) {
if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev)) if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
chv_setup_private_ppat(dev_priv); chv_setup_private_ppat(dev_priv);
......
...@@ -312,10 +312,6 @@ struct i915_page_dma { ...@@ -312,10 +312,6 @@ struct i915_page_dma {
#define px_page(px) (px_base(px)->page) #define px_page(px) (px_base(px)->page)
#define px_dma(px) (px_base(px)->daddr) #define px_dma(px) (px_base(px)->daddr)
struct i915_page_scratch {
struct i915_page_dma base;
};
struct i915_page_table { struct i915_page_table {
struct i915_page_dma base; struct i915_page_dma base;
...@@ -361,7 +357,7 @@ struct i915_address_space { ...@@ -361,7 +357,7 @@ struct i915_address_space {
bool closed; bool closed;
struct i915_page_scratch *scratch_page; struct i915_page_dma scratch_page;
struct i915_page_table *scratch_pt; struct i915_page_table *scratch_pt;
struct i915_page_directory *scratch_pd; struct i915_page_directory *scratch_pd;
struct i915_page_directory_pointer *scratch_pdp; /* GEN8+ & 48b PPGTT */ struct i915_page_directory_pointer *scratch_pdp; /* GEN8+ & 48b PPGTT */
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include <linux/fence.h> #include <linux/fence.h>
#include "i915_gem.h" #include "i915_gem.h"
#include "i915_sw_fence.h"
struct intel_wait { struct intel_wait {
struct rb_node node; struct rb_node node;
...@@ -82,26 +83,32 @@ struct drm_i915_gem_request { ...@@ -82,26 +83,32 @@ struct drm_i915_gem_request {
struct intel_ring *ring; struct intel_ring *ring;
struct intel_signal_node signaling; struct intel_signal_node signaling;
struct i915_sw_fence submit;
wait_queue_t submitq;
/** GEM sequence number associated with the previous request, /** GEM sequence number associated with the previous request,
* when the HWS breadcrumb is equal to this the GPU is processing * when the HWS breadcrumb is equal to this the GPU is processing
* this request. * this request.
*/ */
u32 previous_seqno; u32 previous_seqno;
/** Position in the ringbuffer of the start of the request */ /** Position in the ring of the start of the request */
u32 head; u32 head;
/** /**
* Position in the ringbuffer of the start of the postfix. * Position in the ring of the start of the postfix.
* This is required to calculate the maximum available ringbuffer * This is required to calculate the maximum available ring space
* space without overwriting the postfix. * without overwriting the postfix.
*/ */
u32 postfix; u32 postfix;
/** Position in the ringbuffer of the end of the whole request */ /** Position in the ring of the end of the whole request */
u32 tail; u32 tail;
/** Preallocate space in the ringbuffer for the emitting the request */ /** Position in the ring of the end of any workarounds after the tail */
u32 wa_tail;
/** Preallocate space in the ring for the emitting the request */
u32 reserved_space; u32 reserved_space;
/** /**
...@@ -134,27 +141,8 @@ struct drm_i915_gem_request { ...@@ -134,27 +141,8 @@ struct drm_i915_gem_request {
/** file_priv list entry for this request */ /** file_priv list entry for this request */
struct list_head client_list; struct list_head client_list;
/** /** Link in the execlist submission queue, guarded by execlist_lock. */
* The ELSP only accepts two elements at a time, so we queue
* context/tail pairs on a given queue (ring->execlist_queue) until the
* hardware is available. The queue serves a double purpose: we also use
* it to keep track of the up to 2 contexts currently in the hardware
* (usually one in execution and the other queued up by the GPU): We
* only remove elements from the head of the queue when the hardware
* informs us that an element has been completed.
*
* All accesses to the queue are mediated by a spinlock
* (ring->execlist_lock).
*/
/** Execlist link in the submission queue.*/
struct list_head execlist_link; struct list_head execlist_link;
/** Execlists no. of times this request has been sent to the ELSP */
int elsp_submitted;
/** Execlists context hardware id. */
unsigned int ctx_hw_id;
}; };
extern const struct fence_ops i915_fence_ops; extern const struct fence_ops i915_fence_ops;
...@@ -222,6 +210,11 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst, ...@@ -222,6 +210,11 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
*pdst = src; *pdst = src;
} }
int
i915_gem_request_await_object(struct drm_i915_gem_request *to,
struct drm_i915_gem_object *obj,
bool write);
void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches); void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches);
#define i915_add_request(req) \ #define i915_add_request(req) \
__i915_add_request(req, true) __i915_add_request(req, true)
...@@ -234,10 +227,12 @@ struct intel_rps_client; ...@@ -234,10 +227,12 @@ struct intel_rps_client;
#define IS_RPS_USER(p) (!IS_ERR_OR_NULL(p)) #define IS_RPS_USER(p) (!IS_ERR_OR_NULL(p))
int i915_wait_request(struct drm_i915_gem_request *req, int i915_wait_request(struct drm_i915_gem_request *req,
bool interruptible, unsigned int flags,
s64 *timeout, s64 *timeout,
struct intel_rps_client *rps) struct intel_rps_client *rps)
__attribute__((nonnull(1))); __attribute__((nonnull(1)));
#define I915_WAIT_INTERRUPTIBLE BIT(0)
#define I915_WAIT_LOCKED BIT(1) /* struct_mutex held, handle GPU reset */
static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine); static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine);
...@@ -472,6 +467,19 @@ __i915_gem_active_get_rcu(const struct i915_gem_active *active) ...@@ -472,6 +467,19 @@ __i915_gem_active_get_rcu(const struct i915_gem_active *active)
if (!request || i915_gem_request_completed(request)) if (!request || i915_gem_request_completed(request))
return NULL; return NULL;
/* An especially silly compiler could decide to recompute the
* result of i915_gem_request_completed, more specifically
* re-emit the load for request->fence.seqno. A race would catch
* a later seqno value, which could flip the result from true to
* false. Which means part of the instructions below might not
* be executed, while later on instructions are executed. Due to
* barriers within the refcounting the inconsistency can't reach
* past the call to i915_gem_request_get_rcu, but not executing
* that while still executing i915_gem_request_put() creates
* havoc enough. Prevent this with a compiler barrier.
*/
barrier();
request = i915_gem_request_get_rcu(request); request = i915_gem_request_get_rcu(request);
/* What stops the following rcu_access_pointer() from occurring /* What stops the following rcu_access_pointer() from occurring
...@@ -578,13 +586,15 @@ i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex) ...@@ -578,13 +586,15 @@ i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex)
if (!request) if (!request)
return 0; return 0;
return i915_wait_request(request, true, NULL, NULL); return i915_wait_request(request,
I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
NULL, NULL);
} }
/** /**
* i915_gem_active_wait_unlocked - waits until the request is completed * i915_gem_active_wait_unlocked - waits until the request is completed
* @active - the active request on which to wait * @active - the active request on which to wait
* @interruptible - whether the wait can be woken by a userspace signal * @flags - how to wait
* @timeout - how long to wait at most * @timeout - how long to wait at most
* @rps - userspace client to charge for a waitboost * @rps - userspace client to charge for a waitboost
* *
...@@ -605,7 +615,7 @@ i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex) ...@@ -605,7 +615,7 @@ i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex)
*/ */
static inline int static inline int
i915_gem_active_wait_unlocked(const struct i915_gem_active *active, i915_gem_active_wait_unlocked(const struct i915_gem_active *active,
bool interruptible, unsigned int flags,
s64 *timeout, s64 *timeout,
struct intel_rps_client *rps) struct intel_rps_client *rps)
{ {
...@@ -614,7 +624,7 @@ i915_gem_active_wait_unlocked(const struct i915_gem_active *active, ...@@ -614,7 +624,7 @@ i915_gem_active_wait_unlocked(const struct i915_gem_active *active,
request = i915_gem_active_get_unlocked(active); request = i915_gem_active_get_unlocked(active);
if (request) { if (request) {
ret = i915_wait_request(request, interruptible, timeout, rps); ret = i915_wait_request(request, flags, timeout, rps);
i915_gem_request_put(request); i915_gem_request_put(request);
} }
...@@ -641,7 +651,9 @@ i915_gem_active_retire(struct i915_gem_active *active, ...@@ -641,7 +651,9 @@ i915_gem_active_retire(struct i915_gem_active *active,
if (!request) if (!request)
return 0; return 0;
ret = i915_wait_request(request, true, NULL, NULL); ret = i915_wait_request(request,
I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
NULL, NULL);
if (ret) if (ret)
return ret; return ret;
......
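Throughout this series the boolean "interruptible" argument of the wait helpers becomes a flags bitmask: I915_WAIT_INTERRUPTIBLE and I915_WAIT_LOCKED (struct_mutex held, so the waiter must also handle GPU reset). Call sites now read i915_wait_request(req, I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED, ...) instead of an opaque "true". A tiny sketch, with hypothetical names, of why the flag style scales better than stacked bools:

	#include <stdbool.h>
	#include <stdio.h>

	#define WAIT_INTERRUPTIBLE	(1u << 0)
	#define WAIT_LOCKED		(1u << 1)	/* caller holds the big lock */

	/*
	 * With a bitmask, call sites say what they mean and new options can
	 * be added without touching every caller -- the same motivation as
	 * the I915_WAIT_* flags introduced above.
	 */
	static int do_wait(unsigned int flags)
	{
		bool interruptible = flags & WAIT_INTERRUPTIBLE;
		bool locked = flags & WAIT_LOCKED;

		printf("waiting: interruptible=%d locked=%d\n",
		       interruptible, locked);
		return 0;
	}

	int main(void)
	{
		do_wait(WAIT_INTERRUPTIBLE | WAIT_LOCKED);	/* under the lock */
		do_wait(0);					/* uninterruptible, unlocked */
		return 0;
	}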
...@@ -323,7 +323,7 @@ i915_gem_shrinker_lock_uninterruptible(struct drm_i915_private *dev_priv, ...@@ -323,7 +323,7 @@ i915_gem_shrinker_lock_uninterruptible(struct drm_i915_private *dev_priv,
unsigned long timeout = jiffies + msecs_to_jiffies_timeout(timeout_ms); unsigned long timeout = jiffies + msecs_to_jiffies_timeout(timeout_ms);
do { do {
if (i915_gem_wait_for_idle(dev_priv, false) == 0 && if (i915_gem_wait_for_idle(dev_priv, 0) == 0 &&
i915_gem_shrinker_lock(&dev_priv->drm, &slu->unlock)) i915_gem_shrinker_lock(&dev_priv->drm, &slu->unlock))
break; break;
...@@ -414,7 +414,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr ...@@ -414,7 +414,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
return NOTIFY_DONE; return NOTIFY_DONE;
/* Force everything onto the inactive lists */ /* Force everything onto the inactive lists */
ret = i915_gem_wait_for_idle(dev_priv, false); ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED);
if (ret) if (ret)
goto out; goto out;
......
...@@ -92,6 +92,7 @@ void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv, ...@@ -92,6 +92,7 @@ void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv,
static unsigned long i915_stolen_to_physical(struct drm_device *dev) static unsigned long i915_stolen_to_physical(struct drm_device *dev)
{ {
struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_private *dev_priv = to_i915(dev);
struct pci_dev *pdev = dev_priv->drm.pdev;
struct i915_ggtt *ggtt = &dev_priv->ggtt; struct i915_ggtt *ggtt = &dev_priv->ggtt;
struct resource *r; struct resource *r;
u32 base; u32 base;
...@@ -111,7 +112,7 @@ static unsigned long i915_stolen_to_physical(struct drm_device *dev) ...@@ -111,7 +112,7 @@ static unsigned long i915_stolen_to_physical(struct drm_device *dev)
if (INTEL_INFO(dev)->gen >= 3) { if (INTEL_INFO(dev)->gen >= 3) {
u32 bsm; u32 bsm;
pci_read_config_dword(dev->pdev, INTEL_BSM, &bsm); pci_read_config_dword(pdev, INTEL_BSM, &bsm);
base = bsm & INTEL_BSM_MASK; base = bsm & INTEL_BSM_MASK;
} else if (IS_I865G(dev)) { } else if (IS_I865G(dev)) {
...@@ -119,7 +120,7 @@ static unsigned long i915_stolen_to_physical(struct drm_device *dev) ...@@ -119,7 +120,7 @@ static unsigned long i915_stolen_to_physical(struct drm_device *dev)
u16 toud = 0; u16 toud = 0;
u8 tmp; u8 tmp;
pci_bus_read_config_byte(dev->pdev->bus, PCI_DEVFN(0, 0), pci_bus_read_config_byte(pdev->bus, PCI_DEVFN(0, 0),
I845_ESMRAMC, &tmp); I845_ESMRAMC, &tmp);
if (tmp & TSEG_ENABLE) { if (tmp & TSEG_ENABLE) {
...@@ -133,7 +134,7 @@ static unsigned long i915_stolen_to_physical(struct drm_device *dev) ...@@ -133,7 +134,7 @@ static unsigned long i915_stolen_to_physical(struct drm_device *dev)
} }
} }
pci_bus_read_config_word(dev->pdev->bus, PCI_DEVFN(0, 0), pci_bus_read_config_word(pdev->bus, PCI_DEVFN(0, 0),
I865_TOUD, &toud); I865_TOUD, &toud);
base = (toud << 16) + tseg_size; base = (toud << 16) + tseg_size;
...@@ -142,13 +143,13 @@ static unsigned long i915_stolen_to_physical(struct drm_device *dev) ...@@ -142,13 +143,13 @@ static unsigned long i915_stolen_to_physical(struct drm_device *dev)
u32 tom; u32 tom;
u8 tmp; u8 tmp;
pci_bus_read_config_byte(dev->pdev->bus, PCI_DEVFN(0, 0), pci_bus_read_config_byte(pdev->bus, PCI_DEVFN(0, 0),
I85X_ESMRAMC, &tmp); I85X_ESMRAMC, &tmp);
if (tmp & TSEG_ENABLE) if (tmp & TSEG_ENABLE)
tseg_size = MB(1); tseg_size = MB(1);
pci_bus_read_config_byte(dev->pdev->bus, PCI_DEVFN(0, 1), pci_bus_read_config_byte(pdev->bus, PCI_DEVFN(0, 1),
I85X_DRB3, &tmp); I85X_DRB3, &tmp);
tom = tmp * MB(32); tom = tmp * MB(32);
...@@ -158,7 +159,7 @@ static unsigned long i915_stolen_to_physical(struct drm_device *dev) ...@@ -158,7 +159,7 @@ static unsigned long i915_stolen_to_physical(struct drm_device *dev)
u32 tom; u32 tom;
u8 tmp; u8 tmp;
pci_bus_read_config_byte(dev->pdev->bus, PCI_DEVFN(0, 0), pci_bus_read_config_byte(pdev->bus, PCI_DEVFN(0, 0),
I845_ESMRAMC, &tmp); I845_ESMRAMC, &tmp);
if (tmp & TSEG_ENABLE) { if (tmp & TSEG_ENABLE) {
...@@ -172,7 +173,7 @@ static unsigned long i915_stolen_to_physical(struct drm_device *dev) ...@@ -172,7 +173,7 @@ static unsigned long i915_stolen_to_physical(struct drm_device *dev)
} }
} }
pci_bus_read_config_byte(dev->pdev->bus, PCI_DEVFN(0, 0), pci_bus_read_config_byte(pdev->bus, PCI_DEVFN(0, 0),
I830_DRB3, &tmp); I830_DRB3, &tmp);
tom = tmp * MB(32); tom = tmp * MB(32);
...@@ -182,7 +183,7 @@ static unsigned long i915_stolen_to_physical(struct drm_device *dev) ...@@ -182,7 +183,7 @@ static unsigned long i915_stolen_to_physical(struct drm_device *dev)
u32 tom; u32 tom;
u8 tmp; u8 tmp;
pci_bus_read_config_byte(dev->pdev->bus, PCI_DEVFN(0, 0), pci_bus_read_config_byte(pdev->bus, PCI_DEVFN(0, 0),
I830_ESMRAMC, &tmp); I830_ESMRAMC, &tmp);
if (tmp & TSEG_ENABLE) { if (tmp & TSEG_ENABLE) {
...@@ -192,7 +193,7 @@ static unsigned long i915_stolen_to_physical(struct drm_device *dev) ...@@ -192,7 +193,7 @@ static unsigned long i915_stolen_to_physical(struct drm_device *dev)
tseg_size = KB(512); tseg_size = KB(512);
} }
pci_bus_read_config_byte(dev->pdev->bus, PCI_DEVFN(0, 0), pci_bus_read_config_byte(pdev->bus, PCI_DEVFN(0, 0),
I830_DRB3, &tmp); I830_DRB3, &tmp);
tom = tmp * MB(32); tom = tmp * MB(32);
......
...@@ -68,7 +68,7 @@ static void wait_rendering(struct drm_i915_gem_object *obj) ...@@ -68,7 +68,7 @@ static void wait_rendering(struct drm_i915_gem_object *obj)
for_each_active(active, idx) for_each_active(active, idx)
i915_gem_active_wait_unlocked(&obj->last_read[idx], i915_gem_active_wait_unlocked(&obj->last_read[idx],
false, NULL, NULL); 0, NULL, NULL);
} }
static void cancel_userptr(struct work_struct *work) static void cancel_userptr(struct work_struct *work)
......
...@@ -336,6 +336,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, ...@@ -336,6 +336,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
{ {
struct drm_device *dev = error_priv->dev; struct drm_device *dev = error_priv->dev;
struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_private *dev_priv = to_i915(dev);
struct pci_dev *pdev = dev_priv->drm.pdev;
struct drm_i915_error_state *error = error_priv->error; struct drm_i915_error_state *error = error_priv->error;
struct drm_i915_error_object *obj; struct drm_i915_error_object *obj;
int i, j, offset, elt; int i, j, offset, elt;
...@@ -367,11 +368,11 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, ...@@ -367,11 +368,11 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
} }
err_printf(m, "Reset count: %u\n", error->reset_count); err_printf(m, "Reset count: %u\n", error->reset_count);
err_printf(m, "Suspend count: %u\n", error->suspend_count); err_printf(m, "Suspend count: %u\n", error->suspend_count);
err_printf(m, "PCI ID: 0x%04x\n", dev->pdev->device); err_printf(m, "PCI ID: 0x%04x\n", pdev->device);
err_printf(m, "PCI Revision: 0x%02x\n", dev->pdev->revision); err_printf(m, "PCI Revision: 0x%02x\n", pdev->revision);
err_printf(m, "PCI Subsystem: %04x:%04x\n", err_printf(m, "PCI Subsystem: %04x:%04x\n",
dev->pdev->subsystem_vendor, pdev->subsystem_vendor,
dev->pdev->subsystem_device); pdev->subsystem_device);
err_printf(m, "IOMMU enabled?: %d\n", error->iommu); err_printf(m, "IOMMU enabled?: %d\n", error->iommu);
if (HAS_CSR(dev)) { if (HAS_CSR(dev)) {
...@@ -488,7 +489,10 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, ...@@ -488,7 +489,10 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
} }
} }
if (ee->num_waiters) { if (IS_ERR(ee->waiters)) {
err_printf(m, "%s --- ? waiters [unable to acquire spinlock]\n",
dev_priv->engine[i].name);
} else if (ee->num_waiters) {
err_printf(m, "%s --- %d waiters\n", err_printf(m, "%s --- %d waiters\n",
dev_priv->engine[i].name, dev_priv->engine[i].name,
ee->num_waiters); ee->num_waiters);
...@@ -647,6 +651,7 @@ static void i915_error_state_free(struct kref *error_ref) ...@@ -647,6 +651,7 @@ static void i915_error_state_free(struct kref *error_ref)
i915_error_object_free(ee->wa_ctx); i915_error_object_free(ee->wa_ctx);
kfree(ee->requests); kfree(ee->requests);
if (!IS_ERR_OR_NULL(ee->waiters))
kfree(ee->waiters); kfree(ee->waiters);
} }
...@@ -932,7 +937,14 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine, ...@@ -932,7 +937,14 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine,
ee->num_waiters = 0; ee->num_waiters = 0;
ee->waiters = NULL; ee->waiters = NULL;
spin_lock(&b->lock); if (RB_EMPTY_ROOT(&b->waiters))
return;
if (!spin_trylock(&b->lock)) {
ee->waiters = ERR_PTR(-EDEADLK);
return;
}
count = 0; count = 0;
for (rb = rb_first(&b->waiters); rb != NULL; rb = rb_next(rb)) for (rb = rb_first(&b->waiters); rb != NULL; rb = rb_next(rb))
count++; count++;
...@@ -946,9 +958,13 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine, ...@@ -946,9 +958,13 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine,
if (!waiter) if (!waiter)
return; return;
ee->waiters = waiter; if (!spin_trylock(&b->lock)) {
kfree(waiter);
ee->waiters = ERR_PTR(-EDEADLK);
return;
}
spin_lock(&b->lock); ee->waiters = waiter;
for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
struct intel_wait *w = container_of(rb, typeof(*w), node); struct intel_wait *w = container_of(rb, typeof(*w), node);
...@@ -1009,7 +1025,7 @@ static void error_record_engine_registers(struct drm_i915_error_state *error, ...@@ -1009,7 +1025,7 @@ static void error_record_engine_registers(struct drm_i915_error_state *error,
if (INTEL_GEN(dev_priv) > 2) if (INTEL_GEN(dev_priv) > 2)
ee->mode = I915_READ_MODE(engine); ee->mode = I915_READ_MODE(engine);
if (I915_NEED_GFX_HWS(dev_priv)) { if (!HWS_NEEDS_PHYSICAL(dev_priv)) {
i915_reg_t mmio; i915_reg_t mmio;
if (IS_GEN7(dev_priv)) { if (IS_GEN7(dev_priv)) {
......
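error_record_engine_waiters() is made safe for the error-capture path: it bails out early when the waiter tree is empty and otherwise uses spin_trylock() instead of spinning on b->lock, recording ERR_PTR(-EDEADLK) so the printer ("unable to acquire spinlock") and the free path can tell "could not look" apart from "no waiters". A condensed, hedged restatement of the pattern with hypothetical types:

	#include <linux/spinlock.h>
	#include <linux/err.h>
	#include <linux/slab.h>

	struct capture {
		void *waiters;		/* array, NULL, or ERR_PTR(-EDEADLK) */
		int num_waiters;
	};

	/*
	 * Best-effort capture: never block on a lock that a wedged CPU might
	 * already hold.  IS_ERR() on ->waiters later tells the consumer the
	 * snapshot was skipped rather than empty.
	 */
	static void capture_waiters(struct capture *cap, spinlock_t *lock,
				    size_t count, size_t elem_size)
	{
		void *buf;

		cap->waiters = NULL;
		cap->num_waiters = 0;

		if (!count)
			return;

		buf = kcalloc(count, elem_size, GFP_ATOMIC);
		if (!buf)
			return;

		if (!spin_trylock(lock)) {
			kfree(buf);
			cap->waiters = ERR_PTR(-EDEADLK);
			return;
		}

		/* ... copy up to 'count' waiter records into buf ... */
		cap->waiters = buf;
		cap->num_waiters = count;

		spin_unlock(lock);
	}

	/* Free path mirrors the hunk: only kfree() real allocations. */
	static void free_capture(struct capture *cap)
	{
		if (!IS_ERR_OR_NULL(cap->waiters))
			kfree(cap->waiters);
	}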
(Diffs for the remaining 38 changed files are collapsed in this view.)