diff --git a/Documentation/gpu/drm-kms-helpers.rst b/Documentation/gpu/drm-kms-helpers.rst index 14102ae035dcf06025c03b818e5d34ce2921f428..0fe726a6ee6786b57337f8e3c357bb409eee4869 100644 --- a/Documentation/gpu/drm-kms-helpers.rst +++ b/Documentation/gpu/drm-kms-helpers.rst @@ -181,6 +181,12 @@ Panel Helper Reference .. kernel-doc:: drivers/gpu/drm/drm_panel_orientation_quirks.c :export: +HDCP Helper Functions Reference +=============================== + +.. kernel-doc:: drivers/gpu/drm/drm_hdcp.c + :export: + Display Port Helper Functions Reference ======================================= diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 4c3dc4268b652c0a1b07e4c0455ad798585987b6..af37253a8ec412a3c7b1d27a42ec50606e4d2e56 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -17,7 +17,7 @@ drm-y := drm_auth.o drm_cache.o \ drm_plane.o drm_color_mgmt.o drm_print.o \ drm_dumb_buffers.o drm_mode_config.o drm_vblank.o \ drm_syncobj.o drm_lease.o drm_writeback.o drm_client.o \ - drm_atomic_uapi.o + drm_atomic_uapi.o drm_hdcp.o drm-$(CONFIG_DRM_LEGACY) += drm_legacy_misc.o drm_bufs.o drm_context.o drm_dma.o drm_scatter.o drm_lock.o drm-$(CONFIG_DRM_LIB_RANDOM) += lib/drm_random.o diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c index 225f5e5dd69b8b525fb73f2f356b54f65b85bf0e..257d69b21d994ead2b320d2a6d6cdee676398553 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c +++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c @@ -115,7 +115,7 @@ EXPORT_SYMBOL_GPL(analogix_dp_psr_enabled); int analogix_dp_enable_psr(struct analogix_dp_device *dp) { - struct edp_vsc_psr psr_vsc; + struct dp_sdp psr_vsc; if (!dp->psr_enable) return 0; @@ -127,8 +127,8 @@ int analogix_dp_enable_psr(struct analogix_dp_device *dp) psr_vsc.sdp_header.HB2 = 0x2; psr_vsc.sdp_header.HB3 = 0x8; - psr_vsc.DB0 = 0; - psr_vsc.DB1 = EDP_VSC_PSR_STATE_ACTIVE | EDP_VSC_PSR_CRC_VALUES_VALID; + psr_vsc.db[0] = 0; + psr_vsc.db[1] = EDP_VSC_PSR_STATE_ACTIVE | EDP_VSC_PSR_CRC_VALUES_VALID; return analogix_dp_send_psr_spd(dp, &psr_vsc, true); } @@ -136,7 +136,7 @@ EXPORT_SYMBOL_GPL(analogix_dp_enable_psr); int analogix_dp_disable_psr(struct analogix_dp_device *dp) { - struct edp_vsc_psr psr_vsc; + struct dp_sdp psr_vsc; int ret; if (!dp->psr_enable) @@ -149,8 +149,8 @@ int analogix_dp_disable_psr(struct analogix_dp_device *dp) psr_vsc.sdp_header.HB2 = 0x2; psr_vsc.sdp_header.HB3 = 0x8; - psr_vsc.DB0 = 0; - psr_vsc.DB1 = 0; + psr_vsc.db[0] = 0; + psr_vsc.db[1] = 0; ret = drm_dp_dpcd_writeb(&dp->aux, DP_SET_POWER, DP_SET_POWER_D0); if (ret != 1) { diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h index 769255dc6e996bab7beb295a6c11cb9adf1c32e3..3e5fe90edf71a094435a70433b8064e292d3cd83 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h +++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h @@ -254,7 +254,7 @@ void analogix_dp_enable_scrambling(struct analogix_dp_device *dp); void analogix_dp_disable_scrambling(struct analogix_dp_device *dp); void analogix_dp_enable_psr_crc(struct analogix_dp_device *dp); int analogix_dp_send_psr_spd(struct analogix_dp_device *dp, - struct edp_vsc_psr *vsc, bool blocking); + struct dp_sdp *vsc, bool blocking); ssize_t analogix_dp_transfer(struct analogix_dp_device *dp, struct drm_dp_aux_msg *msg); diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c index a5f2763d72e4b404eda77374e88da61806840474..cf17e2e21b1596c316cdb07c73ef5c21b787be5a 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c +++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c @@ -1041,7 +1041,7 @@ static ssize_t analogix_dp_get_psr_status(struct analogix_dp_device *dp) } int analogix_dp_send_psr_spd(struct analogix_dp_device *dp, - struct edp_vsc_psr *vsc, bool blocking) + struct dp_sdp *vsc, bool blocking) { unsigned int val; int ret; @@ -1069,8 +1069,8 @@ int analogix_dp_send_psr_spd(struct analogix_dp_device *dp, writel(0x5D, dp->reg_base + ANALOGIX_DP_SPD_PB3); /* configure DB0 / DB1 values */ - writel(vsc->DB0, dp->reg_base + ANALOGIX_DP_VSC_SHADOW_DB0); - writel(vsc->DB1, dp->reg_base + ANALOGIX_DP_VSC_SHADOW_DB1); + writel(vsc->db[0], dp->reg_base + ANALOGIX_DP_VSC_SHADOW_DB0); + writel(vsc->db[1], dp->reg_base + ANALOGIX_DP_VSC_SHADOW_DB1); /* set reuse spd inforframe */ val = readl(dp->reg_base + ANALOGIX_DP_VIDEO_CTL_3); @@ -1092,8 +1092,8 @@ int analogix_dp_send_psr_spd(struct analogix_dp_device *dp, ret = readx_poll_timeout(analogix_dp_get_psr_status, dp, psr_status, psr_status >= 0 && - ((vsc->DB1 && psr_status == DP_PSR_SINK_ACTIVE_RFB) || - (!vsc->DB1 && psr_status == DP_PSR_SINK_INACTIVE)), 1500, + ((vsc->db[1] && psr_status == DP_PSR_SINK_ACTIVE_RFB) || + (!vsc->db[1] && psr_status == DP_PSR_SINK_INACTIVE)), 1500, DP_TIMEOUT_PSR_LOOP_MS * 1000); if (ret) { dev_warn(dp->dev, "Failed to apply PSR %d\n", ret); diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index 125605ff45afd3a02b8e043cc4a9135db9ca5c23..eb22e8bdd8531245b2199a9be0222bd402e1f8e4 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -741,7 +741,7 @@ static int drm_atomic_connector_set_property(struct drm_connector *connector, state->content_type = val; } else if (property == connector->scaling_mode_property) { state->scaling_mode = val; - } else if (property == connector->content_protection_property) { + } else if (property == config->content_protection_property) { if (val == DRM_MODE_CONTENT_PROTECTION_ENABLED) { DRM_DEBUG_KMS("only drivers can set CP Enabled\n"); return -EINVAL; @@ -826,7 +826,7 @@ drm_atomic_connector_get_property(struct drm_connector *connector, } else if (property == config->hdr_output_metadata_property) { *val = state->hdr_output_metadata ? state->hdr_output_metadata->base.id : 0; - } else if (property == connector->content_protection_property) { + } else if (property == config->content_protection_property) { *val = state->content_protection; } else if (property == config->writeback_fb_id_property) { /* Writeback framebuffer is one-shot, write and forget */ diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index 365ace0c0c9e492f715c8eff32a95d73cedf8b24..c9ac8b9e83ea229efa41253a4205b73ce3a1393a 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -823,13 +823,6 @@ static const struct drm_prop_enum_list drm_tv_subconnector_enum_list[] = { DRM_ENUM_NAME_FN(drm_get_tv_subconnector_name, drm_tv_subconnector_enum_list) -static struct drm_prop_enum_list drm_cp_enum_list[] = { - { DRM_MODE_CONTENT_PROTECTION_UNDESIRED, "Undesired" }, - { DRM_MODE_CONTENT_PROTECTION_DESIRED, "Desired" }, - { DRM_MODE_CONTENT_PROTECTION_ENABLED, "Enabled" }, -}; -DRM_ENUM_NAME_FN(drm_get_content_protection_name, drm_cp_enum_list) - static const struct drm_prop_enum_list hdmi_colorspaces[] = { /* For Default case, driver will set the colorspace */ { DRM_MODE_COLORIMETRY_DEFAULT, "Default" }, @@ -1515,42 +1508,6 @@ int drm_connector_attach_scaling_mode_property(struct drm_connector *connector, } EXPORT_SYMBOL(drm_connector_attach_scaling_mode_property); -/** - * drm_connector_attach_content_protection_property - attach content protection - * property - * - * @connector: connector to attach CP property on. - * - * This is used to add support for content protection on select connectors. - * Content Protection is intentionally vague to allow for different underlying - * technologies, however it is most implemented by HDCP. - * - * The content protection will be set to &drm_connector_state.content_protection - * - * Returns: - * Zero on success, negative errno on failure. - */ -int drm_connector_attach_content_protection_property( - struct drm_connector *connector) -{ - struct drm_device *dev = connector->dev; - struct drm_property *prop; - - prop = drm_property_create_enum(dev, 0, "Content Protection", - drm_cp_enum_list, - ARRAY_SIZE(drm_cp_enum_list)); - if (!prop) - return -ENOMEM; - - drm_object_attach_property(&connector->base, prop, - DRM_MODE_CONTENT_PROTECTION_UNDESIRED); - - connector->content_protection_property = prop; - - return 0; -} -EXPORT_SYMBOL(drm_connector_attach_content_protection_property); - /** * drm_mode_create_aspect_ratio_property - create aspect ratio property * @dev: DRM device diff --git a/drivers/gpu/drm/drm_hdcp.c b/drivers/gpu/drm/drm_hdcp.c new file mode 100644 index 0000000000000000000000000000000000000000..cd837bd409f730483519728d54517079663b298b --- /dev/null +++ b/drivers/gpu/drm/drm_hdcp.c @@ -0,0 +1,382 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 Intel Corporation. + * + * Authors: + * Ramalingam C + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "drm_internal.h" + +static struct hdcp_srm { + u32 revoked_ksv_cnt; + u8 *revoked_ksv_list; + + /* Mutex to protect above struct member */ + struct mutex mutex; +} *srm_data; + +static inline void drm_hdcp_print_ksv(const u8 *ksv) +{ + DRM_DEBUG("\t%#02x, %#02x, %#02x, %#02x, %#02x\n", + ksv[0], ksv[1], ksv[2], ksv[3], ksv[4]); +} + +static u32 drm_hdcp_get_revoked_ksv_count(const u8 *buf, u32 vrls_length) +{ + u32 parsed_bytes = 0, ksv_count = 0, vrl_ksv_cnt, vrl_sz; + + while (parsed_bytes < vrls_length) { + vrl_ksv_cnt = *buf; + ksv_count += vrl_ksv_cnt; + + vrl_sz = (vrl_ksv_cnt * DRM_HDCP_KSV_LEN) + 1; + buf += vrl_sz; + parsed_bytes += vrl_sz; + } + + /* + * When vrls are not valid, ksvs are not considered. + * Hence SRM will be discarded. + */ + if (parsed_bytes != vrls_length) + ksv_count = 0; + + return ksv_count; +} + +static u32 drm_hdcp_get_revoked_ksvs(const u8 *buf, u8 *revoked_ksv_list, + u32 vrls_length) +{ + u32 parsed_bytes = 0, ksv_count = 0; + u32 vrl_ksv_cnt, vrl_ksv_sz, vrl_idx = 0; + + do { + vrl_ksv_cnt = *buf; + vrl_ksv_sz = vrl_ksv_cnt * DRM_HDCP_KSV_LEN; + + buf++; + + DRM_DEBUG("vrl: %d, Revoked KSVs: %d\n", vrl_idx++, + vrl_ksv_cnt); + memcpy(revoked_ksv_list, buf, vrl_ksv_sz); + + ksv_count += vrl_ksv_cnt; + revoked_ksv_list += vrl_ksv_sz; + buf += vrl_ksv_sz; + + parsed_bytes += (vrl_ksv_sz + 1); + } while (parsed_bytes < vrls_length); + + return ksv_count; +} + +static inline u32 get_vrl_length(const u8 *buf) +{ + return drm_hdcp_be24_to_cpu(buf); +} + +static int drm_hdcp_parse_hdcp1_srm(const u8 *buf, size_t count) +{ + struct hdcp_srm_header *header; + u32 vrl_length, ksv_count; + + if (count < (sizeof(struct hdcp_srm_header) + + DRM_HDCP_1_4_VRL_LENGTH_SIZE + DRM_HDCP_1_4_DCP_SIG_SIZE)) { + DRM_ERROR("Invalid blob length\n"); + return -EINVAL; + } + + header = (struct hdcp_srm_header *)buf; + DRM_DEBUG("SRM ID: 0x%x, SRM Ver: 0x%x, SRM Gen No: 0x%x\n", + header->srm_id, + be16_to_cpu(header->srm_version), header->srm_gen_no); + + WARN_ON(header->reserved); + + buf = buf + sizeof(*header); + vrl_length = get_vrl_length(buf); + if (count < (sizeof(struct hdcp_srm_header) + vrl_length) || + vrl_length < (DRM_HDCP_1_4_VRL_LENGTH_SIZE + + DRM_HDCP_1_4_DCP_SIG_SIZE)) { + DRM_ERROR("Invalid blob length or vrl length\n"); + return -EINVAL; + } + + /* Length of the all vrls combined */ + vrl_length -= (DRM_HDCP_1_4_VRL_LENGTH_SIZE + + DRM_HDCP_1_4_DCP_SIG_SIZE); + + if (!vrl_length) { + DRM_ERROR("No vrl found\n"); + return -EINVAL; + } + + buf += DRM_HDCP_1_4_VRL_LENGTH_SIZE; + ksv_count = drm_hdcp_get_revoked_ksv_count(buf, vrl_length); + if (!ksv_count) { + DRM_DEBUG("Revoked KSV count is 0\n"); + return count; + } + + kfree(srm_data->revoked_ksv_list); + srm_data->revoked_ksv_list = kcalloc(ksv_count, DRM_HDCP_KSV_LEN, + GFP_KERNEL); + if (!srm_data->revoked_ksv_list) { + DRM_ERROR("Out of Memory\n"); + return -ENOMEM; + } + + if (drm_hdcp_get_revoked_ksvs(buf, srm_data->revoked_ksv_list, + vrl_length) != ksv_count) { + srm_data->revoked_ksv_cnt = 0; + kfree(srm_data->revoked_ksv_list); + return -EINVAL; + } + + srm_data->revoked_ksv_cnt = ksv_count; + return count; +} + +static int drm_hdcp_parse_hdcp2_srm(const u8 *buf, size_t count) +{ + struct hdcp_srm_header *header; + u32 vrl_length, ksv_count, ksv_sz; + + if (count < (sizeof(struct hdcp_srm_header) + + DRM_HDCP_2_VRL_LENGTH_SIZE + DRM_HDCP_2_DCP_SIG_SIZE)) { + DRM_ERROR("Invalid blob length\n"); + return -EINVAL; + } + + header = (struct hdcp_srm_header *)buf; + DRM_DEBUG("SRM ID: 0x%x, SRM Ver: 0x%x, SRM Gen No: 0x%x\n", + header->srm_id & DRM_HDCP_SRM_ID_MASK, + be16_to_cpu(header->srm_version), header->srm_gen_no); + + if (header->reserved) + return -EINVAL; + + buf = buf + sizeof(*header); + vrl_length = get_vrl_length(buf); + + if (count < (sizeof(struct hdcp_srm_header) + vrl_length) || + vrl_length < (DRM_HDCP_2_VRL_LENGTH_SIZE + + DRM_HDCP_2_DCP_SIG_SIZE)) { + DRM_ERROR("Invalid blob length or vrl length\n"); + return -EINVAL; + } + + /* Length of the all vrls combined */ + vrl_length -= (DRM_HDCP_2_VRL_LENGTH_SIZE + + DRM_HDCP_2_DCP_SIG_SIZE); + + if (!vrl_length) { + DRM_ERROR("No vrl found\n"); + return -EINVAL; + } + + buf += DRM_HDCP_2_VRL_LENGTH_SIZE; + ksv_count = (*buf << 2) | DRM_HDCP_2_KSV_COUNT_2_LSBITS(*(buf + 1)); + if (!ksv_count) { + DRM_DEBUG("Revoked KSV count is 0\n"); + return count; + } + + kfree(srm_data->revoked_ksv_list); + srm_data->revoked_ksv_list = kcalloc(ksv_count, DRM_HDCP_KSV_LEN, + GFP_KERNEL); + if (!srm_data->revoked_ksv_list) { + DRM_ERROR("Out of Memory\n"); + return -ENOMEM; + } + + ksv_sz = ksv_count * DRM_HDCP_KSV_LEN; + buf += DRM_HDCP_2_NO_OF_DEV_PLUS_RESERVED_SZ; + + DRM_DEBUG("Revoked KSVs: %d\n", ksv_count); + memcpy(srm_data->revoked_ksv_list, buf, ksv_sz); + + srm_data->revoked_ksv_cnt = ksv_count; + return count; +} + +static inline bool is_srm_version_hdcp1(const u8 *buf) +{ + return *buf == (u8)(DRM_HDCP_1_4_SRM_ID << 4); +} + +static inline bool is_srm_version_hdcp2(const u8 *buf) +{ + return *buf == (u8)(DRM_HDCP_2_SRM_ID << 4 | DRM_HDCP_2_INDICATOR); +} + +static void drm_hdcp_srm_update(const u8 *buf, size_t count) +{ + if (count < sizeof(struct hdcp_srm_header)) + return; + + if (is_srm_version_hdcp1(buf)) + drm_hdcp_parse_hdcp1_srm(buf, count); + else if (is_srm_version_hdcp2(buf)) + drm_hdcp_parse_hdcp2_srm(buf, count); +} + +static void drm_hdcp_request_srm(struct drm_device *drm_dev) +{ + char fw_name[36] = "display_hdcp_srm.bin"; + const struct firmware *fw; + + int ret; + + ret = request_firmware_direct(&fw, (const char *)fw_name, + drm_dev->dev); + if (ret < 0) + goto exit; + + if (fw->size && fw->data) + drm_hdcp_srm_update(fw->data, fw->size); + +exit: + release_firmware(fw); +} + +/** + * drm_hdcp_check_ksvs_revoked - Check the revoked status of the IDs + * + * @drm_dev: drm_device for which HDCP revocation check is requested + * @ksvs: List of KSVs (HDCP receiver IDs) + * @ksv_count: KSV count passed in through @ksvs + * + * This function reads the HDCP System renewability Message(SRM Table) + * from userspace as a firmware and parses it for the revoked HDCP + * KSVs(Receiver IDs) detected by DCP LLC. Once the revoked KSVs are known, + * revoked state of the KSVs in the list passed in by display drivers are + * decided and response is sent. + * + * SRM should be presented in the name of "display_hdcp_srm.bin". + * + * Returns: + * TRUE on any of the KSV is revoked, else FALSE. + */ +bool drm_hdcp_check_ksvs_revoked(struct drm_device *drm_dev, u8 *ksvs, + u32 ksv_count) +{ + u32 rev_ksv_cnt, cnt, i, j; + u8 *rev_ksv_list; + + if (!srm_data) + return false; + + mutex_lock(&srm_data->mutex); + drm_hdcp_request_srm(drm_dev); + + rev_ksv_cnt = srm_data->revoked_ksv_cnt; + rev_ksv_list = srm_data->revoked_ksv_list; + + /* If the Revoked ksv list is empty */ + if (!rev_ksv_cnt || !rev_ksv_list) { + mutex_unlock(&srm_data->mutex); + return false; + } + + for (cnt = 0; cnt < ksv_count; cnt++) { + rev_ksv_list = srm_data->revoked_ksv_list; + for (i = 0; i < rev_ksv_cnt; i++) { + for (j = 0; j < DRM_HDCP_KSV_LEN; j++) + if (ksvs[j] != rev_ksv_list[j]) { + break; + } else if (j == (DRM_HDCP_KSV_LEN - 1)) { + DRM_DEBUG("Revoked KSV is "); + drm_hdcp_print_ksv(ksvs); + mutex_unlock(&srm_data->mutex); + return true; + } + /* Move the offset to next KSV in the revoked list */ + rev_ksv_list += DRM_HDCP_KSV_LEN; + } + + /* Iterate to next ksv_offset */ + ksvs += DRM_HDCP_KSV_LEN; + } + mutex_unlock(&srm_data->mutex); + return false; +} +EXPORT_SYMBOL_GPL(drm_hdcp_check_ksvs_revoked); + +int drm_setup_hdcp_srm(struct class *drm_class) +{ + srm_data = kzalloc(sizeof(*srm_data), GFP_KERNEL); + if (!srm_data) + return -ENOMEM; + mutex_init(&srm_data->mutex); + + return 0; +} + +void drm_teardown_hdcp_srm(struct class *drm_class) +{ + if (srm_data) { + kfree(srm_data->revoked_ksv_list); + kfree(srm_data); + } +} + +static struct drm_prop_enum_list drm_cp_enum_list[] = { + { DRM_MODE_CONTENT_PROTECTION_UNDESIRED, "Undesired" }, + { DRM_MODE_CONTENT_PROTECTION_DESIRED, "Desired" }, + { DRM_MODE_CONTENT_PROTECTION_ENABLED, "Enabled" }, +}; +DRM_ENUM_NAME_FN(drm_get_content_protection_name, drm_cp_enum_list) + +/** + * drm_connector_attach_content_protection_property - attach content protection + * property + * + * @connector: connector to attach CP property on. + * + * This is used to add support for content protection on select connectors. + * Content Protection is intentionally vague to allow for different underlying + * technologies, however it is most implemented by HDCP. + * + * The content protection will be set to &drm_connector_state.content_protection + * + * Returns: + * Zero on success, negative errno on failure. + */ +int drm_connector_attach_content_protection_property( + struct drm_connector *connector) +{ + struct drm_device *dev = connector->dev; + struct drm_property *prop = + dev->mode_config.content_protection_property; + + if (!prop) + prop = drm_property_create_enum(dev, 0, "Content Protection", + drm_cp_enum_list, + ARRAY_SIZE(drm_cp_enum_list)); + if (!prop) + return -ENOMEM; + + drm_object_attach_property(&connector->base, prop, + DRM_MODE_CONTENT_PROTECTION_UNDESIRED); + dev->mode_config.content_protection_property = prop; + + return 0; +} +EXPORT_SYMBOL(drm_connector_attach_content_protection_property); diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h index e6281d9f9c875f9f19d2b9904dab07434a66c092..e6a900c7c1fc39c64d5c171ff468e6906a4369a4 100644 --- a/drivers/gpu/drm/drm_internal.h +++ b/drivers/gpu/drm/drm_internal.h @@ -108,6 +108,7 @@ void drm_sysfs_connector_remove(struct drm_connector *connector); void drm_sysfs_lease_event(struct drm_device *dev); /* drm_gem.c */ +struct drm_gem_object; int drm_gem_init(struct drm_device *dev); void drm_gem_destroy(struct drm_device *dev); int drm_gem_handle_create_tail(struct drm_file *file_priv, @@ -203,3 +204,7 @@ int drm_syncobj_query_ioctl(struct drm_device *dev, void *data, void drm_framebuffer_print_info(struct drm_printer *p, unsigned int indent, const struct drm_framebuffer *fb); int drm_framebuffer_debugfs_init(struct drm_minor *minor); + +/* drm_hdcp.c */ +int drm_setup_hdcp_srm(struct class *drm_class); +void drm_teardown_hdcp_srm(struct class *drm_class); diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c index ecb7b33002bb27de0af599702a354e7c241cd6ed..18b1ac442997c0e8717937f073176b4408b2477e 100644 --- a/drivers/gpu/drm/drm_sysfs.c +++ b/drivers/gpu/drm/drm_sysfs.c @@ -78,6 +78,7 @@ int drm_sysfs_init(void) } drm_class->devnode = drm_devnode; + drm_setup_hdcp_srm(drm_class); return 0; } @@ -90,6 +91,7 @@ void drm_sysfs_destroy(void) { if (IS_ERR_OR_NULL(drm_class)) return; + drm_teardown_hdcp_srm(drm_class); class_remove_file(drm_class, &class_attr_version.attr); class_destroy(drm_class); drm_class = NULL; diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index 255f224db64b53ef2c5d3334aa9780a3b8c92d7c..978cb39a47a8239ce839f8478a9d87bfb08680a1 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -133,3 +133,9 @@ depends on DRM_I915 depends on EXPERT source "drivers/gpu/drm/i915/Kconfig.debug" endmenu + +menu "drm/i915 Profile Guided Optimisation" + visible if EXPERT + depends on DRM_I915 + source "drivers/gpu/drm/i915/Kconfig.profile" +endmenu diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile new file mode 100644 index 0000000000000000000000000000000000000000..0e5db98da8f3a952b14d5e1b8d0cb14185a607bd --- /dev/null +++ b/drivers/gpu/drm/i915/Kconfig.profile @@ -0,0 +1,13 @@ +config DRM_I915_SPIN_REQUEST + int + default 5 # microseconds + help + Before sleeping waiting for a request (GPU operation) to complete, + we may spend some time polling for its completion. As the IRQ may + take a non-negligible time to setup, we do a short spin first to + check if the request will complete in the time it would have taken + us to enable the interrupt. + + May be 0 to disable the initial spin. In practice, we estimate + the cost of enabling the interrupt (if currently disabled) to be + a few microseconds. diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index fbcb0904f4a828b3ef49153ad365540bc20b77c8..68106fe35a04bfa29c9eb6e8835c8af33e0cee6c 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -35,32 +35,56 @@ subdir-ccflags-y += \ # Extra header tests include $(src)/Makefile.header-test +subdir-ccflags-y += -I$(src) + # Please keep these build lists sorted! # core driver code i915-y += i915_drv.o \ i915_irq.o \ - i915_memcpy.o \ - i915_mm.o \ i915_params.o \ i915_pci.o \ - i915_reset.o \ i915_suspend.o \ - i915_sw_fence.o \ - i915_syncmap.o \ i915_sysfs.o \ - i915_user_extensions.o \ intel_csr.o \ intel_device_info.o \ intel_pm.o \ intel_runtime_pm.o \ - intel_workarounds.o + intel_wakeref.o \ + intel_uncore.o + +# core library code +i915-y += \ + i915_memcpy.o \ + i915_mm.o \ + i915_sw_fence.o \ + i915_syncmap.o \ + i915_user_extensions.o i915-$(CONFIG_COMPAT) += i915_ioc32.o i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o -# GEM code +# "Graphics Technology" (aka we talk to the gpu) +obj-y += gt/ +gt-y += \ + gt/intel_breadcrumbs.o \ + gt/intel_context.o \ + gt/intel_engine_cs.o \ + gt/intel_engine_pm.o \ + gt/intel_gt_pm.o \ + gt/intel_hangcheck.o \ + gt/intel_lrc.o \ + gt/intel_reset.o \ + gt/intel_ringbuffer.o \ + gt/intel_mocs.o \ + gt/intel_sseu.o \ + gt/intel_workarounds.o +gt-$(CONFIG_DRM_I915_SELFTEST) += \ + gt/mock_engine.o +i915-y += $(gt-y) + +# GEM (Graphics Execution Management) code i915-y += \ i915_active.o \ i915_cmd_parser.o \ @@ -75,6 +99,7 @@ i915-y += \ i915_gem_internal.o \ i915_gem.o \ i915_gem_object.o \ + i915_gem_pm.o \ i915_gem_render_state.o \ i915_gem_shrinker.o \ i915_gem_stolen.o \ @@ -88,14 +113,6 @@ i915-y += \ i915_timeline.o \ i915_trace_points.o \ i915_vma.o \ - intel_breadcrumbs.o \ - intel_context.o \ - intel_engine_cs.o \ - intel_hangcheck.o \ - intel_lrc.o \ - intel_mocs.o \ - intel_ringbuffer.o \ - intel_uncore.o \ intel_wopcm.o # general-purpose microcontroller (GuC) support @@ -159,8 +176,8 @@ i915-y += dvo_ch7017.o \ intel_dsi_dcs_backlight.o \ intel_dsi_vbt.o \ intel_dvo.o \ + intel_gmbus.o \ intel_hdmi.o \ - intel_i2c.o \ intel_lspcon.o \ intel_lvds.o \ intel_panel.o \ @@ -176,6 +193,7 @@ i915-$(CONFIG_DRM_I915_SELFTEST) += \ selftests/i915_random.o \ selftests/i915_selftest.o \ selftests/igt_flush_test.o \ + selftests/igt_gem_utils.o \ selftests/igt_live_test.o \ selftests/igt_reset.o \ selftests/igt_spinner.o diff --git a/drivers/gpu/drm/i915/Makefile.header-test b/drivers/gpu/drm/i915/Makefile.header-test index c1c391816fa77da72ced9675fb2afd0c3885c93d..3a9663002d4a42d8fbe5ca1730e50097e85cca5e 100644 --- a/drivers/gpu/drm/i915/Makefile.header-test +++ b/drivers/gpu/drm/i915/Makefile.header-test @@ -4,37 +4,65 @@ # Test the headers are compilable as standalone units header_test := \ i915_active_types.h \ + i915_debugfs.h \ + i915_drv.h \ i915_gem_context_types.h \ + i915_gem_pm.h \ + i915_irq.h \ + i915_params.h \ i915_priolist_types.h \ + i915_reg.h \ i915_scheduler_types.h \ i915_timeline_types.h \ + i915_utils.h \ + intel_acpi.h \ + intel_atomic.h \ intel_atomic_plane.h \ intel_audio.h \ + intel_bios.h \ intel_cdclk.h \ intel_color.h \ + intel_combo_phy.h \ intel_connector.h \ - intel_context_types.h \ intel_crt.h \ intel_csr.h \ intel_ddi.h \ intel_dp.h \ + intel_dp_aux_backlight.h \ + intel_dp_link_training.h \ + intel_dp_mst.h \ + intel_dpio_phy.h \ + intel_dpll_mgr.h \ + intel_drv.h \ + intel_dsi.h \ + intel_dsi_dcs_backlight.h \ intel_dvo.h \ - intel_engine_types.h \ + intel_dvo_dev.h \ intel_fbc.h \ intel_fbdev.h \ + intel_fifo_underrun.h \ intel_frontbuffer.h \ + intel_gmbus.h \ intel_hdcp.h \ intel_hdmi.h \ + intel_hotplug.h \ + intel_lpe_audio.h \ intel_lspcon.h \ intel_lvds.h \ + intel_overlay.h \ intel_panel.h \ intel_pipe_crc.h \ intel_pm.h \ intel_psr.h \ + intel_quirks.h \ + intel_runtime_pm.h \ intel_sdvo.h \ + intel_sideband.h \ intel_sprite.h \ intel_tv.h \ - intel_workarounds_types.h + intel_uncore.h \ + intel_vdsc.h \ + intel_wakeref.h quiet_cmd_header_test = HDRTEST $@ cmd_header_test = echo "\#include \"$( $@ diff --git a/drivers/gpu/drm/i915/dvo_ch7017.c b/drivers/gpu/drm/i915/dvo_ch7017.c index caac9942e1e3ab52ac0fc1c0150536658215adf5..602380fe74f3e9277d854f98f926ac158a06e998 100644 --- a/drivers/gpu/drm/i915/dvo_ch7017.c +++ b/drivers/gpu/drm/i915/dvo_ch7017.c @@ -25,7 +25,8 @@ * */ -#include "dvo.h" +#include "intel_drv.h" +#include "intel_dvo_dev.h" #define CH7017_TV_DISPLAY_MODE 0x00 #define CH7017_FLICKER_FILTER 0x01 diff --git a/drivers/gpu/drm/i915/dvo_ch7xxx.c b/drivers/gpu/drm/i915/dvo_ch7xxx.c index 397ac523372675e5baf37c96d56d7478de3f9897..e070bebee7b5eb5f74b69b2e9c92031dc891709b 100644 --- a/drivers/gpu/drm/i915/dvo_ch7xxx.c +++ b/drivers/gpu/drm/i915/dvo_ch7xxx.c @@ -26,7 +26,8 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. **************************************************************************/ -#include "dvo.h" +#include "intel_drv.h" +#include "intel_dvo_dev.h" #define CH7xxx_REG_VID 0x4a #define CH7xxx_REG_DID 0x4b diff --git a/drivers/gpu/drm/i915/dvo_ivch.c b/drivers/gpu/drm/i915/dvo_ivch.c index 24278cc490905dcf04e6de7dfbfec60ebda4097a..09dba35f3ffae963c4a785b821195a629e5cdad7 100644 --- a/drivers/gpu/drm/i915/dvo_ivch.c +++ b/drivers/gpu/drm/i915/dvo_ivch.c @@ -29,7 +29,8 @@ * */ -#include "dvo.h" +#include "intel_drv.h" +#include "intel_dvo_dev.h" /* * register definitions for the i82807aa. diff --git a/drivers/gpu/drm/i915/dvo_ns2501.c b/drivers/gpu/drm/i915/dvo_ns2501.c index c584e01dc8dc39c75b78bb31252a4efe8d666616..c83a5d88d62bd8aabb2816ff5c090f205918c317 100644 --- a/drivers/gpu/drm/i915/dvo_ns2501.c +++ b/drivers/gpu/drm/i915/dvo_ns2501.c @@ -26,9 +26,10 @@ * */ -#include "dvo.h" -#include "i915_reg.h" #include "i915_drv.h" +#include "i915_reg.h" +#include "intel_drv.h" +#include "intel_dvo_dev.h" #define NS2501_VID 0x1305 #define NS2501_DID 0x6726 diff --git a/drivers/gpu/drm/i915/dvo_sil164.c b/drivers/gpu/drm/i915/dvo_sil164.c index 4ae5d8fd9ff0fcf5d542e0405cefd2b08694c76a..04698eaeb632e610283827340e912b9d3f9ae827 100644 --- a/drivers/gpu/drm/i915/dvo_sil164.c +++ b/drivers/gpu/drm/i915/dvo_sil164.c @@ -26,7 +26,8 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. **************************************************************************/ -#include "dvo.h" +#include "intel_drv.h" +#include "intel_dvo_dev.h" #define SIL164_VID 0x0001 #define SIL164_DID 0x0006 diff --git a/drivers/gpu/drm/i915/dvo_tfp410.c b/drivers/gpu/drm/i915/dvo_tfp410.c index d603bc2f2506c5f20b2e17d83a6adb3605b1b8b5..623114ee73cdefd25ddaef44eb21dfa457b94958 100644 --- a/drivers/gpu/drm/i915/dvo_tfp410.c +++ b/drivers/gpu/drm/i915/dvo_tfp410.c @@ -25,7 +25,8 @@ * */ -#include "dvo.h" +#include "intel_drv.h" +#include "intel_dvo_dev.h" /* register definitions according to the TFP410 data sheet */ #define TFP410_VID 0x014C diff --git a/drivers/gpu/drm/i915/gt/Makefile b/drivers/gpu/drm/i915/gt/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..1c75b5c9790c7ff56695f13f85ac79017afb5496 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/Makefile @@ -0,0 +1,2 @@ +# Extra header tests +include $(src)/Makefile.header-test diff --git a/drivers/gpu/drm/i915/gt/Makefile.header-test b/drivers/gpu/drm/i915/gt/Makefile.header-test new file mode 100644 index 0000000000000000000000000000000000000000..61e06cbb4b324cb94cc27b536d92a37cda2e9497 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/Makefile.header-test @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: MIT +# Copyright © 2019 Intel Corporation + +# Test the headers are compilable as standalone units +header_test := $(notdir $(wildcard $(src)/*.h)) + +quiet_cmd_header_test = HDRTEST $@ + cmd_header_test = echo "\#include \"$( $@ + +header_test_%.c: %.h + $(call cmd,header_test) + +extra-$(CONFIG_DRM_I915_WERROR) += \ + $(foreach h,$(header_test),$(patsubst %.h,header_test_%.o,$(h))) + +clean-files += $(foreach h,$(header_test),$(patsubst %.h,header_test_%.c,$(h))) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c similarity index 95% rename from drivers/gpu/drm/i915/intel_breadcrumbs.c rename to drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index 832cb6b1e9bd437d7916ec21508c33330e0921a3..c092bdf5f0bf26c1734e2457818bc03c9558d1ad 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -81,6 +81,22 @@ static inline bool __request_completed(const struct i915_request *rq) return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno); } +__maybe_unused static bool +check_signal_order(struct intel_context *ce, struct i915_request *rq) +{ + if (!list_is_last(&rq->signal_link, &ce->signals) && + i915_seqno_passed(rq->fence.seqno, + list_next_entry(rq, signal_link)->fence.seqno)) + return false; + + if (!list_is_first(&rq->signal_link, &ce->signals) && + i915_seqno_passed(list_prev_entry(rq, signal_link)->fence.seqno, + rq->fence.seqno)) + return false; + + return true; +} + static bool __dma_fence_signal(struct dma_fence *fence) { @@ -130,6 +146,8 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine) struct i915_request *rq = list_entry(pos, typeof(*rq), signal_link); + GEM_BUG_ON(!check_signal_order(ce, rq)); + if (!__request_completed(rq)) break; @@ -312,6 +330,7 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq) list_add(&rq->signal_link, pos); if (pos == &ce->signals) /* catch transitions from empty list */ list_move_tail(&ce->signal_link, &b->signalers); + GEM_BUG_ON(!check_signal_order(ce, rq)); set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); spin_unlock(&b->irq_lock); diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c new file mode 100644 index 0000000000000000000000000000000000000000..5b31e1e05ddd6423d708390adc338583ae1e82a0 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -0,0 +1,179 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "i915_drv.h" +#include "i915_gem_context.h" +#include "i915_globals.h" + +#include "intel_context.h" +#include "intel_engine.h" +#include "intel_engine_pm.h" + +static struct i915_global_context { + struct i915_global base; + struct kmem_cache *slab_ce; +} global; + +static struct intel_context *intel_context_alloc(void) +{ + return kmem_cache_zalloc(global.slab_ce, GFP_KERNEL); +} + +void intel_context_free(struct intel_context *ce) +{ + kmem_cache_free(global.slab_ce, ce); +} + +struct intel_context * +intel_context_create(struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + struct intel_context *ce; + + ce = intel_context_alloc(); + if (!ce) + return ERR_PTR(-ENOMEM); + + intel_context_init(ce, ctx, engine); + return ce; +} + +int __intel_context_do_pin(struct intel_context *ce) +{ + int err; + + if (mutex_lock_interruptible(&ce->pin_mutex)) + return -EINTR; + + if (likely(!atomic_read(&ce->pin_count))) { + intel_wakeref_t wakeref; + + err = 0; + with_intel_runtime_pm(ce->engine->i915, wakeref) + err = ce->ops->pin(ce); + if (err) + goto err; + + i915_gem_context_get(ce->gem_context); /* for ctx->ppgtt */ + + intel_context_get(ce); + smp_mb__before_atomic(); /* flush pin before it is visible */ + } + + atomic_inc(&ce->pin_count); + GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */ + + mutex_unlock(&ce->pin_mutex); + return 0; + +err: + mutex_unlock(&ce->pin_mutex); + return err; +} + +void intel_context_unpin(struct intel_context *ce) +{ + if (likely(atomic_add_unless(&ce->pin_count, -1, 1))) + return; + + /* We may be called from inside intel_context_pin() to evict another */ + intel_context_get(ce); + mutex_lock_nested(&ce->pin_mutex, SINGLE_DEPTH_NESTING); + + if (likely(atomic_dec_and_test(&ce->pin_count))) { + ce->ops->unpin(ce); + + i915_gem_context_put(ce->gem_context); + intel_context_put(ce); + } + + mutex_unlock(&ce->pin_mutex); + intel_context_put(ce); +} + +static void intel_context_retire(struct i915_active_request *active, + struct i915_request *rq) +{ + struct intel_context *ce = + container_of(active, typeof(*ce), active_tracker); + + intel_context_unpin(ce); +} + +void +intel_context_init(struct intel_context *ce, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + GEM_BUG_ON(!engine->cops); + + kref_init(&ce->ref); + + ce->gem_context = ctx; + ce->engine = engine; + ce->ops = engine->cops; + ce->sseu = engine->sseu; + ce->saturated = 0; + + INIT_LIST_HEAD(&ce->signal_link); + INIT_LIST_HEAD(&ce->signals); + + mutex_init(&ce->pin_mutex); + + i915_active_request_init(&ce->active_tracker, + NULL, intel_context_retire); +} + +static void i915_global_context_shrink(void) +{ + kmem_cache_shrink(global.slab_ce); +} + +static void i915_global_context_exit(void) +{ + kmem_cache_destroy(global.slab_ce); +} + +static struct i915_global_context global = { { + .shrink = i915_global_context_shrink, + .exit = i915_global_context_exit, +} }; + +int __init i915_global_context_init(void) +{ + global.slab_ce = KMEM_CACHE(intel_context, SLAB_HWCACHE_ALIGN); + if (!global.slab_ce) + return -ENOMEM; + + i915_global_register(&global.base); + return 0; +} + +void intel_context_enter_engine(struct intel_context *ce) +{ + intel_engine_pm_get(ce->engine); +} + +void intel_context_exit_engine(struct intel_context *ce) +{ + ce->saturated = 0; + intel_engine_pm_put(ce->engine); +} + +struct i915_request *intel_context_create_request(struct intel_context *ce) +{ + struct i915_request *rq; + int err; + + err = intel_context_pin(ce); + if (unlikely(err)) + return ERR_PTR(err); + + rq = i915_request_create(ce); + intel_context_unpin(ce); + + return rq; +} diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h new file mode 100644 index 0000000000000000000000000000000000000000..63392c88cd98625eecc8f189ae5d1115f140e660 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -0,0 +1,130 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_CONTEXT_H__ +#define __INTEL_CONTEXT_H__ + +#include + +#include "intel_context_types.h" +#include "intel_engine_types.h" + +void intel_context_init(struct intel_context *ce, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine); + +struct intel_context * +intel_context_create(struct i915_gem_context *ctx, + struct intel_engine_cs *engine); + +void intel_context_free(struct intel_context *ce); + +/** + * intel_context_lock_pinned - Stablises the 'pinned' status of the HW context + * @ce - the context + * + * Acquire a lock on the pinned status of the HW context, such that the context + * can neither be bound to the GPU or unbound whilst the lock is held, i.e. + * intel_context_is_pinned() remains stable. + */ +static inline int intel_context_lock_pinned(struct intel_context *ce) + __acquires(ce->pin_mutex) +{ + return mutex_lock_interruptible(&ce->pin_mutex); +} + +/** + * intel_context_is_pinned - Reports the 'pinned' status + * @ce - the context + * + * While in use by the GPU, the context, along with its ring and page + * tables is pinned into memory and the GTT. + * + * Returns: true if the context is currently pinned for use by the GPU. + */ +static inline bool +intel_context_is_pinned(struct intel_context *ce) +{ + return atomic_read(&ce->pin_count); +} + +/** + * intel_context_unlock_pinned - Releases the earlier locking of 'pinned' status + * @ce - the context + * + * Releases the lock earlier acquired by intel_context_unlock_pinned(). + */ +static inline void intel_context_unlock_pinned(struct intel_context *ce) + __releases(ce->pin_mutex) +{ + mutex_unlock(&ce->pin_mutex); +} + +int __intel_context_do_pin(struct intel_context *ce); + +static inline int intel_context_pin(struct intel_context *ce) +{ + if (likely(atomic_inc_not_zero(&ce->pin_count))) + return 0; + + return __intel_context_do_pin(ce); +} + +static inline void __intel_context_pin(struct intel_context *ce) +{ + GEM_BUG_ON(!intel_context_is_pinned(ce)); + atomic_inc(&ce->pin_count); +} + +void intel_context_unpin(struct intel_context *ce); + +void intel_context_enter_engine(struct intel_context *ce); +void intel_context_exit_engine(struct intel_context *ce); + +static inline void intel_context_enter(struct intel_context *ce) +{ + if (!ce->active_count++) + ce->ops->enter(ce); +} + +static inline void intel_context_mark_active(struct intel_context *ce) +{ + ++ce->active_count; +} + +static inline void intel_context_exit(struct intel_context *ce) +{ + GEM_BUG_ON(!ce->active_count); + if (!--ce->active_count) + ce->ops->exit(ce); +} + +static inline struct intel_context *intel_context_get(struct intel_context *ce) +{ + kref_get(&ce->ref); + return ce; +} + +static inline void intel_context_put(struct intel_context *ce) +{ + kref_put(&ce->ref, ce->ops->destroy); +} + +static inline void intel_context_timeline_lock(struct intel_context *ce) + __acquires(&ce->ring->timeline->mutex) +{ + mutex_lock(&ce->ring->timeline->mutex); +} + +static inline void intel_context_timeline_unlock(struct intel_context *ce) + __releases(&ce->ring->timeline->mutex) +{ + mutex_unlock(&ce->ring->timeline->mutex); +} + +struct i915_request *intel_context_create_request(struct intel_context *ce); + +#endif /* __INTEL_CONTEXT_H__ */ diff --git a/drivers/gpu/drm/i915/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h similarity index 83% rename from drivers/gpu/drm/i915/intel_context_types.h rename to drivers/gpu/drm/i915/gt/intel_context_types.h index 339c7437fe82fe55cab2e4dbe929fc97e4c04e52..963a312430e6d37a8a3a627afab750d77df8c550 100644 --- a/drivers/gpu/drm/i915/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -10,11 +10,11 @@ #include #include #include -#include #include #include "i915_active_types.h" #include "intel_engine_types.h" +#include "intel_sseu.h" struct i915_gem_context; struct i915_vma; @@ -25,20 +25,13 @@ struct intel_context_ops { int (*pin)(struct intel_context *ce); void (*unpin)(struct intel_context *ce); + void (*enter)(struct intel_context *ce); + void (*exit)(struct intel_context *ce); + void (*reset)(struct intel_context *ce); void (*destroy)(struct kref *kref); }; -/* - * Powergating configuration for a particular (context,engine). - */ -struct intel_sseu { - u8 slice_mask; - u8 subslice_mask; - u8 min_eus_per_subslice; - u8 max_eus_per_subslice; -}; - struct intel_context { struct kref ref; @@ -46,7 +39,6 @@ struct intel_context { struct intel_engine_cs *engine; struct intel_engine_cs *active; - struct list_head active_link; struct list_head signal_link; struct list_head signals; @@ -56,6 +48,8 @@ struct intel_context { u32 *lrc_reg_state; u64 lrc_desc; + unsigned int active_count; /* notionally protected by timeline->mutex */ + atomic_t pin_count; struct mutex pin_mutex; /* guards pinning and associated on-gpuing */ @@ -68,7 +62,6 @@ struct intel_context { struct i915_active_request active_tracker; const struct intel_context_ops *ops; - struct rb_node node; /** sseu: Control eu/slice partitioning */ struct intel_sseu sseu; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/gt/intel_engine.h similarity index 90% rename from drivers/gpu/drm/i915/intel_ringbuffer.h rename to drivers/gpu/drm/i915/gt/intel_engine.h index 72c7c337ace9589e93a1b9dcab485ccefdeacd05..9359b3a7ad9c300b1a07f2e5002e2a5ec82df233 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -106,24 +106,6 @@ hangcheck_action_to_str(const enum intel_engine_hangcheck_action a) void intel_engines_set_scheduler_caps(struct drm_i915_private *i915); -static inline bool __execlists_need_preempt(int prio, int last) -{ - /* - * Allow preemption of low -> normal -> high, but we do - * not allow low priority tasks to preempt other low priority - * tasks under the impression that latency for low priority - * tasks does not matter (as much as background throughput), - * so kiss. - * - * More naturally we would write - * prio >= max(0, last); - * except that we wish to prevent triggering preemption at the same - * priority level: the task that is running should remain running - * to preserve FIFO ordering of dependencies. - */ - return prio > max(I915_PRIORITY_NORMAL - 1, last); -} - static inline void execlists_set_active(struct intel_engine_execlists *execlists, unsigned int bit) @@ -233,8 +215,6 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) */ #define I915_GEM_HWS_PREEMPT 0x32 #define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT * sizeof(u32)) -#define I915_GEM_HWS_HANGCHECK 0x34 -#define I915_GEM_HWS_HANGCHECK_ADDR (I915_GEM_HWS_HANGCHECK * sizeof(u32)) #define I915_GEM_HWS_SEQNO 0x40 #define I915_GEM_HWS_SEQNO_ADDR (I915_GEM_HWS_SEQNO * sizeof(u32)) #define I915_GEM_HWS_SCRATCH 0x80 @@ -362,14 +342,16 @@ __intel_ring_space(unsigned int head, unsigned int tail, unsigned int size) return (head - tail - CACHELINE_BYTES) & (size - 1); } -int intel_engine_setup_common(struct intel_engine_cs *engine); +int intel_engines_init_mmio(struct drm_i915_private *i915); +int intel_engines_setup(struct drm_i915_private *i915); +int intel_engines_init(struct drm_i915_private *i915); +void intel_engines_cleanup(struct drm_i915_private *i915); + int intel_engine_init_common(struct intel_engine_cs *engine); void intel_engine_cleanup_common(struct intel_engine_cs *engine); -int intel_init_render_ring_buffer(struct intel_engine_cs *engine); -int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine); -int intel_init_blt_ring_buffer(struct intel_engine_cs *engine); -int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine); +int intel_ring_submission_setup(struct intel_engine_cs *engine); +int intel_ring_submission_init(struct intel_engine_cs *engine); int intel_engine_stop_cs(struct intel_engine_cs *engine); void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine); @@ -382,6 +364,8 @@ u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine); void intel_engine_get_instdone(struct intel_engine_cs *engine, struct intel_instdone *instdone); +void intel_engine_init_execlists(struct intel_engine_cs *engine); + void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); @@ -458,19 +442,14 @@ static inline void intel_engine_reset(struct intel_engine_cs *engine, { if (engine->reset.reset) engine->reset.reset(engine, stalled); + engine->serial++; /* contexts lost */ } -void intel_engines_sanitize(struct drm_i915_private *i915, bool force); -void intel_gt_resume(struct drm_i915_private *i915); - bool intel_engine_is_idle(struct intel_engine_cs *engine); bool intel_engines_are_idle(struct drm_i915_private *dev_priv); void intel_engine_lost_context(struct intel_engine_cs *engine); -void intel_engines_park(struct drm_i915_private *i915); -void intel_engines_unpark(struct drm_i915_private *i915); - void intel_engines_reset_default_submission(struct drm_i915_private *i915); unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915); @@ -567,17 +546,4 @@ static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists) #endif -static inline u32 -intel_engine_next_hangcheck_seqno(struct intel_engine_cs *engine) -{ - return engine->hangcheck.next_seqno = - next_pseudo_random32(engine->hangcheck.next_seqno); -} - -static inline u32 -intel_engine_get_hangcheck_seqno(struct intel_engine_cs *engine) -{ - return intel_read_status_page(engine, I915_GEM_HWS_HANGCHECK); -} - #endif /* _INTEL_RINGBUFFER_H_ */ diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c similarity index 88% rename from drivers/gpu/drm/i915/intel_engine_cs.c rename to drivers/gpu/drm/i915/gt/intel_engine_cs.c index eea9bec04f1ba0898d0e7cf700159dc8f1265c75..2590f5904b67412430d80b307d09ec66e04c0a6b 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -25,9 +25,11 @@ #include #include "i915_drv.h" -#include "i915_reset.h" -#include "intel_ringbuffer.h" + +#include "intel_engine.h" +#include "intel_engine_pm.h" #include "intel_lrc.h" +#include "intel_reset.h" /* Haswell does have the CXT_SIZE register however it does not appear to be * valid. Now, docs explain in dwords what is in the context object. The full @@ -48,35 +50,24 @@ struct engine_class_info { const char *name; - int (*init_legacy)(struct intel_engine_cs *engine); - int (*init_execlists)(struct intel_engine_cs *engine); - u8 uabi_class; }; static const struct engine_class_info intel_engine_classes[] = { [RENDER_CLASS] = { .name = "rcs", - .init_execlists = logical_render_ring_init, - .init_legacy = intel_init_render_ring_buffer, .uabi_class = I915_ENGINE_CLASS_RENDER, }, [COPY_ENGINE_CLASS] = { .name = "bcs", - .init_execlists = logical_xcs_ring_init, - .init_legacy = intel_init_blt_ring_buffer, .uabi_class = I915_ENGINE_CLASS_COPY, }, [VIDEO_DECODE_CLASS] = { .name = "vcs", - .init_execlists = logical_xcs_ring_init, - .init_legacy = intel_init_bsd_ring_buffer, .uabi_class = I915_ENGINE_CLASS_VIDEO, }, [VIDEO_ENHANCEMENT_CLASS] = { .name = "vecs", - .init_execlists = logical_xcs_ring_init, - .init_legacy = intel_init_vebox_ring_buffer, .uabi_class = I915_ENGINE_CLASS_VIDEO_ENHANCE, }, }; @@ -212,6 +203,22 @@ __intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class) PAGE_SIZE); case 5: case 4: + /* + * There is a discrepancy here between the size reported + * by the register and the size of the context layout + * in the docs. Both are described as authorative! + * + * The discrepancy is on the order of a few cachelines, + * but the total is under one page (4k), which is our + * minimum allocation anyway so it should all come + * out in the wash. + */ + cxt_size = I915_READ(CXT_SIZE) + 1; + DRM_DEBUG_DRIVER("gen%d CXT_SIZE = %d bytes [0x%08x]\n", + INTEL_GEN(dev_priv), + cxt_size * 64, + cxt_size - 1); + return round_up(cxt_size * 64, PAGE_SIZE); case 3: case 2: /* For the special day when i810 gets merged. */ @@ -312,6 +319,12 @@ intel_engine_setup(struct drm_i915_private *dev_priv, engine->class = info->class; engine->instance = info->instance; + /* + * To be overridden by the backend on setup. However to facilitate + * cleanup on error during setup, we always provide the destroy vfunc. + */ + engine->destroy = (typeof(engine->destroy))kfree; + engine->uabi_class = intel_engine_classes[info->class].uabi_class; engine->context_size = __intel_engine_context_size(dev_priv, @@ -336,18 +349,70 @@ intel_engine_setup(struct drm_i915_private *dev_priv, return 0; } +static void __setup_engine_capabilities(struct intel_engine_cs *engine) +{ + struct drm_i915_private *i915 = engine->i915; + + if (engine->class == VIDEO_DECODE_CLASS) { + /* + * HEVC support is present on first engine instance + * before Gen11 and on all instances afterwards. + */ + if (INTEL_GEN(i915) >= 11 || + (INTEL_GEN(i915) >= 9 && engine->instance == 0)) + engine->uabi_capabilities |= + I915_VIDEO_CLASS_CAPABILITY_HEVC; + + /* + * SFC block is present only on even logical engine + * instances. + */ + if ((INTEL_GEN(i915) >= 11 && + RUNTIME_INFO(i915)->vdbox_sfc_access & engine->mask) || + (INTEL_GEN(i915) >= 9 && engine->instance == 0)) + engine->uabi_capabilities |= + I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC; + } else if (engine->class == VIDEO_ENHANCEMENT_CLASS) { + if (INTEL_GEN(i915) >= 9) + engine->uabi_capabilities |= + I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC; + } +} + +static void intel_setup_engine_capabilities(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, i915, id) + __setup_engine_capabilities(engine); +} + +/** + * intel_engines_cleanup() - free the resources allocated for Command Streamers + * @i915: the i915 devic + */ +void intel_engines_cleanup(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, i915, id) { + engine->destroy(engine); + i915->engine[id] = NULL; + } +} + /** * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers - * @dev_priv: i915 device private + * @i915: the i915 device * * Return: non-zero if the initialization failed. */ -int intel_engines_init_mmio(struct drm_i915_private *dev_priv) +int intel_engines_init_mmio(struct drm_i915_private *i915) { - struct intel_device_info *device_info = mkwrite_device_info(dev_priv); - const unsigned int engine_mask = INTEL_INFO(dev_priv)->engine_mask; - struct intel_engine_cs *engine; - enum intel_engine_id id; + struct intel_device_info *device_info = mkwrite_device_info(i915); + const unsigned int engine_mask = INTEL_INFO(i915)->engine_mask; unsigned int mask = 0; unsigned int i; int err; @@ -360,10 +425,10 @@ int intel_engines_init_mmio(struct drm_i915_private *dev_priv) return -ENODEV; for (i = 0; i < ARRAY_SIZE(intel_engines); i++) { - if (!HAS_ENGINE(dev_priv, i)) + if (!HAS_ENGINE(i915, i)) continue; - err = intel_engine_setup(dev_priv, i); + err = intel_engine_setup(i915, i); if (err) goto cleanup; @@ -379,69 +444,52 @@ int intel_engines_init_mmio(struct drm_i915_private *dev_priv) device_info->engine_mask = mask; /* We always presume we have at least RCS available for later probing */ - if (WARN_ON(!HAS_ENGINE(dev_priv, RCS0))) { + if (WARN_ON(!HAS_ENGINE(i915, RCS0))) { err = -ENODEV; goto cleanup; } - RUNTIME_INFO(dev_priv)->num_engines = hweight32(mask); + RUNTIME_INFO(i915)->num_engines = hweight32(mask); - i915_check_and_clear_faults(dev_priv); + i915_check_and_clear_faults(i915); + + intel_setup_engine_capabilities(i915); return 0; cleanup: - for_each_engine(engine, dev_priv, id) - kfree(engine); + intel_engines_cleanup(i915); return err; } /** * intel_engines_init() - init the Engine Command Streamers - * @dev_priv: i915 device private + * @i915: i915 device private * * Return: non-zero if the initialization failed. */ -int intel_engines_init(struct drm_i915_private *dev_priv) +int intel_engines_init(struct drm_i915_private *i915) { + int (*init)(struct intel_engine_cs *engine); struct intel_engine_cs *engine; - enum intel_engine_id id, err_id; + enum intel_engine_id id; int err; - for_each_engine(engine, dev_priv, id) { - const struct engine_class_info *class_info = - &intel_engine_classes[engine->class]; - int (*init)(struct intel_engine_cs *engine); - - if (HAS_EXECLISTS(dev_priv)) - init = class_info->init_execlists; - else - init = class_info->init_legacy; - - err = -EINVAL; - err_id = id; - - if (GEM_DEBUG_WARN_ON(!init)) - goto cleanup; + if (HAS_EXECLISTS(i915)) + init = intel_execlists_submission_init; + else + init = intel_ring_submission_init; + for_each_engine(engine, i915, id) { err = init(engine); if (err) goto cleanup; - - GEM_BUG_ON(!engine->submit_request); } return 0; cleanup: - for_each_engine(engine, dev_priv, id) { - if (id >= err_id) { - kfree(engine); - dev_priv->engine[id] = NULL; - } else { - dev_priv->gt.cleanup_engine(engine); - } - } + intel_engines_cleanup(i915); return err; } @@ -450,7 +498,7 @@ static void intel_engine_init_batch_pool(struct intel_engine_cs *engine) i915_gem_batch_pool_init(&engine->batch_pool, engine); } -static void intel_engine_init_execlist(struct intel_engine_cs *engine) +void intel_engine_init_execlists(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; @@ -557,16 +605,7 @@ static int init_status_page(struct intel_engine_cs *engine) return ret; } -/** - * intel_engines_setup_common - setup engine state not requiring hw access - * @engine: Engine to setup. - * - * Initializes @engine@ structure members shared between legacy and execlists - * submission modes which do not require hardware access. - * - * Typically done early in the submission mode specific engine setup stage. - */ -int intel_engine_setup_common(struct intel_engine_cs *engine) +static int intel_engine_setup_common(struct intel_engine_cs *engine) { int err; @@ -583,10 +622,15 @@ int intel_engine_setup_common(struct intel_engine_cs *engine) i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE); intel_engine_init_breadcrumbs(engine); - intel_engine_init_execlist(engine); + intel_engine_init_execlists(engine); intel_engine_init_hangcheck(engine); intel_engine_init_batch_pool(engine); intel_engine_init_cmd_parser(engine); + intel_engine_init__pm(engine); + + /* Use the whole device by default */ + engine->sseu = + intel_sseu_from_device_info(&RUNTIME_INFO(engine->i915)->sseu); return 0; @@ -595,6 +639,49 @@ int intel_engine_setup_common(struct intel_engine_cs *engine) return err; } +/** + * intel_engines_setup- setup engine state not requiring hw access + * @i915: Device to setup. + * + * Initializes engine structure members shared between legacy and execlists + * submission modes which do not require hardware access. + * + * Typically done early in the submission mode specific engine setup stage. + */ +int intel_engines_setup(struct drm_i915_private *i915) +{ + int (*setup)(struct intel_engine_cs *engine); + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err; + + if (HAS_EXECLISTS(i915)) + setup = intel_execlists_submission_setup; + else + setup = intel_ring_submission_setup; + + for_each_engine(engine, i915, id) { + err = intel_engine_setup_common(engine); + if (err) + goto cleanup; + + err = setup(engine); + if (err) + goto cleanup; + + /* We expect the backend to take control over its state */ + GEM_BUG_ON(engine->destroy == (typeof(engine->destroy))kfree); + + GEM_BUG_ON(!engine->cops); + } + + return 0; + +cleanup: + intel_engines_cleanup(i915); + return err; +} + void intel_engines_set_scheduler_caps(struct drm_i915_private *i915) { static const struct { @@ -675,6 +762,7 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine) goto out_timeline; dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs; + GEM_BUG_ON(dw & 1); /* RING_TAIL must be qword aligned */ i915_timeline_unpin(&frame->timeline); @@ -690,11 +778,17 @@ static int pin_context(struct i915_gem_context *ctx, struct intel_context **out) { struct intel_context *ce; + int err; - ce = intel_context_pin(ctx, engine); + ce = i915_gem_context_get_engine(ctx, engine->id); if (IS_ERR(ce)) return PTR_ERR(ce); + err = intel_context_pin(ce); + intel_context_put(ce); + if (err) + return err; + *out = ce; return 0; } @@ -753,30 +847,6 @@ int intel_engine_init_common(struct intel_engine_cs *engine) return ret; } -void intel_gt_resume(struct drm_i915_private *i915) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - /* - * After resume, we may need to poke into the pinned kernel - * contexts to paper over any damage caused by the sudden suspend. - * Only the kernel contexts should remain pinned over suspend, - * allowing us to fixup the user contexts on their first pin. - */ - for_each_engine(engine, i915, id) { - struct intel_context *ce; - - ce = engine->kernel_context; - if (ce) - ce->ops->reset(ce); - - ce = engine->preempt_context; - if (ce) - ce->ops->reset(ce); - } -} - /** * intel_engines_cleanup_common - cleans up the engine state created by * the common initiailizers. @@ -1062,10 +1132,15 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) if (i915_reset_failed(engine->i915)) return true; + if (!intel_wakeref_active(&engine->wakeref)) + return true; + /* Waiting to drain ELSP? */ if (READ_ONCE(engine->execlists.active)) { struct tasklet_struct *t = &engine->execlists.tasklet; + synchronize_hardirq(engine->i915->drm.irq); + local_bh_disable(); if (tasklet_trylock(t)) { /* Must wait for any GPU reset in progress. */ @@ -1123,117 +1198,6 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915) engine->set_default_submission(engine); } -static bool reset_engines(struct drm_i915_private *i915) -{ - if (INTEL_INFO(i915)->gpu_reset_clobbers_display) - return false; - - return intel_gpu_reset(i915, ALL_ENGINES) == 0; -} - -/** - * intel_engines_sanitize: called after the GPU has lost power - * @i915: the i915 device - * @force: ignore a failed reset and sanitize engine state anyway - * - * Anytime we reset the GPU, either with an explicit GPU reset or through a - * PCI power cycle, the GPU loses state and we must reset our state tracking - * to match. Note that calling intel_engines_sanitize() if the GPU has not - * been reset results in much confusion! - */ -void intel_engines_sanitize(struct drm_i915_private *i915, bool force) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - GEM_TRACE("\n"); - - if (!reset_engines(i915) && !force) - return; - - for_each_engine(engine, i915, id) - intel_engine_reset(engine, false); -} - -/** - * intel_engines_park: called when the GT is transitioning from busy->idle - * @i915: the i915 device - * - * The GT is now idle and about to go to sleep (maybe never to wake again?). - * Time for us to tidy and put away our toys (release resources back to the - * system). - */ -void intel_engines_park(struct drm_i915_private *i915) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine(engine, i915, id) { - /* Flush the residual irq tasklets first. */ - intel_engine_disarm_breadcrumbs(engine); - tasklet_kill(&engine->execlists.tasklet); - - /* - * We are committed now to parking the engines, make sure there - * will be no more interrupts arriving later and the engines - * are truly idle. - */ - if (wait_for(intel_engine_is_idle(engine), 10)) { - struct drm_printer p = drm_debug_printer(__func__); - - dev_err(i915->drm.dev, - "%s is not idle before parking\n", - engine->name); - intel_engine_dump(engine, &p, NULL); - } - - /* Must be reset upon idling, or we may miss the busy wakeup. */ - GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN); - - if (engine->park) - engine->park(engine); - - if (engine->pinned_default_state) { - i915_gem_object_unpin_map(engine->default_state); - engine->pinned_default_state = NULL; - } - - i915_gem_batch_pool_fini(&engine->batch_pool); - engine->execlists.no_priolist = false; - } - - i915->gt.active_engines = 0; -} - -/** - * intel_engines_unpark: called when the GT is transitioning from idle->busy - * @i915: the i915 device - * - * The GT was idle and now about to fire up with some new user requests. - */ -void intel_engines_unpark(struct drm_i915_private *i915) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine(engine, i915, id) { - void *map; - - /* Pin the default state for fast resets from atomic context. */ - map = NULL; - if (engine->default_state) - map = i915_gem_object_pin_map(engine->default_state, - I915_MAP_WB); - if (!IS_ERR_OR_NULL(map)) - engine->pinned_default_state = map; - - if (engine->unpark) - engine->unpark(engine); - - intel_engine_init_hangcheck(engine); - } -} - /** * intel_engine_lost_context: called when the GPU is reset into unknown state * @engine: the engine @@ -1312,8 +1276,11 @@ static void print_request(struct drm_printer *m, i915_request_completed(rq) ? "!" : i915_request_started(rq) ? "*" : "", + test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &rq->fence.flags) ? "+" : test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, - &rq->fence.flags) ? "+" : "", + &rq->fence.flags) ? "-" : + "", buf, jiffies_to_msecs(jiffies - rq->emitted_jiffies), name); @@ -1518,9 +1485,8 @@ void intel_engine_dump(struct intel_engine_cs *engine, if (i915_reset_failed(engine->i915)) drm_printf(m, "*** WEDGED ***\n"); - drm_printf(m, "\tHangcheck %x:%x [%d ms]\n", - engine->hangcheck.last_seqno, - engine->hangcheck.next_seqno, + drm_printf(m, "\tAwake? %d\n", atomic_read(&engine->wakeref.count)); + drm_printf(m, "\tHangcheck: %d ms ago\n", jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp)); drm_printf(m, "\tReset count: %d (global %d)\n", i915_reset_engine_count(error, engine), @@ -1752,6 +1718,5 @@ intel_engine_find_active_request(struct intel_engine_cs *engine) } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/mock_engine.c" -#include "selftests/intel_engine_cs.c" +#include "selftest_engine_cs.c" #endif diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c new file mode 100644 index 0000000000000000000000000000000000000000..ccf034764741bb7241a08c8aa03e5da734976bbb --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -0,0 +1,164 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "i915_drv.h" + +#include "intel_engine.h" +#include "intel_engine_pm.h" +#include "intel_gt_pm.h" + +static int __engine_unpark(struct intel_wakeref *wf) +{ + struct intel_engine_cs *engine = + container_of(wf, typeof(*engine), wakeref); + void *map; + + GEM_TRACE("%s\n", engine->name); + + intel_gt_pm_get(engine->i915); + + /* Pin the default state for fast resets from atomic context. */ + map = NULL; + if (engine->default_state) + map = i915_gem_object_pin_map(engine->default_state, + I915_MAP_WB); + if (!IS_ERR_OR_NULL(map)) + engine->pinned_default_state = map; + + if (engine->unpark) + engine->unpark(engine); + + intel_engine_init_hangcheck(engine); + return 0; +} + +void intel_engine_pm_get(struct intel_engine_cs *engine) +{ + intel_wakeref_get(engine->i915, &engine->wakeref, __engine_unpark); +} + +void intel_engine_park(struct intel_engine_cs *engine) +{ + /* + * We are committed now to parking this engine, make sure there + * will be no more interrupts arriving later and the engine + * is truly idle. + */ + if (wait_for(intel_engine_is_idle(engine), 10)) { + struct drm_printer p = drm_debug_printer(__func__); + + dev_err(engine->i915->drm.dev, + "%s is not idle before parking\n", + engine->name); + intel_engine_dump(engine, &p, NULL); + } +} + +static bool switch_to_kernel_context(struct intel_engine_cs *engine) +{ + struct i915_request *rq; + + /* Already inside the kernel context, safe to power down. */ + if (engine->wakeref_serial == engine->serial) + return true; + + /* GPU is pointing to the void, as good as in the kernel context. */ + if (i915_reset_failed(engine->i915)) + return true; + + /* + * Note, we do this without taking the timeline->mutex. We cannot + * as we may be called while retiring the kernel context and so + * already underneath the timeline->mutex. Instead we rely on the + * exclusive property of the __engine_park that prevents anyone + * else from creating a request on this engine. This also requires + * that the ring is empty and we avoid any waits while constructing + * the context, as they assume protection by the timeline->mutex. + * This should hold true as we can only park the engine after + * retiring the last request, thus all rings should be empty and + * all timelines idle. + */ + rq = __i915_request_create(engine->kernel_context, GFP_NOWAIT); + if (IS_ERR(rq)) + /* Context switch failed, hope for the best! Maybe reset? */ + return true; + + /* Check again on the next retirement. */ + engine->wakeref_serial = engine->serial + 1; + __i915_request_commit(rq); + + return false; +} + +static int __engine_park(struct intel_wakeref *wf) +{ + struct intel_engine_cs *engine = + container_of(wf, typeof(*engine), wakeref); + + /* + * If one and only one request is completed between pm events, + * we know that we are inside the kernel context and it is + * safe to power down. (We are paranoid in case that runtime + * suspend causes corruption to the active context image, and + * want to avoid that impacting userspace.) + */ + if (!switch_to_kernel_context(engine)) + return -EBUSY; + + GEM_TRACE("%s\n", engine->name); + + intel_engine_disarm_breadcrumbs(engine); + + /* Must be reset upon idling, or we may miss the busy wakeup. */ + GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN); + + if (engine->park) + engine->park(engine); + + if (engine->pinned_default_state) { + i915_gem_object_unpin_map(engine->default_state); + engine->pinned_default_state = NULL; + } + + engine->execlists.no_priolist = false; + + intel_gt_pm_put(engine->i915); + return 0; +} + +void intel_engine_pm_put(struct intel_engine_cs *engine) +{ + intel_wakeref_put(engine->i915, &engine->wakeref, __engine_park); +} + +void intel_engine_init__pm(struct intel_engine_cs *engine) +{ + intel_wakeref_init(&engine->wakeref); +} + +int intel_engines_resume(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + intel_gt_pm_get(i915); + for_each_engine(engine, i915, id) { + intel_engine_pm_get(engine); + engine->serial++; /* kernel context lost */ + err = engine->resume(engine); + intel_engine_pm_put(engine); + if (err) { + dev_err(i915->drm.dev, + "Failed to restart %s (%d)\n", + engine->name, err); + break; + } + } + intel_gt_pm_put(i915); + + return err; +} diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h new file mode 100644 index 0000000000000000000000000000000000000000..b326cd993d60f23008abe85fe640160a23203a76 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h @@ -0,0 +1,22 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_ENGINE_PM_H +#define INTEL_ENGINE_PM_H + +struct drm_i915_private; +struct intel_engine_cs; + +void intel_engine_pm_get(struct intel_engine_cs *engine); +void intel_engine_pm_put(struct intel_engine_cs *engine); + +void intel_engine_park(struct intel_engine_cs *engine); + +void intel_engine_init__pm(struct intel_engine_cs *engine); + +int intel_engines_resume(struct drm_i915_private *i915); + +#endif /* INTEL_ENGINE_PM_H */ diff --git a/drivers/gpu/drm/i915/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h similarity index 95% rename from drivers/gpu/drm/i915/intel_engine_types.h rename to drivers/gpu/drm/i915/gt/intel_engine_types.h index 1f970c76b6a65e5a3935d993e4f2f3cced4132da..40e774acc2cd20341904cb989ca8d200136f63f1 100644 --- a/drivers/gpu/drm/i915/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -14,14 +14,15 @@ #include #include "i915_gem.h" +#include "i915_gem_batch_pool.h" +#include "i915_pmu.h" #include "i915_priolist_types.h" #include "i915_selftest.h" #include "i915_timeline_types.h" +#include "intel_sseu.h" +#include "intel_wakeref.h" #include "intel_workarounds_types.h" -#include "i915_gem_batch_pool.h" -#include "i915_pmu.h" - #define I915_MAX_SLICES 3 #define I915_MAX_SUBSLICES 8 @@ -52,8 +53,8 @@ struct intel_instdone { struct intel_engine_hangcheck { u64 acthd; - u32 last_seqno; - u32 next_seqno; + u32 last_ring; + u32 last_head; unsigned long action_timestamp; struct intel_instdone instdone; }; @@ -226,6 +227,7 @@ struct intel_engine_execlists { * @queue: queue of requests, in priority lists */ struct rb_root_cached queue; + struct rb_root_cached virtual; /** * @csb_write: control register for Context Switch buffer @@ -278,6 +280,10 @@ struct intel_engine_cs { u32 context_size; u32 mmio_base; + u32 uabi_capabilities; + + struct intel_sseu sseu; + struct intel_ring *buffer; struct i915_timeline timeline; @@ -285,6 +291,10 @@ struct intel_engine_cs { struct intel_context *kernel_context; /* pinned */ struct intel_context *preempt_context; /* pinned; optional */ + unsigned long serial; + + unsigned long wakeref_serial; + struct intel_wakeref wakeref; struct drm_i915_gem_object *default_state; void *pinned_default_state; @@ -357,7 +367,7 @@ struct intel_engine_cs { void (*irq_enable)(struct intel_engine_cs *engine); void (*irq_disable)(struct intel_engine_cs *engine); - int (*init_hw)(struct intel_engine_cs *engine); + int (*resume)(struct intel_engine_cs *engine); struct { void (*prepare)(struct intel_engine_cs *engine); @@ -397,6 +407,13 @@ struct intel_engine_cs { */ void (*submit_request)(struct i915_request *rq); + /* + * Called on signaling of a SUBMIT_FENCE, passing along the signaling + * request down to the bonded pairs. + */ + void (*bond_execute)(struct i915_request *rq, + struct dma_fence *signal); + /* * Call when the priority on a request has changed and it and its * dependencies may need rescheduling. Note the request itself may @@ -413,7 +430,7 @@ struct intel_engine_cs { */ void (*cancel_requests)(struct intel_engine_cs *engine); - void (*cleanup)(struct intel_engine_cs *engine); + void (*destroy)(struct intel_engine_cs *engine); struct intel_engine_execlists execlists; @@ -438,6 +455,7 @@ struct intel_engine_cs { #define I915_ENGINE_HAS_PREEMPTION BIT(2) #define I915_ENGINE_HAS_SEMAPHORES BIT(3) #define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4) +#define I915_ENGINE_IS_VIRTUAL BIT(5) unsigned int flags; /* @@ -527,6 +545,12 @@ intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine) return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET; } +static inline bool +intel_engine_is_virtual(const struct intel_engine_cs *engine) +{ + return engine->flags & I915_ENGINE_IS_VIRTUAL; +} + #define instdone_slice_mask(dev_priv__) \ (IS_GEN(dev_priv__, 7) ? \ 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask) diff --git a/drivers/gpu/drm/i915/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h similarity index 100% rename from drivers/gpu/drm/i915/intel_gpu_commands.h rename to drivers/gpu/drm/i915/gt/intel_gpu_commands.h diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c new file mode 100644 index 0000000000000000000000000000000000000000..ae7155f0e06320efded9b184b2e552d8b960c8ab --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -0,0 +1,143 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "i915_drv.h" +#include "intel_gt_pm.h" +#include "intel_pm.h" +#include "intel_wakeref.h" + +static void pm_notify(struct drm_i915_private *i915, int state) +{ + blocking_notifier_call_chain(&i915->gt.pm_notifications, state, i915); +} + +static int intel_gt_unpark(struct intel_wakeref *wf) +{ + struct drm_i915_private *i915 = + container_of(wf, typeof(*i915), gt.wakeref); + + GEM_TRACE("\n"); + + /* + * It seems that the DMC likes to transition between the DC states a lot + * when there are no connected displays (no active power domains) during + * command submission. + * + * This activity has negative impact on the performance of the chip with + * huge latencies observed in the interrupt handler and elsewhere. + * + * Work around it by grabbing a GT IRQ power domain whilst there is any + * GT activity, preventing any DC state transitions. + */ + i915->gt.awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); + GEM_BUG_ON(!i915->gt.awake); + + intel_enable_gt_powersave(i915); + + i915_update_gfx_val(i915); + if (INTEL_GEN(i915) >= 6) + gen6_rps_busy(i915); + + i915_pmu_gt_unparked(i915); + + i915_queue_hangcheck(i915); + + pm_notify(i915, INTEL_GT_UNPARK); + + return 0; +} + +void intel_gt_pm_get(struct drm_i915_private *i915) +{ + intel_wakeref_get(i915, &i915->gt.wakeref, intel_gt_unpark); +} + +static int intel_gt_park(struct intel_wakeref *wf) +{ + struct drm_i915_private *i915 = + container_of(wf, typeof(*i915), gt.wakeref); + intel_wakeref_t wakeref = fetch_and_zero(&i915->gt.awake); + + GEM_TRACE("\n"); + + pm_notify(i915, INTEL_GT_PARK); + + i915_pmu_gt_parked(i915); + if (INTEL_GEN(i915) >= 6) + gen6_rps_idle(i915); + + GEM_BUG_ON(!wakeref); + intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref); + + return 0; +} + +void intel_gt_pm_put(struct drm_i915_private *i915) +{ + intel_wakeref_put(i915, &i915->gt.wakeref, intel_gt_park); +} + +void intel_gt_pm_init(struct drm_i915_private *i915) +{ + intel_wakeref_init(&i915->gt.wakeref); + BLOCKING_INIT_NOTIFIER_HEAD(&i915->gt.pm_notifications); +} + +static bool reset_engines(struct drm_i915_private *i915) +{ + if (INTEL_INFO(i915)->gpu_reset_clobbers_display) + return false; + + return intel_gpu_reset(i915, ALL_ENGINES) == 0; +} + +/** + * intel_gt_sanitize: called after the GPU has lost power + * @i915: the i915 device + * @force: ignore a failed reset and sanitize engine state anyway + * + * Anytime we reset the GPU, either with an explicit GPU reset or through a + * PCI power cycle, the GPU loses state and we must reset our state tracking + * to match. Note that calling intel_gt_sanitize() if the GPU has not + * been reset results in much confusion! + */ +void intel_gt_sanitize(struct drm_i915_private *i915, bool force) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + GEM_TRACE("\n"); + + if (!reset_engines(i915) && !force) + return; + + for_each_engine(engine, i915, id) + intel_engine_reset(engine, false); +} + +void intel_gt_resume(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* + * After resume, we may need to poke into the pinned kernel + * contexts to paper over any damage caused by the sudden suspend. + * Only the kernel contexts should remain pinned over suspend, + * allowing us to fixup the user contexts on their first pin. + */ + for_each_engine(engine, i915, id) { + struct intel_context *ce; + + ce = engine->kernel_context; + if (ce) + ce->ops->reset(ce); + + ce = engine->preempt_context; + if (ce) + ce->ops->reset(ce); + } +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h new file mode 100644 index 0000000000000000000000000000000000000000..7dd1130a19a480ce02b07d99afb2d2358ec32709 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -0,0 +1,27 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef INTEL_GT_PM_H +#define INTEL_GT_PM_H + +#include + +struct drm_i915_private; + +enum { + INTEL_GT_UNPARK, + INTEL_GT_PARK, +}; + +void intel_gt_pm_get(struct drm_i915_private *i915); +void intel_gt_pm_put(struct drm_i915_private *i915); + +void intel_gt_pm_init(struct drm_i915_private *i915); + +void intel_gt_sanitize(struct drm_i915_private *i915, bool force); +void intel_gt_resume(struct drm_i915_private *i915); + +#endif /* INTEL_GT_PM_H */ diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c similarity index 94% rename from drivers/gpu/drm/i915/intel_hangcheck.c rename to drivers/gpu/drm/i915/gt/intel_hangcheck.c index 3d51ed1428d4ef8447acdccfe10024cbef6e4481..3a4d09b80fa0d86b7b9701d55d3983e231a75b53 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/intel_hangcheck.c @@ -22,12 +22,13 @@ * */ +#include "intel_reset.h" #include "i915_drv.h" -#include "i915_reset.h" struct hangcheck { u64 acthd; - u32 seqno; + u32 ring; + u32 head; enum intel_engine_hangcheck_action action; unsigned long action_timestamp; int deadlock; @@ -133,26 +134,31 @@ static void hangcheck_load_sample(struct intel_engine_cs *engine, struct hangcheck *hc) { hc->acthd = intel_engine_get_active_head(engine); - hc->seqno = intel_engine_get_hangcheck_seqno(engine); + hc->ring = ENGINE_READ(engine, RING_START); + hc->head = ENGINE_READ(engine, RING_HEAD); } static void hangcheck_store_sample(struct intel_engine_cs *engine, const struct hangcheck *hc) { engine->hangcheck.acthd = hc->acthd; - engine->hangcheck.last_seqno = hc->seqno; + engine->hangcheck.last_ring = hc->ring; + engine->hangcheck.last_head = hc->head; } static enum intel_engine_hangcheck_action hangcheck_get_action(struct intel_engine_cs *engine, const struct hangcheck *hc) { - if (engine->hangcheck.last_seqno != hc->seqno) - return ENGINE_ACTIVE_SEQNO; - if (intel_engine_is_idle(engine)) return ENGINE_IDLE; + if (engine->hangcheck.last_ring != hc->ring) + return ENGINE_ACTIVE_SEQNO; + + if (engine->hangcheck.last_head != hc->head) + return ENGINE_ACTIVE_SEQNO; + return engine_stuck(engine, hc->acthd); } @@ -256,6 +262,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work) struct intel_engine_cs *engine; enum intel_engine_id id; unsigned int hung = 0, stuck = 0, wedged = 0; + intel_wakeref_t wakeref; if (!i915_modparams.enable_hangcheck) return; @@ -266,6 +273,10 @@ static void i915_hangcheck_elapsed(struct work_struct *work) if (i915_terminally_wedged(dev_priv)) return; + wakeref = intel_runtime_pm_get_if_in_use(dev_priv); + if (!wakeref) + return; + /* As enabling the GPU requires fairly extensive mmio access, * periodically arm the mmio checker to see if we are triggering * any invalid access. @@ -313,6 +324,8 @@ static void i915_hangcheck_elapsed(struct work_struct *work) if (hung) hangcheck_declare_hang(dev_priv, hung, stuck); + intel_runtime_pm_put(dev_priv, wakeref); + /* Reset timer in case GPU hangs without another request being added */ i915_queue_hangcheck(dev_priv); } @@ -330,5 +343,5 @@ void intel_hangcheck_init(struct drm_i915_private *i915) } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/intel_hangcheck.c" +#include "selftest_hangcheck.c" #endif diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c similarity index 77% rename from drivers/gpu/drm/i915/intel_lrc.c rename to drivers/gpu/drm/i915/gt/intel_lrc.c index 11e5a86610bf7eb6187ced817015cf3f4c98f4ba..1f7bee0cae0cac82bd1845343a50d11c3062e9ea 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -133,13 +133,13 @@ */ #include -#include #include "i915_drv.h" #include "i915_gem_render_state.h" -#include "i915_reset.h" #include "i915_vgpu.h" +#include "intel_engine_pm.h" #include "intel_lrc_reg.h" #include "intel_mocs.h" +#include "intel_reset.h" #include "intel_workarounds.h" #define RING_EXECLIST_QFULL (1 << 0x2) @@ -164,7 +164,53 @@ #define WA_TAIL_DWORDS 2 #define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS) -#define ACTIVE_PRIORITY (I915_PRIORITY_NOSEMAPHORE) +struct virtual_engine { + struct intel_engine_cs base; + struct intel_context context; + + /* + * We allow only a single request through the virtual engine at a time + * (each request in the timeline waits for the completion fence of + * the previous before being submitted). By restricting ourselves to + * only submitting a single request, each request is placed on to a + * physical to maximise load spreading (by virtue of the late greedy + * scheduling -- each real engine takes the next available request + * upon idling). + */ + struct i915_request *request; + + /* + * We keep a rbtree of available virtual engines inside each physical + * engine, sorted by priority. Here we preallocate the nodes we need + * for the virtual engine, indexed by physical_engine->id. + */ + struct ve_node { + struct rb_node rb; + int prio; + } nodes[I915_NUM_ENGINES]; + + /* + * Keep track of bonded pairs -- restrictions upon on our selection + * of physical engines any particular request may be submitted to. + * If we receive a submit-fence from a master engine, we will only + * use one of sibling_mask physical engines. + */ + struct ve_bond { + const struct intel_engine_cs *master; + intel_engine_mask_t sibling_mask; + } *bonds; + unsigned int num_bonds; + + /* And finally, which physical engines this virtual engine maps onto. */ + unsigned int num_siblings; + struct intel_engine_cs *siblings[0]; +}; + +static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine) +{ + GEM_BUG_ON(!intel_engine_is_virtual(engine)); + return container_of(engine, struct virtual_engine, base); +} static int execlists_context_deferred_alloc(struct intel_context *ce, struct intel_engine_cs *engine); @@ -189,23 +235,12 @@ static int effective_prio(const struct i915_request *rq) /* * On unwinding the active request, we give it a priority bump - * equivalent to a freshly submitted request. This protects it from - * being gazumped again, but it would be preferable if we didn't - * let it be gazumped in the first place! - * - * See __unwind_incomplete_requests() + * if it has completed waiting on any semaphore. If we know that + * the request has already started, we can prevent an unwanted + * preempt-to-idle cycle by taking that into account now. */ - if (~prio & ACTIVE_PRIORITY && __i915_request_has_started(rq)) { - /* - * After preemption, we insert the active request at the - * end of the new priority level. This means that we will be - * _lower_ priority than the preemptee all things equal (and - * so the preemption is valid), so adjust our comparison - * accordingly. - */ - prio |= ACTIVE_PRIORITY; - prio--; - } + if (__i915_request_has_started(rq)) + prio |= I915_PRIORITY_NOSEMAPHORE; /* Restrict mere WAIT boosts from triggering preemption */ return prio | __NO_PREEMPTION; @@ -229,7 +264,8 @@ static int queue_prio(const struct intel_engine_execlists *execlists) } static inline bool need_preempt(const struct intel_engine_cs *engine, - const struct i915_request *rq) + const struct i915_request *rq, + struct rb_node *rb) { int last_prio; @@ -252,8 +288,8 @@ static inline bool need_preempt(const struct intel_engine_cs *engine, * ourselves, ignore the request. */ last_prio = effective_prio(rq); - if (!__execlists_need_preempt(engine->execlists.queue_priority_hint, - last_prio)) + if (!i915_scheduler_need_preempt(engine->execlists.queue_priority_hint, + last_prio)) return false; /* @@ -264,6 +300,25 @@ static inline bool need_preempt(const struct intel_engine_cs *engine, rq_prio(list_next_entry(rq, link)) > last_prio) return true; + if (rb) { + struct virtual_engine *ve = + rb_entry(rb, typeof(*ve), nodes[engine->id].rb); + bool preempt = false; + + if (engine == ve->siblings[0]) { /* only preempt one sibling */ + struct i915_request *next; + + rcu_read_lock(); + next = READ_ONCE(ve->request); + if (next) + preempt = rq_prio(next) > last_prio; + rcu_read_unlock(); + } + + if (preempt) + return preempt; + } + /* * If the inflight context did not trigger the preemption, then maybe * it was the set of queued requests? Pick the highest priority in @@ -375,13 +430,15 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) { struct i915_request *rq, *rn, *active = NULL; struct list_head *uninitialized_var(pl); - int prio = I915_PRIORITY_INVALID | ACTIVE_PRIORITY; + int prio = I915_PRIORITY_INVALID; lockdep_assert_held(&engine->timeline.lock); list_for_each_entry_safe_reverse(rq, rn, &engine->timeline.requests, link) { + struct intel_engine_cs *owner; + if (i915_request_completed(rq)) break; @@ -390,40 +447,29 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) GEM_BUG_ON(rq->hw_context->active); - GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); - if (rq_prio(rq) != prio) { - prio = rq_prio(rq); - pl = i915_sched_lookup_priolist(engine, prio); - } - GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); - - list_add(&rq->sched.link, pl); - - active = rq; - } + /* + * Push the request back into the queue for later resubmission. + * If this request is not native to this physical engine (i.e. + * it came from a virtual source), push it back onto the virtual + * engine so that it can be moved across onto another physical + * engine as load dictates. + */ + owner = rq->hw_context->engine; + if (likely(owner == engine)) { + GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); + if (rq_prio(rq) != prio) { + prio = rq_prio(rq); + pl = i915_sched_lookup_priolist(engine, prio); + } + GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); - /* - * The active request is now effectively the start of a new client - * stream, so give it the equivalent small priority bump to prevent - * it being gazumped a second time by another peer. - * - * Note we have to be careful not to apply a priority boost to a request - * still spinning on its semaphores. If the request hasn't started, that - * means it is still waiting for its dependencies to be signaled, and - * if we apply a priority boost to this request, we will boost it past - * its signalers and so break PI. - * - * One consequence of this preemption boost is that we may jump - * over lesser priorities (such as I915_PRIORITY_WAIT), effectively - * making those priorities non-preemptible. They will be moved forward - * in the priority queue, but they will not gain immediate access to - * the GPU. - */ - if (~prio & ACTIVE_PRIORITY && __i915_request_has_started(active)) { - prio |= ACTIVE_PRIORITY; - active->sched.attr.priority = prio; - list_move_tail(&active->sched.link, - i915_sched_lookup_priolist(engine, prio)); + list_add(&rq->sched.link, pl); + active = rq; + } else { + rq->engine = owner; + owner->submit_request(rq); + active = NULL; + } } return active; @@ -475,6 +521,15 @@ execlists_context_schedule_in(struct i915_request *rq) rq->hw_context->active = rq->engine; } +static void kick_siblings(struct i915_request *rq) +{ + struct virtual_engine *ve = to_virtual_engine(rq->hw_context->engine); + struct i915_request *next = READ_ONCE(ve->request); + + if (next && next->execution_mask & ~rq->execution_mask) + tasklet_schedule(&ve->base.execlists.tasklet); +} + static inline void execlists_context_schedule_out(struct i915_request *rq, unsigned long status) { @@ -482,6 +537,18 @@ execlists_context_schedule_out(struct i915_request *rq, unsigned long status) intel_engine_context_out(rq->engine); execlists_context_status_change(rq, status); trace_i915_request_out(rq); + + /* + * If this is part of a virtual engine, its next request may have + * been blocked waiting for access to the active context. We have + * to kick all the siblings again in case we need to switch (e.g. + * the next request is not runnable on this engine). Hopefully, + * we will already have submitted the next request before the + * tasklet runs and do not need to rebuild each virtual tree + * and kick everyone again. + */ + if (rq->engine != rq->hw_context->engine) + kick_siblings(rq); } static u64 execlists_update_context(struct i915_request *rq) @@ -535,7 +602,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) * that all ELSP are drained i.e. we have processed the CSB, * before allowing ourselves to idle and calling intel_runtime_pm_put(). */ - GEM_BUG_ON(!engine->i915->gt.awake); + GEM_BUG_ON(!intel_wakeref_active(&engine->wakeref)); /* * ELSQ note: the submit queue is not cleared after being submitted @@ -659,6 +726,93 @@ static void complete_preempt_context(struct intel_engine_execlists *execlists) execlists)); } +static void virtual_update_register_offsets(u32 *regs, + struct intel_engine_cs *engine) +{ + u32 base = engine->mmio_base; + + /* Must match execlists_init_reg_state()! */ + + regs[CTX_CONTEXT_CONTROL] = + i915_mmio_reg_offset(RING_CONTEXT_CONTROL(base)); + regs[CTX_RING_HEAD] = i915_mmio_reg_offset(RING_HEAD(base)); + regs[CTX_RING_TAIL] = i915_mmio_reg_offset(RING_TAIL(base)); + regs[CTX_RING_BUFFER_START] = i915_mmio_reg_offset(RING_START(base)); + regs[CTX_RING_BUFFER_CONTROL] = i915_mmio_reg_offset(RING_CTL(base)); + + regs[CTX_BB_HEAD_U] = i915_mmio_reg_offset(RING_BBADDR_UDW(base)); + regs[CTX_BB_HEAD_L] = i915_mmio_reg_offset(RING_BBADDR(base)); + regs[CTX_BB_STATE] = i915_mmio_reg_offset(RING_BBSTATE(base)); + regs[CTX_SECOND_BB_HEAD_U] = + i915_mmio_reg_offset(RING_SBBADDR_UDW(base)); + regs[CTX_SECOND_BB_HEAD_L] = i915_mmio_reg_offset(RING_SBBADDR(base)); + regs[CTX_SECOND_BB_STATE] = i915_mmio_reg_offset(RING_SBBSTATE(base)); + + regs[CTX_CTX_TIMESTAMP] = + i915_mmio_reg_offset(RING_CTX_TIMESTAMP(base)); + regs[CTX_PDP3_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 3)); + regs[CTX_PDP3_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 3)); + regs[CTX_PDP2_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 2)); + regs[CTX_PDP2_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 2)); + regs[CTX_PDP1_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 1)); + regs[CTX_PDP1_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 1)); + regs[CTX_PDP0_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 0)); + regs[CTX_PDP0_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 0)); + + if (engine->class == RENDER_CLASS) { + regs[CTX_RCS_INDIRECT_CTX] = + i915_mmio_reg_offset(RING_INDIRECT_CTX(base)); + regs[CTX_RCS_INDIRECT_CTX_OFFSET] = + i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(base)); + regs[CTX_BB_PER_CTX_PTR] = + i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(base)); + + regs[CTX_R_PWR_CLK_STATE] = + i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE); + } +} + +static bool virtual_matches(const struct virtual_engine *ve, + const struct i915_request *rq, + const struct intel_engine_cs *engine) +{ + const struct intel_engine_cs *active; + + if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */ + return false; + + /* + * We track when the HW has completed saving the context image + * (i.e. when we have seen the final CS event switching out of + * the context) and must not overwrite the context image before + * then. This restricts us to only using the active engine + * while the previous virtualized request is inflight (so + * we reuse the register offsets). This is a very small + * hystersis on the greedy seelction algorithm. + */ + active = READ_ONCE(ve->context.active); + if (active && active != engine) + return false; + + return true; +} + +static void virtual_xfer_breadcrumbs(struct virtual_engine *ve, + struct intel_engine_cs *engine) +{ + struct intel_engine_cs *old = ve->siblings[0]; + + /* All unattached (rq->engine == old) must already be completed */ + + spin_lock(&old->breadcrumbs.irq_lock); + if (!list_empty(&ve->context.signal_link)) { + list_move_tail(&ve->context.signal_link, + &engine->breadcrumbs.signalers); + intel_engine_queue_breadcrumbs(engine); + } + spin_unlock(&old->breadcrumbs.irq_lock); +} + static void execlists_dequeue(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; @@ -691,6 +845,26 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * and context switches) submission. */ + for (rb = rb_first_cached(&execlists->virtual); rb; ) { + struct virtual_engine *ve = + rb_entry(rb, typeof(*ve), nodes[engine->id].rb); + struct i915_request *rq = READ_ONCE(ve->request); + + if (!rq) { /* lazily cleanup after another engine handled rq */ + rb_erase_cached(rb, &execlists->virtual); + RB_CLEAR_NODE(rb); + rb = rb_first_cached(&execlists->virtual); + continue; + } + + if (!virtual_matches(ve, rq, engine)) { + rb = rb_next(rb); + continue; + } + + break; + } + if (last) { /* * Don't resubmit or switch until all outstanding @@ -712,7 +886,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK)) return; - if (need_preempt(engine, last)) { + if (need_preempt(engine, last, rb)) { inject_preempt_context(engine); return; } @@ -752,6 +926,93 @@ static void execlists_dequeue(struct intel_engine_cs *engine) last->tail = last->wa_tail; } + while (rb) { /* XXX virtual is always taking precedence */ + struct virtual_engine *ve = + rb_entry(rb, typeof(*ve), nodes[engine->id].rb); + struct i915_request *rq; + + spin_lock(&ve->base.timeline.lock); + + rq = ve->request; + if (unlikely(!rq)) { /* lost the race to a sibling */ + spin_unlock(&ve->base.timeline.lock); + rb_erase_cached(rb, &execlists->virtual); + RB_CLEAR_NODE(rb); + rb = rb_first_cached(&execlists->virtual); + continue; + } + + GEM_BUG_ON(rq != ve->request); + GEM_BUG_ON(rq->engine != &ve->base); + GEM_BUG_ON(rq->hw_context != &ve->context); + + if (rq_prio(rq) >= queue_prio(execlists)) { + if (!virtual_matches(ve, rq, engine)) { + spin_unlock(&ve->base.timeline.lock); + rb = rb_next(rb); + continue; + } + + if (last && !can_merge_rq(last, rq)) { + spin_unlock(&ve->base.timeline.lock); + return; /* leave this rq for another engine */ + } + + GEM_TRACE("%s: virtual rq=%llx:%lld%s, new engine? %s\n", + engine->name, + rq->fence.context, + rq->fence.seqno, + i915_request_completed(rq) ? "!" : + i915_request_started(rq) ? "*" : + "", + yesno(engine != ve->siblings[0])); + + ve->request = NULL; + ve->base.execlists.queue_priority_hint = INT_MIN; + rb_erase_cached(rb, &execlists->virtual); + RB_CLEAR_NODE(rb); + + GEM_BUG_ON(!(rq->execution_mask & engine->mask)); + rq->engine = engine; + + if (engine != ve->siblings[0]) { + u32 *regs = ve->context.lrc_reg_state; + unsigned int n; + + GEM_BUG_ON(READ_ONCE(ve->context.active)); + virtual_update_register_offsets(regs, engine); + + if (!list_empty(&ve->context.signals)) + virtual_xfer_breadcrumbs(ve, engine); + + /* + * Move the bound engine to the top of the list + * for future execution. We then kick this + * tasklet first before checking others, so that + * we preferentially reuse this set of bound + * registers. + */ + for (n = 1; n < ve->num_siblings; n++) { + if (ve->siblings[n] == engine) { + swap(ve->siblings[n], + ve->siblings[0]); + break; + } + } + + GEM_BUG_ON(ve->siblings[0] != engine); + } + + __i915_request_submit(rq); + trace_i915_request_in(rq, port_index(port, execlists)); + submit = true; + last = rq; + } + + spin_unlock(&ve->base.timeline.lock); + break; + } + while ((rb = rb_first_cached(&execlists->queue))) { struct i915_priolist *p = to_priolist(rb); struct i915_request *rq, *rn; @@ -1085,7 +1346,7 @@ static void execlists_submission_tasklet(unsigned long data) GEM_TRACE("%s awake?=%d, active=%x\n", engine->name, - !!engine->i915->gt.awake, + !!intel_wakeref_active(&engine->wakeref), engine->execlists.active); spin_lock_irqsave(&engine->timeline.lock, flags); @@ -1232,7 +1493,7 @@ __execlists_update_reg_state(struct intel_context *ce, /* RPCS */ if (engine->class == RENDER_CLASS) regs[CTX_R_PWR_CLK_STATE + 1] = - gen8_make_rpcs(engine->i915, &ce->sseu); + intel_sseu_make_rpcs(engine->i915, &ce->sseu); } static int @@ -1316,6 +1577,9 @@ static const struct intel_context_ops execlists_context_ops = { .pin = execlists_context_pin, .unpin = execlists_context_unpin, + .enter = intel_context_enter_engine, + .exit = intel_context_exit_engine, + .reset = execlists_context_reset, .destroy = execlists_context_destroy, }; @@ -1695,8 +1959,8 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) unsigned int i; int ret; - if (GEM_DEBUG_WARN_ON(engine->id != RCS0)) - return -EINVAL; + if (engine->class != RENDER_CLASS) + return 0; switch (INTEL_GEN(engine->i915)) { case 11: @@ -1787,7 +2051,7 @@ static bool unexpected_starting_state(struct intel_engine_cs *engine) return unexpected; } -static int gen8_init_common_ring(struct intel_engine_cs *engine) +static int execlists_resume(struct intel_engine_cs *engine) { intel_engine_apply_workarounds(engine); intel_engine_apply_whitelist(engine); @@ -1820,7 +2084,7 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine) * completed the reset in i915_gem_reset_finish(). If a request * is completed by one engine, it may then queue a request * to a second via its execlists->tasklet *just* as we are - * calling engine->init_hw() and also writing the ELSP. + * calling engine->resume() and also writing the ELSP. * Turning off the execlists->tasklet until the reset is over * prevents the race. */ @@ -1872,6 +2136,25 @@ static void reset_csb_pointers(struct intel_engine_execlists *execlists) &execlists->csb_status[reset_value]); } +static struct i915_request *active_request(struct i915_request *rq) +{ + const struct list_head * const list = &rq->engine->timeline.requests; + const struct intel_context * const context = rq->hw_context; + struct i915_request *active = NULL; + + list_for_each_entry_from_reverse(rq, list, link) { + if (i915_request_completed(rq)) + break; + + if (rq->hw_context != context) + break; + + active = rq; + } + + return active; +} + static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) { struct intel_engine_execlists * const execlists = &engine->execlists; @@ -1892,7 +2175,8 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) if (!port_isset(execlists->port)) goto out_clear; - ce = port_request(execlists->port)->hw_context; + rq = port_request(execlists->port); + ce = rq->hw_context; /* * Catch up with any missed context-switch interrupts. @@ -1905,16 +2189,10 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) */ execlists_cancel_port_requests(execlists); - /* Push back any incomplete requests for replay after the reset. */ - rq = __unwind_incomplete_requests(engine); + rq = active_request(rq); if (!rq) goto out_replay; - if (rq->hw_context != ce) { /* caught just before a CS event */ - rq = NULL; - goto out_replay; - } - /* * If this request hasn't started yet, e.g. it is waiting on a * semaphore, we need to avoid skipping the request or else we @@ -1961,13 +2239,16 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) } execlists_init_reg_state(regs, ce, engine, ce->ring); - /* Rerun the request; its payload has been neutered (if guilty). */ out_replay: + /* Rerun the request; its payload has been neutered (if guilty). */ ce->ring->head = rq ? intel_ring_wrap(ce->ring, rq->head) : ce->ring->tail; intel_ring_update_space(ce->ring); __execlists_update_reg_state(ce, engine); + /* Push back any incomplete requests for replay after the reset. */ + __unwind_incomplete_requests(engine); + out_clear: execlists_clear_all_active(execlists); } @@ -2041,6 +2322,26 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) i915_priolist_free(p); } + /* Cancel all attached virtual engines */ + while ((rb = rb_first_cached(&execlists->virtual))) { + struct virtual_engine *ve = + rb_entry(rb, typeof(*ve), nodes[engine->id].rb); + + rb_erase_cached(rb, &execlists->virtual); + RB_CLEAR_NODE(rb); + + spin_lock(&ve->base.timeline.lock); + if (ve->request) { + ve->request->engine = engine; + __i915_request_submit(ve->request); + dma_fence_set_error(&ve->request->fence, -EIO); + i915_request_mark_complete(ve->request); + ve->base.execlists.queue_priority_hint = INT_MIN; + ve->request = NULL; + } + spin_unlock(&ve->base.timeline.lock); + } + /* Remaining _unready_ requests will be nop'ed when submitted */ execlists->queue_priority_hint = INT_MIN; @@ -2270,12 +2571,6 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) request->timeline->hwsp_offset, 0); - cs = gen8_emit_ggtt_write(cs, - intel_engine_next_hangcheck_seqno(request->engine), - I915_GEM_HWS_HANGCHECK_ADDR, - MI_FLUSH_DW_STORE_INDEX); - - *cs++ = MI_USER_INTERRUPT; *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; @@ -2287,19 +2582,17 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) { + /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */ cs = gen8_emit_ggtt_write_rcs(cs, request->fence.seqno, request->timeline->hwsp_offset, PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | PIPE_CONTROL_DEPTH_CACHE_FLUSH | - PIPE_CONTROL_DC_FLUSH_ENABLE | - PIPE_CONTROL_FLUSH_ENABLE | - PIPE_CONTROL_CS_STALL); - - cs = gen8_emit_ggtt_write_rcs(cs, - intel_engine_next_hangcheck_seqno(request->engine), - I915_GEM_HWS_HANGCHECK_ADDR, - PIPE_CONTROL_STORE_DATA_INDEX); + PIPE_CONTROL_DC_FLUSH_ENABLE); + cs = gen8_emit_pipe_control(cs, + PIPE_CONTROL_FLUSH_ENABLE | + PIPE_CONTROL_CS_STALL, + 0); *cs++ = MI_USER_INTERRUPT; *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; @@ -2329,38 +2622,9 @@ static int gen8_init_rcs_context(struct i915_request *rq) return i915_gem_render_state_emit(rq); } -/** - * intel_logical_ring_cleanup() - deallocate the Engine Command Streamer - * @engine: Engine Command Streamer. - */ -void intel_logical_ring_cleanup(struct intel_engine_cs *engine) +static void execlists_park(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv; - - /* - * Tasklet cannot be active at this point due intel_mark_active/idle - * so this is just for documentation. - */ - if (WARN_ON(test_bit(TASKLET_STATE_SCHED, - &engine->execlists.tasklet.state))) - tasklet_kill(&engine->execlists.tasklet); - - dev_priv = engine->i915; - - if (engine->buffer) { - WARN_ON((ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0); - } - - if (engine->cleanup) - engine->cleanup(engine); - - intel_engine_cleanup_common(engine); - - lrc_destroy_wa_ctx(engine); - - engine->i915 = NULL; - dev_priv->engine[engine->id] = NULL; - kfree(engine); + intel_engine_park(engine); } void intel_execlists_set_default_submission(struct intel_engine_cs *engine) @@ -2374,7 +2638,7 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine) engine->reset.reset = execlists_reset; engine->reset.finish = execlists_reset_finish; - engine->park = NULL; + engine->park = execlists_park; engine->unpark = NULL; engine->flags |= I915_ENGINE_SUPPORTS_STATS; @@ -2385,11 +2649,20 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine) engine->flags |= I915_ENGINE_HAS_PREEMPTION; } +static void execlists_destroy(struct intel_engine_cs *engine) +{ + intel_engine_cleanup_common(engine); + lrc_destroy_wa_ctx(engine); + kfree(engine); +} + static void logical_ring_default_vfuncs(struct intel_engine_cs *engine) { /* Default vfuncs which can be overriden by each engine. */ - engine->init_hw = gen8_init_common_ring; + + engine->destroy = execlists_destroy; + engine->resume = execlists_resume; engine->reset.prepare = execlists_reset_prepare; engine->reset.reset = execlists_reset; @@ -2442,15 +2715,8 @@ logical_ring_default_irqs(struct intel_engine_cs *engine) engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift; } -static int -logical_ring_setup(struct intel_engine_cs *engine) +int intel_execlists_submission_setup(struct intel_engine_cs *engine) { - int err; - - err = intel_engine_setup_common(engine); - if (err) - return err; - /* Intentionally left blank. */ engine->buffer = NULL; @@ -2460,10 +2726,16 @@ logical_ring_setup(struct intel_engine_cs *engine) logical_ring_default_vfuncs(engine); logical_ring_default_irqs(engine); + if (engine->class == RENDER_CLASS) { + engine->init_context = gen8_init_rcs_context; + engine->emit_flush = gen8_emit_flush_render; + engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; + } + return 0; } -static int logical_ring_init(struct intel_engine_cs *engine) +int intel_execlists_submission_init(struct intel_engine_cs *engine) { struct drm_i915_private *i915 = engine->i915; struct intel_engine_execlists * const execlists = &engine->execlists; @@ -2475,6 +2747,15 @@ static int logical_ring_init(struct intel_engine_cs *engine) return ret; intel_engine_init_workarounds(engine); + intel_engine_init_whitelist(engine); + + if (intel_init_workaround_bb(engine)) + /* + * We continue even if we fail to initialize WA batch + * because we only expect rare glitches but nothing + * critical to prevent us from using GPU + */ + DRM_ERROR("WA batch buffer initialization failed\n"); if (HAS_LOGICAL_RING_ELSQ(i915)) { execlists->submit_reg = i915->uncore.regs + @@ -2507,182 +2788,6 @@ static int logical_ring_init(struct intel_engine_cs *engine) return 0; } -int logical_render_ring_init(struct intel_engine_cs *engine) -{ - int ret; - - ret = logical_ring_setup(engine); - if (ret) - return ret; - - /* Override some for render ring. */ - engine->init_context = gen8_init_rcs_context; - engine->emit_flush = gen8_emit_flush_render; - engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; - - ret = logical_ring_init(engine); - if (ret) - return ret; - - ret = intel_init_workaround_bb(engine); - if (ret) { - /* - * We continue even if we fail to initialize WA batch - * because we only expect rare glitches but nothing - * critical to prevent us from using GPU - */ - DRM_ERROR("WA batch buffer initialization failed: %d\n", - ret); - } - - intel_engine_init_whitelist(engine); - - return 0; -} - -int logical_xcs_ring_init(struct intel_engine_cs *engine) -{ - int err; - - err = logical_ring_setup(engine); - if (err) - return err; - - return logical_ring_init(engine); -} - -u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *req_sseu) -{ - const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu; - bool subslice_pg = sseu->has_subslice_pg; - struct intel_sseu ctx_sseu; - u8 slices, subslices; - u32 rpcs = 0; - - /* - * No explicit RPCS request is needed to ensure full - * slice/subslice/EU enablement prior to Gen9. - */ - if (INTEL_GEN(i915) < 9) - return 0; - - /* - * If i915/perf is active, we want a stable powergating configuration - * on the system. - * - * We could choose full enablement, but on ICL we know there are use - * cases which disable slices for functional, apart for performance - * reasons. So in this case we select a known stable subset. - */ - if (!i915->perf.oa.exclusive_stream) { - ctx_sseu = *req_sseu; - } else { - ctx_sseu = intel_device_default_sseu(i915); - - if (IS_GEN(i915, 11)) { - /* - * We only need subslice count so it doesn't matter - * which ones we select - just turn off low bits in the - * amount of half of all available subslices per slice. - */ - ctx_sseu.subslice_mask = - ~(~0 << (hweight8(ctx_sseu.subslice_mask) / 2)); - ctx_sseu.slice_mask = 0x1; - } - } - - slices = hweight8(ctx_sseu.slice_mask); - subslices = hweight8(ctx_sseu.subslice_mask); - - /* - * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits - * wide and Icelake has up to eight subslices, specfial programming is - * needed in order to correctly enable all subslices. - * - * According to documentation software must consider the configuration - * as 2x4x8 and hardware will translate this to 1x8x8. - * - * Furthemore, even though SScount is three bits, maximum documented - * value for it is four. From this some rules/restrictions follow: - * - * 1. - * If enabled subslice count is greater than four, two whole slices must - * be enabled instead. - * - * 2. - * When more than one slice is enabled, hardware ignores the subslice - * count altogether. - * - * From these restrictions it follows that it is not possible to enable - * a count of subslices between the SScount maximum of four restriction, - * and the maximum available number on a particular SKU. Either all - * subslices are enabled, or a count between one and four on the first - * slice. - */ - if (IS_GEN(i915, 11) && - slices == 1 && - subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) { - GEM_BUG_ON(subslices & 1); - - subslice_pg = false; - slices *= 2; - } - - /* - * Starting in Gen9, render power gating can leave - * slice/subslice/EU in a partially enabled state. We - * must make an explicit request through RPCS for full - * enablement. - */ - if (sseu->has_slice_pg) { - u32 mask, val = slices; - - if (INTEL_GEN(i915) >= 11) { - mask = GEN11_RPCS_S_CNT_MASK; - val <<= GEN11_RPCS_S_CNT_SHIFT; - } else { - mask = GEN8_RPCS_S_CNT_MASK; - val <<= GEN8_RPCS_S_CNT_SHIFT; - } - - GEM_BUG_ON(val & ~mask); - val &= mask; - - rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_S_CNT_ENABLE | val; - } - - if (subslice_pg) { - u32 val = subslices; - - val <<= GEN8_RPCS_SS_CNT_SHIFT; - - GEM_BUG_ON(val & ~GEN8_RPCS_SS_CNT_MASK); - val &= GEN8_RPCS_SS_CNT_MASK; - - rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_SS_CNT_ENABLE | val; - } - - if (sseu->has_eu_pg) { - u32 val; - - val = ctx_sseu.min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT; - GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK); - val &= GEN8_RPCS_EU_MIN_MASK; - - rpcs |= val; - - val = ctx_sseu.max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT; - GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK); - val &= GEN8_RPCS_EU_MAX_MASK; - - rpcs |= val; - - rpcs |= GEN8_RPCS_ENABLE; - } - - return rpcs; -} - static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine) { u32 indirect_ctx_offset; @@ -2721,12 +2826,15 @@ static void execlists_init_reg_state(u32 *regs, bool rcs = engine->class == RENDER_CLASS; u32 base = engine->mmio_base; - /* A context is actually a big batch buffer with several + /* + * A context is actually a big batch buffer with several * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The * values we are setting here are only for the first context restore: * on a subsequent save, the GPU will recreate this batchbuffer with new * values (including all the missing MI_LOAD_REGISTER_IMM commands that * we are not initializing here). + * + * Must keep consistent with virtual_update_register_offsets(). */ regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) | MI_LRI_FORCE_POSTED; @@ -2945,6 +3053,448 @@ static int execlists_context_deferred_alloc(struct intel_context *ce, return ret; } +static void virtual_context_destroy(struct kref *kref) +{ + struct virtual_engine *ve = + container_of(kref, typeof(*ve), context.ref); + unsigned int n; + + GEM_BUG_ON(ve->request); + GEM_BUG_ON(ve->context.active); + + for (n = 0; n < ve->num_siblings; n++) { + struct intel_engine_cs *sibling = ve->siblings[n]; + struct rb_node *node = &ve->nodes[sibling->id].rb; + + if (RB_EMPTY_NODE(node)) + continue; + + spin_lock_irq(&sibling->timeline.lock); + + /* Detachment is lazily performed in the execlists tasklet */ + if (!RB_EMPTY_NODE(node)) + rb_erase_cached(node, &sibling->execlists.virtual); + + spin_unlock_irq(&sibling->timeline.lock); + } + GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet)); + + if (ve->context.state) + __execlists_context_fini(&ve->context); + + kfree(ve->bonds); + + i915_timeline_fini(&ve->base.timeline); + kfree(ve); +} + +static void virtual_engine_initial_hint(struct virtual_engine *ve) +{ + int swp; + + /* + * Pick a random sibling on starting to help spread the load around. + * + * New contexts are typically created with exactly the same order + * of siblings, and often started in batches. Due to the way we iterate + * the array of sibling when submitting requests, sibling[0] is + * prioritised for dequeuing. If we make sure that sibling[0] is fairly + * randomised across the system, we also help spread the load by the + * first engine we inspect being different each time. + * + * NB This does not force us to execute on this engine, it will just + * typically be the first we inspect for submission. + */ + swp = prandom_u32_max(ve->num_siblings); + if (!swp) + return; + + swap(ve->siblings[swp], ve->siblings[0]); + virtual_update_register_offsets(ve->context.lrc_reg_state, + ve->siblings[0]); +} + +static int virtual_context_pin(struct intel_context *ce) +{ + struct virtual_engine *ve = container_of(ce, typeof(*ve), context); + int err; + + /* Note: we must use a real engine class for setting up reg state */ + err = __execlists_context_pin(ce, ve->siblings[0]); + if (err) + return err; + + virtual_engine_initial_hint(ve); + return 0; +} + +static void virtual_context_enter(struct intel_context *ce) +{ + struct virtual_engine *ve = container_of(ce, typeof(*ve), context); + unsigned int n; + + for (n = 0; n < ve->num_siblings; n++) + intel_engine_pm_get(ve->siblings[n]); +} + +static void virtual_context_exit(struct intel_context *ce) +{ + struct virtual_engine *ve = container_of(ce, typeof(*ve), context); + unsigned int n; + + ce->saturated = 0; + for (n = 0; n < ve->num_siblings; n++) + intel_engine_pm_put(ve->siblings[n]); +} + +static const struct intel_context_ops virtual_context_ops = { + .pin = virtual_context_pin, + .unpin = execlists_context_unpin, + + .enter = virtual_context_enter, + .exit = virtual_context_exit, + + .destroy = virtual_context_destroy, +}; + +static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve) +{ + struct i915_request *rq; + intel_engine_mask_t mask; + + rq = READ_ONCE(ve->request); + if (!rq) + return 0; + + /* The rq is ready for submission; rq->execution_mask is now stable. */ + mask = rq->execution_mask; + if (unlikely(!mask)) { + /* Invalid selection, submit to a random engine in error */ + i915_request_skip(rq, -ENODEV); + mask = ve->siblings[0]->mask; + } + + GEM_TRACE("%s: rq=%llx:%lld, mask=%x, prio=%d\n", + ve->base.name, + rq->fence.context, rq->fence.seqno, + mask, ve->base.execlists.queue_priority_hint); + + return mask; +} + +static void virtual_submission_tasklet(unsigned long data) +{ + struct virtual_engine * const ve = (struct virtual_engine *)data; + const int prio = ve->base.execlists.queue_priority_hint; + intel_engine_mask_t mask; + unsigned int n; + + rcu_read_lock(); + mask = virtual_submission_mask(ve); + rcu_read_unlock(); + if (unlikely(!mask)) + return; + + local_irq_disable(); + for (n = 0; READ_ONCE(ve->request) && n < ve->num_siblings; n++) { + struct intel_engine_cs *sibling = ve->siblings[n]; + struct ve_node * const node = &ve->nodes[sibling->id]; + struct rb_node **parent, *rb; + bool first; + + if (unlikely(!(mask & sibling->mask))) { + if (!RB_EMPTY_NODE(&node->rb)) { + spin_lock(&sibling->timeline.lock); + rb_erase_cached(&node->rb, + &sibling->execlists.virtual); + RB_CLEAR_NODE(&node->rb); + spin_unlock(&sibling->timeline.lock); + } + continue; + } + + spin_lock(&sibling->timeline.lock); + + if (!RB_EMPTY_NODE(&node->rb)) { + /* + * Cheat and avoid rebalancing the tree if we can + * reuse this node in situ. + */ + first = rb_first_cached(&sibling->execlists.virtual) == + &node->rb; + if (prio == node->prio || (prio > node->prio && first)) + goto submit_engine; + + rb_erase_cached(&node->rb, &sibling->execlists.virtual); + } + + rb = NULL; + first = true; + parent = &sibling->execlists.virtual.rb_root.rb_node; + while (*parent) { + struct ve_node *other; + + rb = *parent; + other = rb_entry(rb, typeof(*other), rb); + if (prio > other->prio) { + parent = &rb->rb_left; + } else { + parent = &rb->rb_right; + first = false; + } + } + + rb_link_node(&node->rb, rb, parent); + rb_insert_color_cached(&node->rb, + &sibling->execlists.virtual, + first); + +submit_engine: + GEM_BUG_ON(RB_EMPTY_NODE(&node->rb)); + node->prio = prio; + if (first && prio > sibling->execlists.queue_priority_hint) { + sibling->execlists.queue_priority_hint = prio; + tasklet_hi_schedule(&sibling->execlists.tasklet); + } + + spin_unlock(&sibling->timeline.lock); + } + local_irq_enable(); +} + +static void virtual_submit_request(struct i915_request *rq) +{ + struct virtual_engine *ve = to_virtual_engine(rq->engine); + + GEM_TRACE("%s: rq=%llx:%lld\n", + ve->base.name, + rq->fence.context, + rq->fence.seqno); + + GEM_BUG_ON(ve->base.submit_request != virtual_submit_request); + + GEM_BUG_ON(ve->request); + ve->base.execlists.queue_priority_hint = rq_prio(rq); + WRITE_ONCE(ve->request, rq); + + tasklet_schedule(&ve->base.execlists.tasklet); +} + +static struct ve_bond * +virtual_find_bond(struct virtual_engine *ve, + const struct intel_engine_cs *master) +{ + int i; + + for (i = 0; i < ve->num_bonds; i++) { + if (ve->bonds[i].master == master) + return &ve->bonds[i]; + } + + return NULL; +} + +static void +virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal) +{ + struct virtual_engine *ve = to_virtual_engine(rq->engine); + struct ve_bond *bond; + + bond = virtual_find_bond(ve, to_request(signal)->engine); + if (bond) { + intel_engine_mask_t old, new, cmp; + + cmp = READ_ONCE(rq->execution_mask); + do { + old = cmp; + new = cmp & bond->sibling_mask; + } while ((cmp = cmpxchg(&rq->execution_mask, old, new)) != old); + } +} + +struct intel_context * +intel_execlists_create_virtual(struct i915_gem_context *ctx, + struct intel_engine_cs **siblings, + unsigned int count) +{ + struct virtual_engine *ve; + unsigned int n; + int err; + + if (count == 0) + return ERR_PTR(-EINVAL); + + if (count == 1) + return intel_context_create(ctx, siblings[0]); + + ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL); + if (!ve) + return ERR_PTR(-ENOMEM); + + ve->base.i915 = ctx->i915; + ve->base.id = -1; + ve->base.class = OTHER_CLASS; + ve->base.uabi_class = I915_ENGINE_CLASS_INVALID; + ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; + ve->base.flags = I915_ENGINE_IS_VIRTUAL; + + snprintf(ve->base.name, sizeof(ve->base.name), "virtual"); + + err = i915_timeline_init(ctx->i915, &ve->base.timeline, NULL); + if (err) + goto err_put; + i915_timeline_set_subclass(&ve->base.timeline, TIMELINE_VIRTUAL); + + intel_engine_init_execlists(&ve->base); + + ve->base.cops = &virtual_context_ops; + ve->base.request_alloc = execlists_request_alloc; + + ve->base.schedule = i915_schedule; + ve->base.submit_request = virtual_submit_request; + ve->base.bond_execute = virtual_bond_execute; + + ve->base.execlists.queue_priority_hint = INT_MIN; + tasklet_init(&ve->base.execlists.tasklet, + virtual_submission_tasklet, + (unsigned long)ve); + + intel_context_init(&ve->context, ctx, &ve->base); + + for (n = 0; n < count; n++) { + struct intel_engine_cs *sibling = siblings[n]; + + GEM_BUG_ON(!is_power_of_2(sibling->mask)); + if (sibling->mask & ve->base.mask) { + DRM_DEBUG("duplicate %s entry in load balancer\n", + sibling->name); + err = -EINVAL; + goto err_put; + } + + /* + * The virtual engine implementation is tightly coupled to + * the execlists backend -- we push out request directly + * into a tree inside each physical engine. We could support + * layering if we handle cloning of the requests and + * submitting a copy into each backend. + */ + if (sibling->execlists.tasklet.func != + execlists_submission_tasklet) { + err = -ENODEV; + goto err_put; + } + + GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb)); + RB_CLEAR_NODE(&ve->nodes[sibling->id].rb); + + ve->siblings[ve->num_siblings++] = sibling; + ve->base.mask |= sibling->mask; + + /* + * All physical engines must be compatible for their emission + * functions (as we build the instructions during request + * construction and do not alter them before submission + * on the physical engine). We use the engine class as a guide + * here, although that could be refined. + */ + if (ve->base.class != OTHER_CLASS) { + if (ve->base.class != sibling->class) { + DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n", + sibling->class, ve->base.class); + err = -EINVAL; + goto err_put; + } + continue; + } + + ve->base.class = sibling->class; + ve->base.uabi_class = sibling->uabi_class; + snprintf(ve->base.name, sizeof(ve->base.name), + "v%dx%d", ve->base.class, count); + ve->base.context_size = sibling->context_size; + + ve->base.emit_bb_start = sibling->emit_bb_start; + ve->base.emit_flush = sibling->emit_flush; + ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb; + ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb; + ve->base.emit_fini_breadcrumb_dw = + sibling->emit_fini_breadcrumb_dw; + } + + return &ve->context; + +err_put: + intel_context_put(&ve->context); + return ERR_PTR(err); +} + +struct intel_context * +intel_execlists_clone_virtual(struct i915_gem_context *ctx, + struct intel_engine_cs *src) +{ + struct virtual_engine *se = to_virtual_engine(src); + struct intel_context *dst; + + dst = intel_execlists_create_virtual(ctx, + se->siblings, + se->num_siblings); + if (IS_ERR(dst)) + return dst; + + if (se->num_bonds) { + struct virtual_engine *de = to_virtual_engine(dst->engine); + + de->bonds = kmemdup(se->bonds, + sizeof(*se->bonds) * se->num_bonds, + GFP_KERNEL); + if (!de->bonds) { + intel_context_put(dst); + return ERR_PTR(-ENOMEM); + } + + de->num_bonds = se->num_bonds; + } + + return dst; +} + +int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine, + const struct intel_engine_cs *master, + const struct intel_engine_cs *sibling) +{ + struct virtual_engine *ve = to_virtual_engine(engine); + struct ve_bond *bond; + int n; + + /* Sanity check the sibling is part of the virtual engine */ + for (n = 0; n < ve->num_siblings; n++) + if (sibling == ve->siblings[n]) + break; + if (n == ve->num_siblings) + return -EINVAL; + + bond = virtual_find_bond(ve, master); + if (bond) { + bond->sibling_mask |= sibling->mask; + return 0; + } + + bond = krealloc(ve->bonds, + sizeof(*bond) * (ve->num_bonds + 1), + GFP_KERNEL); + if (!bond) + return -ENOMEM; + + bond[ve->num_bonds].master = master; + bond[ve->num_bonds].sibling_mask = sibling->mask; + + ve->bonds = bond; + ve->num_bonds++; + + return 0; +} + void intel_execlists_show_requests(struct intel_engine_cs *engine, struct drm_printer *m, void (*show_request)(struct drm_printer *m, @@ -3002,6 +3552,29 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, show_request(m, last, "\t\tQ "); } + last = NULL; + count = 0; + for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) { + struct virtual_engine *ve = + rb_entry(rb, typeof(*ve), nodes[engine->id].rb); + struct i915_request *rq = READ_ONCE(ve->request); + + if (rq) { + if (count++ < max - 1) + show_request(m, rq, "\t\tV "); + else + last = rq; + } + } + if (last) { + if (count > max) { + drm_printf(m, + "\t\t...skipping %d virtual requests...\n", + count - max); + } + show_request(m, last, "\t\tV "); + } + spin_unlock_irqrestore(&engine->timeline.lock, flags); } @@ -3037,5 +3610,5 @@ void intel_lr_context_reset(struct intel_engine_cs *engine, } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/intel_lrc.c" +#include "selftest_lrc.c" #endif diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h similarity index 86% rename from drivers/gpu/drm/i915/intel_lrc.h rename to drivers/gpu/drm/i915/gt/intel_lrc.h index 84aa230ea27be10acdee4ccf35dabf21a31fc3c2..e029aee87adf3c2dc99d321792b29060b66b15dc 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -24,8 +24,7 @@ #ifndef _INTEL_LRC_H_ #define _INTEL_LRC_H_ -#include "intel_ringbuffer.h" -#include "i915_gem_context.h" +#include "intel_engine.h" /* Execlists regs */ #define RING_ELSP(base) _MMIO((base) + 0x230) @@ -67,8 +66,9 @@ enum { /* Logical Rings */ void intel_logical_ring_cleanup(struct intel_engine_cs *engine); -int logical_render_ring_init(struct intel_engine_cs *engine); -int logical_xcs_ring_init(struct intel_engine_cs *engine); + +int intel_execlists_submission_setup(struct intel_engine_cs *engine); +int intel_execlists_submission_init(struct intel_engine_cs *engine); /* Logical Ring Contexts */ @@ -99,7 +99,6 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine); struct drm_printer; struct drm_i915_private; -struct i915_gem_context; void intel_execlists_set_default_submission(struct intel_engine_cs *engine); @@ -115,6 +114,17 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine, const char *prefix), unsigned int max); -u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu); +struct intel_context * +intel_execlists_create_virtual(struct i915_gem_context *ctx, + struct intel_engine_cs **siblings, + unsigned int count); + +struct intel_context * +intel_execlists_clone_virtual(struct i915_gem_context *ctx, + struct intel_engine_cs *src); + +int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine, + const struct intel_engine_cs *master, + const struct intel_engine_cs *sibling); #endif /* _INTEL_LRC_H_ */ diff --git a/drivers/gpu/drm/i915/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h similarity index 100% rename from drivers/gpu/drm/i915/intel_lrc_reg.h rename to drivers/gpu/drm/i915/gt/intel_lrc_reg.h diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c similarity index 99% rename from drivers/gpu/drm/i915/intel_mocs.c rename to drivers/gpu/drm/i915/gt/intel_mocs.c index 274ba78500c06156373dfc6fa89681a72a7eca29..79df66022d3ab0e263090028b56277510f9baf2f 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -20,9 +20,11 @@ * SOFTWARE. */ +#include "i915_drv.h" + +#include "intel_engine.h" #include "intel_mocs.h" #include "intel_lrc.h" -#include "intel_ringbuffer.h" /* structures required */ struct drm_i915_mocs_entry { diff --git a/drivers/gpu/drm/i915/intel_mocs.h b/drivers/gpu/drm/i915/gt/intel_mocs.h similarity index 97% rename from drivers/gpu/drm/i915/intel_mocs.h rename to drivers/gpu/drm/i915/gt/intel_mocs.h index 3d99d1271b2bc515a0468fc9445a3e98c8f7f919..0913704a1af2ec4b8fc868037686ef83100addf2 100644 --- a/drivers/gpu/drm/i915/intel_mocs.h +++ b/drivers/gpu/drm/i915/gt/intel_mocs.h @@ -49,7 +49,9 @@ * context handling keep the MOCS in step. */ -#include "i915_drv.h" +struct drm_i915_private; +struct i915_request; +struct intel_engine_cs; int intel_rcs_context_init_mocs(struct i915_request *rq); void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c similarity index 94% rename from drivers/gpu/drm/i915/i915_reset.c rename to drivers/gpu/drm/i915/gt/intel_reset.c index 677d59304e7828452076df13492748a7091b7cc5..8c60f7550f9cb480b966f4b316c4d2c2970039cc 100644 --- a/drivers/gpu/drm/i915/i915_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -9,9 +9,13 @@ #include "i915_drv.h" #include "i915_gpu_error.h" -#include "i915_reset.h" +#include "i915_irq.h" +#include "intel_engine_pm.h" +#include "intel_gt_pm.h" +#include "intel_reset.h" #include "intel_guc.h" +#include "intel_overlay.h" #define RESET_MAX_RETRIES 3 @@ -641,9 +645,6 @@ int intel_gpu_reset(struct drm_i915_private *i915, bool intel_has_gpu_reset(struct drm_i915_private *i915) { - if (USES_GUC(i915)) - return false; - if (!i915_modparams.reset) return NULL; @@ -683,6 +684,7 @@ static void reset_prepare_engine(struct intel_engine_cs *engine) * written to the powercontext is undefined and so we may lose * GPU state upon resume, i.e. fail to restart after a reset. */ + intel_engine_pm_get(engine); intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL); engine->reset.prepare(engine); } @@ -718,6 +720,7 @@ static void reset_prepare(struct drm_i915_private *i915) struct intel_engine_cs *engine; enum intel_engine_id id; + intel_gt_pm_get(i915); for_each_engine(engine, i915, id) reset_prepare_engine(engine); @@ -755,48 +758,10 @@ static int gt_reset(struct drm_i915_private *i915, static void reset_finish_engine(struct intel_engine_cs *engine) { engine->reset.finish(engine); + intel_engine_pm_put(engine); intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL); } -struct i915_gpu_restart { - struct work_struct work; - struct drm_i915_private *i915; -}; - -static void restart_work(struct work_struct *work) -{ - struct i915_gpu_restart *arg = container_of(work, typeof(*arg), work); - struct drm_i915_private *i915 = arg->i915; - struct intel_engine_cs *engine; - enum intel_engine_id id; - intel_wakeref_t wakeref; - - wakeref = intel_runtime_pm_get(i915); - mutex_lock(&i915->drm.struct_mutex); - WRITE_ONCE(i915->gpu_error.restart, NULL); - - for_each_engine(engine, i915, id) { - struct i915_request *rq; - - /* - * Ostensibily, we always want a context loaded for powersaving, - * so if the engine is idle after the reset, send a request - * to load our scratch kernel_context. - */ - if (!intel_engine_is_idle(engine)) - continue; - - rq = i915_request_alloc(engine, i915->kernel_context); - if (!IS_ERR(rq)) - i915_request_add(rq); - } - - mutex_unlock(&i915->drm.struct_mutex); - intel_runtime_pm_put(i915, wakeref); - - kfree(arg); -} - static void reset_finish(struct drm_i915_private *i915) { struct intel_engine_cs *engine; @@ -806,29 +771,7 @@ static void reset_finish(struct drm_i915_private *i915) reset_finish_engine(engine); intel_engine_signal_breadcrumbs(engine); } -} - -static void reset_restart(struct drm_i915_private *i915) -{ - struct i915_gpu_restart *arg; - - /* - * Following the reset, ensure that we always reload context for - * powersaving, and to correct engine->last_retired_context. Since - * this requires us to submit a request, queue a worker to do that - * task for us to evade any locking here. - */ - if (READ_ONCE(i915->gpu_error.restart)) - return; - - arg = kmalloc(sizeof(*arg), GFP_KERNEL); - if (arg) { - arg->i915 = i915; - INIT_WORK(&arg->work, restart_work); - - WRITE_ONCE(i915->gpu_error.restart, arg); - queue_work(i915->wq, &arg->work); - } + intel_gt_pm_put(i915); } static void nop_submit_request(struct i915_request *request) @@ -889,6 +832,7 @@ static void __i915_gem_set_wedged(struct drm_i915_private *i915) * in nop_submit_request. */ synchronize_rcu_expedited(); + set_bit(I915_WEDGED, &error->flags); /* Mark all executing requests as skipped */ for_each_engine(engine, i915, id) @@ -896,9 +840,6 @@ static void __i915_gem_set_wedged(struct drm_i915_private *i915) reset_finish(i915); - smp_mb__before_atomic(); - set_bit(I915_WEDGED, &error->flags); - GEM_TRACE("end\n"); } @@ -956,7 +897,7 @@ static bool __i915_gem_unset_wedged(struct drm_i915_private *i915) } mutex_unlock(&i915->gt.timelines.mutex); - intel_engines_sanitize(i915, false); + intel_gt_sanitize(i915, false); /* * Undo nop_submit_request. We prevent all new i915 requests from @@ -1034,7 +975,6 @@ void i915_reset(struct drm_i915_private *i915, GEM_TRACE("flags=%lx\n", error->flags); might_sleep(); - assert_rpm_wakelock_held(i915); GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags)); /* Clear any previous failed attempts at recovery. Time to try again. */ @@ -1087,8 +1027,6 @@ void i915_reset(struct drm_i915_private *i915, finish: reset_finish(i915); - if (!__i915_wedged(error)) - reset_restart(i915); return; taint: @@ -1104,7 +1042,7 @@ void i915_reset(struct drm_i915_private *i915, * rather than continue on into oblivion. For everyone else, * the system should still plod along, but they have been warned! */ - add_taint(TAINT_WARN, LOCKDEP_STILL_OK); + add_taint_for_CI(TAINT_WARN); error: __i915_gem_set_wedged(i915); goto finish; @@ -1137,6 +1075,9 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) GEM_TRACE("%s flags=%lx\n", engine->name, error->flags); GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags)); + if (!intel_wakeref_active(&engine->wakeref)) + return 0; + reset_prepare_engine(engine); if (msg) @@ -1168,7 +1109,7 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) * have been reset to their default values. Follow the init_ring * process to program RING_MODE, HWSP and re-enable submission. */ - ret = engine->init_hw(engine); + ret = engine->resume(engine); if (ret) goto out; @@ -1425,25 +1366,6 @@ int i915_terminally_wedged(struct drm_i915_private *i915) return __i915_wedged(error) ? -EIO : 0; } -bool i915_reset_flush(struct drm_i915_private *i915) -{ - int err; - - cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); - - flush_workqueue(i915->wq); - GEM_BUG_ON(READ_ONCE(i915->gpu_error.restart)); - - mutex_lock(&i915->drm.struct_mutex); - err = i915_gem_wait_for_idle(i915, - I915_WAIT_LOCKED | - I915_WAIT_FOR_IDLE_BOOST, - MAX_SCHEDULE_TIMEOUT); - mutex_unlock(&i915->drm.struct_mutex); - - return !err; -} - static void i915_wedge_me(struct work_struct *work) { struct i915_wedge_me *w = container_of(work, typeof(*w), work.work); @@ -1472,3 +1394,7 @@ void __i915_fini_wedge(struct i915_wedge_me *w) destroy_delayed_work_on_stack(&w->work); w->i915 = NULL; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_reset.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h similarity index 95% rename from drivers/gpu/drm/i915/i915_reset.h rename to drivers/gpu/drm/i915/gt/intel_reset.h index 3c0450289b8ff36bcb46c947f4006e1097eac9e7..b52efaab4941c43ecde00911b900802a684b6b33 100644 --- a/drivers/gpu/drm/i915/i915_reset.h +++ b/drivers/gpu/drm/i915/gt/intel_reset.h @@ -11,7 +11,7 @@ #include #include -#include "intel_engine_types.h" +#include "gt/intel_engine_types.h" struct drm_i915_private; struct i915_request; @@ -34,7 +34,6 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *reason); void i915_reset_request(struct i915_request *rq, bool guilty); -bool i915_reset_flush(struct drm_i915_private *i915); int __must_check i915_reset_trylock(struct drm_i915_private *i915); void i915_reset_unlock(struct drm_i915_private *i915, int tag); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c similarity index 94% rename from drivers/gpu/drm/i915/intel_ringbuffer.c rename to drivers/gpu/drm/i915/gt/intel_ringbuffer.c index 029fd8ec185724c03983c2adf956bfe9047d1919..f0d60affdba37288db4fd62dca984072c7f4182b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -33,9 +33,8 @@ #include "i915_drv.h" #include "i915_gem_render_state.h" -#include "i915_reset.h" #include "i915_trace.h" -#include "intel_drv.h" +#include "intel_reset.h" #include "intel_workarounds.h" /* Rough estimate of the typical request size, performing a flush, @@ -310,11 +309,6 @@ static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = rq->timeline->hwsp_offset | PIPE_CONTROL_GLOBAL_GTT; *cs++ = rq->fence.seqno; - *cs++ = GFX_OP_PIPE_CONTROL(4); - *cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_STORE_DATA_INDEX; - *cs++ = I915_GEM_HWS_HANGCHECK_ADDR | PIPE_CONTROL_GLOBAL_GTT; - *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); - *cs++ = MI_USER_INTERRUPT; *cs++ = MI_NOOP; @@ -416,13 +410,6 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = rq->timeline->hwsp_offset; *cs++ = rq->fence.seqno; - *cs++ = GFX_OP_PIPE_CONTROL(4); - *cs++ = (PIPE_CONTROL_QW_WRITE | - PIPE_CONTROL_STORE_DATA_INDEX | - PIPE_CONTROL_GLOBAL_GTT_IVB); - *cs++ = I915_GEM_HWS_HANGCHECK_ADDR; - *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); - *cs++ = MI_USER_INTERRUPT; *cs++ = MI_NOOP; @@ -441,12 +428,7 @@ static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; *cs++ = rq->fence.seqno; - *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; - *cs++ = I915_GEM_HWS_HANGCHECK_ADDR | MI_FLUSH_DW_USE_GTT; - *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); - *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); @@ -466,10 +448,6 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT; *cs++ = rq->fence.seqno; - *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX; - *cs++ = I915_GEM_HWS_HANGCHECK_ADDR | MI_FLUSH_DW_USE_GTT; - *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); - for (i = 0; i < GEN7_XCS_WA; i++) { *cs++ = MI_STORE_DWORD_INDEX; *cs++ = I915_GEM_HWS_SEQNO_ADDR; @@ -481,6 +459,7 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = 0; *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); @@ -638,12 +617,15 @@ static bool stop_ring(struct intel_engine_cs *engine) return (ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR) == 0; } -static int init_ring_common(struct intel_engine_cs *engine) +static int xcs_resume(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; struct intel_ring *ring = engine->buffer; int ret = 0; + GEM_TRACE("%s: ring:{HEAD:%04x, TAIL:%04x}\n", + engine->name, ring->head, ring->tail); + intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL); if (!stop_ring(engine)) { @@ -828,12 +810,23 @@ static int intel_rcs_ctx_init(struct i915_request *rq) return 0; } -static int init_render_ring(struct intel_engine_cs *engine) +static int rcs_resume(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - int ret = init_ring_common(engine); - if (ret) - return ret; + + /* + * Disable CONSTANT_BUFFER before it is loaded from the context + * image. For as it is loaded, it is executed and the stored + * address may no longer be valid, leading to a GPU hang. + * + * This imposes the requirement that userspace reload their + * CONSTANT_BUFFER on every batch, fortunately a requirement + * they are already accustomed to from before contexts were + * enabled. + */ + if (IS_GEN(dev_priv, 4)) + I915_WRITE(ECOSKPD, + _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE)); /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */ if (IS_GEN_RANGE(dev_priv, 4, 6)) @@ -873,10 +866,7 @@ static int init_render_ring(struct intel_engine_cs *engine) if (IS_GEN_RANGE(dev_priv, 6, 7)) I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); - if (INTEL_GEN(dev_priv) >= 6) - ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask); - - return 0; + return xcs_resume(engine); } static void cancel_requests(struct intel_engine_cs *engine) @@ -918,11 +908,8 @@ static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = I915_GEM_HWS_SEQNO_ADDR; *cs++ = rq->fence.seqno; - *cs++ = MI_STORE_DWORD_INDEX; - *cs++ = I915_GEM_HWS_HANGCHECK_ADDR; - *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); - *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); @@ -940,10 +927,6 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) *cs++ = MI_FLUSH; - *cs++ = MI_STORE_DWORD_INDEX; - *cs++ = I915_GEM_HWS_HANGCHECK_ADDR; - *cs++ = intel_engine_next_hangcheck_seqno(rq->engine); - BUILD_BUG_ON(GEN5_WA_STORES < 1); for (i = 0; i < GEN5_WA_STORES; i++) { *cs++ = MI_STORE_DWORD_INDEX; @@ -952,7 +935,6 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs) } *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; rq->tail = intel_ring_offset(rq, cs); assert_ring_tail_valid(rq->ring, rq->tail); @@ -1517,77 +1499,13 @@ static const struct intel_context_ops ring_context_ops = { .pin = ring_context_pin, .unpin = ring_context_unpin, + .enter = intel_context_enter_engine, + .exit = intel_context_exit_engine, + .reset = ring_context_reset, .destroy = ring_context_destroy, }; -static int intel_init_ring_buffer(struct intel_engine_cs *engine) -{ - struct i915_timeline *timeline; - struct intel_ring *ring; - int err; - - err = intel_engine_setup_common(engine); - if (err) - return err; - - timeline = i915_timeline_create(engine->i915, engine->status_page.vma); - if (IS_ERR(timeline)) { - err = PTR_ERR(timeline); - goto err; - } - GEM_BUG_ON(timeline->has_initial_breadcrumb); - - ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE); - i915_timeline_put(timeline); - if (IS_ERR(ring)) { - err = PTR_ERR(ring); - goto err; - } - - err = intel_ring_pin(ring); - if (err) - goto err_ring; - - GEM_BUG_ON(engine->buffer); - engine->buffer = ring; - - err = intel_engine_init_common(engine); - if (err) - goto err_unpin; - - GEM_BUG_ON(ring->timeline->hwsp_ggtt != engine->status_page.vma); - - return 0; - -err_unpin: - intel_ring_unpin(ring); -err_ring: - intel_ring_put(ring); -err: - intel_engine_cleanup_common(engine); - return err; -} - -void intel_engine_cleanup(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - WARN_ON(INTEL_GEN(dev_priv) > 2 && - (ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0); - - intel_ring_unpin(engine->buffer); - intel_ring_put(engine->buffer); - - if (engine->cleanup) - engine->cleanup(engine); - - intel_engine_cleanup_common(engine); - - dev_priv->engine[engine->id] = NULL; - kfree(engine); -} - static int load_pd_dir(struct i915_request *rq, const struct i915_hw_ppgtt *ppgtt) { @@ -1646,11 +1564,14 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) /* These flags are for resource streamer on HSW+ */ flags |= HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN; else + /* We need to save the extended state for powersaving modes */ flags |= MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN; len = 4; if (IS_GEN(i915, 7)) len += 2 + (num_engines ? 4 * num_engines + 6 : 0); + else if (IS_GEN(i915, 5)) + len += 2; if (flags & MI_FORCE_RESTORE) { GEM_BUG_ON(flags & MI_RESTORE_INHIBIT); flags &= ~MI_FORCE_RESTORE; @@ -1679,6 +1600,14 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) GEN6_PSMI_SLEEP_MSG_DISABLE); } } + } else if (IS_GEN(i915, 5)) { + /* + * This w/a is only listed for pre-production ilk a/b steppings, + * but is also mentioned for programming the powerctx. To be + * safe, just apply the workaround; we do not use SyncFlush so + * this should never take effect and so be a no-op! + */ + *cs++ = MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN; } if (force_restore) { @@ -1732,6 +1661,8 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) *cs++ = MI_NOOP; } *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + } else if (IS_GEN(i915, 5)) { + *cs++ = MI_SUSPEND_FLUSH; } intel_ring_advance(rq, cs); @@ -1776,7 +1707,6 @@ static int switch_context(struct i915_request *rq) u32 hw_flags = 0; int ret, i; - lockdep_assert_held(&rq->i915->drm.struct_mutex); GEM_BUG_ON(HAS_EXECLISTS(rq->i915)); if (ppgtt) { @@ -1888,12 +1818,12 @@ static int ring_request_alloc(struct i915_request *request) */ request->reserved_space += LEGACY_REQUEST_SIZE; - ret = switch_context(request); + /* Unconditionally invalidate GPU caches and TLBs. */ + ret = request->engine->emit_flush(request, EMIT_INVALIDATE); if (ret) return ret; - /* Unconditionally invalidate GPU caches and TLBs. */ - ret = request->engine->emit_flush(request, EMIT_INVALIDATE); + ret = switch_context(request); if (ret) return ret; @@ -1906,8 +1836,6 @@ static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes) struct i915_request *target; long timeout; - lockdep_assert_held(&ring->vma->vm->i915->drm.struct_mutex); - if (intel_ring_update_space(ring) >= bytes) return 0; @@ -2167,24 +2095,6 @@ static int gen6_ring_flush(struct i915_request *rq, u32 mode) return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB); } -static void intel_ring_init_irq(struct drm_i915_private *dev_priv, - struct intel_engine_cs *engine) -{ - if (INTEL_GEN(dev_priv) >= 6) { - engine->irq_enable = gen6_irq_enable; - engine->irq_disable = gen6_irq_disable; - } else if (INTEL_GEN(dev_priv) >= 5) { - engine->irq_enable = gen5_irq_enable; - engine->irq_disable = gen5_irq_disable; - } else if (INTEL_GEN(dev_priv) >= 3) { - engine->irq_enable = i9xx_irq_enable; - engine->irq_disable = i9xx_irq_disable; - } else { - engine->irq_enable = i8xx_irq_enable; - engine->irq_disable = i8xx_irq_disable; - } -} - static void i9xx_set_default_submission(struct intel_engine_cs *engine) { engine->submit_request = i9xx_submit_request; @@ -2200,15 +2110,51 @@ static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine) engine->submit_request = gen6_bsd_submit_request; } -static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, - struct intel_engine_cs *engine) +static void ring_destroy(struct intel_engine_cs *engine) { + struct drm_i915_private *dev_priv = engine->i915; + + WARN_ON(INTEL_GEN(dev_priv) > 2 && + (ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0); + + intel_ring_unpin(engine->buffer); + intel_ring_put(engine->buffer); + + intel_engine_cleanup_common(engine); + kfree(engine); +} + +static void setup_irq(struct intel_engine_cs *engine) +{ + struct drm_i915_private *i915 = engine->i915; + + if (INTEL_GEN(i915) >= 6) { + engine->irq_enable = gen6_irq_enable; + engine->irq_disable = gen6_irq_disable; + } else if (INTEL_GEN(i915) >= 5) { + engine->irq_enable = gen5_irq_enable; + engine->irq_disable = gen5_irq_disable; + } else if (INTEL_GEN(i915) >= 3) { + engine->irq_enable = i9xx_irq_enable; + engine->irq_disable = i9xx_irq_disable; + } else { + engine->irq_enable = i8xx_irq_enable; + engine->irq_disable = i8xx_irq_disable; + } +} + +static void setup_common(struct intel_engine_cs *engine) +{ + struct drm_i915_private *i915 = engine->i915; + /* gen8+ are only supported with execlists */ - GEM_BUG_ON(INTEL_GEN(dev_priv) >= 8); + GEM_BUG_ON(INTEL_GEN(i915) >= 8); - intel_ring_init_irq(dev_priv, engine); + setup_irq(engine); - engine->init_hw = init_ring_common; + engine->destroy = ring_destroy; + + engine->resume = xcs_resume; engine->reset.prepare = reset_prepare; engine->reset.reset = reset_ring; engine->reset.finish = reset_finish; @@ -2222,117 +2168,96 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, * engine->emit_init_breadcrumb(). */ engine->emit_fini_breadcrumb = i9xx_emit_breadcrumb; - if (IS_GEN(dev_priv, 5)) + if (IS_GEN(i915, 5)) engine->emit_fini_breadcrumb = gen5_emit_breadcrumb; engine->set_default_submission = i9xx_set_default_submission; - if (INTEL_GEN(dev_priv) >= 6) + if (INTEL_GEN(i915) >= 6) engine->emit_bb_start = gen6_emit_bb_start; - else if (INTEL_GEN(dev_priv) >= 4) + else if (INTEL_GEN(i915) >= 4) engine->emit_bb_start = i965_emit_bb_start; - else if (IS_I830(dev_priv) || IS_I845G(dev_priv)) + else if (IS_I830(i915) || IS_I845G(i915)) engine->emit_bb_start = i830_emit_bb_start; else engine->emit_bb_start = i915_emit_bb_start; } -int intel_init_render_ring_buffer(struct intel_engine_cs *engine) +static void setup_rcs(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - intel_ring_default_vfuncs(dev_priv, engine); + struct drm_i915_private *i915 = engine->i915; - if (HAS_L3_DPF(dev_priv)) + if (HAS_L3_DPF(i915)) engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT; engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT; - if (INTEL_GEN(dev_priv) >= 7) { + if (INTEL_GEN(i915) >= 7) { engine->init_context = intel_rcs_ctx_init; engine->emit_flush = gen7_render_ring_flush; engine->emit_fini_breadcrumb = gen7_rcs_emit_breadcrumb; - } else if (IS_GEN(dev_priv, 6)) { + } else if (IS_GEN(i915, 6)) { engine->init_context = intel_rcs_ctx_init; engine->emit_flush = gen6_render_ring_flush; engine->emit_fini_breadcrumb = gen6_rcs_emit_breadcrumb; - } else if (IS_GEN(dev_priv, 5)) { + } else if (IS_GEN(i915, 5)) { engine->emit_flush = gen4_render_ring_flush; } else { - if (INTEL_GEN(dev_priv) < 4) + if (INTEL_GEN(i915) < 4) engine->emit_flush = gen2_render_ring_flush; else engine->emit_flush = gen4_render_ring_flush; engine->irq_enable_mask = I915_USER_INTERRUPT; } - if (IS_HASWELL(dev_priv)) + if (IS_HASWELL(i915)) engine->emit_bb_start = hsw_emit_bb_start; - engine->init_hw = init_render_ring; - - ret = intel_init_ring_buffer(engine); - if (ret) - return ret; - - return 0; + engine->resume = rcs_resume; } -int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine) +static void setup_vcs(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = engine->i915; - - intel_ring_default_vfuncs(dev_priv, engine); + struct drm_i915_private *i915 = engine->i915; - if (INTEL_GEN(dev_priv) >= 6) { + if (INTEL_GEN(i915) >= 6) { /* gen6 bsd needs a special wa for tail updates */ - if (IS_GEN(dev_priv, 6)) + if (IS_GEN(i915, 6)) engine->set_default_submission = gen6_bsd_set_default_submission; engine->emit_flush = gen6_bsd_ring_flush; engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; - if (IS_GEN(dev_priv, 6)) + if (IS_GEN(i915, 6)) engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb; else engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb; } else { engine->emit_flush = bsd_ring_flush; - if (IS_GEN(dev_priv, 5)) + if (IS_GEN(i915, 5)) engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT; else engine->irq_enable_mask = I915_BSD_USER_INTERRUPT; } - - return intel_init_ring_buffer(engine); } -int intel_init_blt_ring_buffer(struct intel_engine_cs *engine) +static void setup_bcs(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = engine->i915; - - GEM_BUG_ON(INTEL_GEN(dev_priv) < 6); - - intel_ring_default_vfuncs(dev_priv, engine); + struct drm_i915_private *i915 = engine->i915; engine->emit_flush = gen6_ring_flush; engine->irq_enable_mask = GT_BLT_USER_INTERRUPT; - if (IS_GEN(dev_priv, 6)) + if (IS_GEN(i915, 6)) engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb; else engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb; - - return intel_init_ring_buffer(engine); } -int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine) +static void setup_vecs(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = engine->i915; - - GEM_BUG_ON(INTEL_GEN(dev_priv) < 7); + struct drm_i915_private *i915 = engine->i915; - intel_ring_default_vfuncs(dev_priv, engine); + GEM_BUG_ON(INTEL_GEN(i915) < 7); engine->emit_flush = gen6_ring_flush; engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT; @@ -2340,6 +2265,73 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine) engine->irq_disable = hsw_vebox_irq_disable; engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb; +} - return intel_init_ring_buffer(engine); +int intel_ring_submission_setup(struct intel_engine_cs *engine) +{ + setup_common(engine); + + switch (engine->class) { + case RENDER_CLASS: + setup_rcs(engine); + break; + case VIDEO_DECODE_CLASS: + setup_vcs(engine); + break; + case COPY_ENGINE_CLASS: + setup_bcs(engine); + break; + case VIDEO_ENHANCEMENT_CLASS: + setup_vecs(engine); + break; + default: + MISSING_CASE(engine->class); + return -ENODEV; + } + + return 0; +} + +int intel_ring_submission_init(struct intel_engine_cs *engine) +{ + struct i915_timeline *timeline; + struct intel_ring *ring; + int err; + + timeline = i915_timeline_create(engine->i915, engine->status_page.vma); + if (IS_ERR(timeline)) { + err = PTR_ERR(timeline); + goto err; + } + GEM_BUG_ON(timeline->has_initial_breadcrumb); + + ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE); + i915_timeline_put(timeline); + if (IS_ERR(ring)) { + err = PTR_ERR(ring); + goto err; + } + + err = intel_ring_pin(ring); + if (err) + goto err_ring; + + GEM_BUG_ON(engine->buffer); + engine->buffer = ring; + + err = intel_engine_init_common(engine); + if (err) + goto err_unpin; + + GEM_BUG_ON(ring->timeline->hwsp_ggtt != engine->status_page.vma); + + return 0; + +err_unpin: + intel_ring_unpin(ring); +err_ring: + intel_ring_put(ring); +err: + intel_engine_cleanup_common(engine); + return err; } diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c new file mode 100644 index 0000000000000000000000000000000000000000..7f448f3bea0b9feb01e99908b32310dac77d003a --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -0,0 +1,142 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "i915_drv.h" +#include "intel_lrc_reg.h" +#include "intel_sseu.h" + +u32 intel_sseu_make_rpcs(struct drm_i915_private *i915, + const struct intel_sseu *req_sseu) +{ + const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu; + bool subslice_pg = sseu->has_subslice_pg; + struct intel_sseu ctx_sseu; + u8 slices, subslices; + u32 rpcs = 0; + + /* + * No explicit RPCS request is needed to ensure full + * slice/subslice/EU enablement prior to Gen9. + */ + if (INTEL_GEN(i915) < 9) + return 0; + + /* + * If i915/perf is active, we want a stable powergating configuration + * on the system. + * + * We could choose full enablement, but on ICL we know there are use + * cases which disable slices for functional, apart for performance + * reasons. So in this case we select a known stable subset. + */ + if (!i915->perf.oa.exclusive_stream) { + ctx_sseu = *req_sseu; + } else { + ctx_sseu = intel_sseu_from_device_info(sseu); + + if (IS_GEN(i915, 11)) { + /* + * We only need subslice count so it doesn't matter + * which ones we select - just turn off low bits in the + * amount of half of all available subslices per slice. + */ + ctx_sseu.subslice_mask = + ~(~0 << (hweight8(ctx_sseu.subslice_mask) / 2)); + ctx_sseu.slice_mask = 0x1; + } + } + + slices = hweight8(ctx_sseu.slice_mask); + subslices = hweight8(ctx_sseu.subslice_mask); + + /* + * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits + * wide and Icelake has up to eight subslices, specfial programming is + * needed in order to correctly enable all subslices. + * + * According to documentation software must consider the configuration + * as 2x4x8 and hardware will translate this to 1x8x8. + * + * Furthemore, even though SScount is three bits, maximum documented + * value for it is four. From this some rules/restrictions follow: + * + * 1. + * If enabled subslice count is greater than four, two whole slices must + * be enabled instead. + * + * 2. + * When more than one slice is enabled, hardware ignores the subslice + * count altogether. + * + * From these restrictions it follows that it is not possible to enable + * a count of subslices between the SScount maximum of four restriction, + * and the maximum available number on a particular SKU. Either all + * subslices are enabled, or a count between one and four on the first + * slice. + */ + if (IS_GEN(i915, 11) && + slices == 1 && + subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) { + GEM_BUG_ON(subslices & 1); + + subslice_pg = false; + slices *= 2; + } + + /* + * Starting in Gen9, render power gating can leave + * slice/subslice/EU in a partially enabled state. We + * must make an explicit request through RPCS for full + * enablement. + */ + if (sseu->has_slice_pg) { + u32 mask, val = slices; + + if (INTEL_GEN(i915) >= 11) { + mask = GEN11_RPCS_S_CNT_MASK; + val <<= GEN11_RPCS_S_CNT_SHIFT; + } else { + mask = GEN8_RPCS_S_CNT_MASK; + val <<= GEN8_RPCS_S_CNT_SHIFT; + } + + GEM_BUG_ON(val & ~mask); + val &= mask; + + rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_S_CNT_ENABLE | val; + } + + if (subslice_pg) { + u32 val = subslices; + + val <<= GEN8_RPCS_SS_CNT_SHIFT; + + GEM_BUG_ON(val & ~GEN8_RPCS_SS_CNT_MASK); + val &= GEN8_RPCS_SS_CNT_MASK; + + rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_SS_CNT_ENABLE | val; + } + + if (sseu->has_eu_pg) { + u32 val; + + val = ctx_sseu.min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT; + GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK); + val &= GEN8_RPCS_EU_MIN_MASK; + + rpcs |= val; + + val = ctx_sseu.max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT; + GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK); + val &= GEN8_RPCS_EU_MAX_MASK; + + rpcs |= val; + + rpcs |= GEN8_RPCS_ENABLE; + } + + return rpcs; +} diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h new file mode 100644 index 0000000000000000000000000000000000000000..73bc824094e84e71f0763de3adf960bb3344fa78 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -0,0 +1,67 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_SSEU_H__ +#define __INTEL_SSEU_H__ + +#include + +struct drm_i915_private; + +#define GEN_MAX_SLICES (6) /* CNL upper bound */ +#define GEN_MAX_SUBSLICES (8) /* ICL upper bound */ + +struct sseu_dev_info { + u8 slice_mask; + u8 subslice_mask[GEN_MAX_SLICES]; + u16 eu_total; + u8 eu_per_subslice; + u8 min_eu_in_pool; + /* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */ + u8 subslice_7eu[3]; + u8 has_slice_pg:1; + u8 has_subslice_pg:1; + u8 has_eu_pg:1; + + /* Topology fields */ + u8 max_slices; + u8 max_subslices; + u8 max_eus_per_subslice; + + /* We don't have more than 8 eus per subslice at the moment and as we + * store eus enabled using bits, no need to multiply by eus per + * subslice. + */ + u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES]; +}; + +/* + * Powergating configuration for a particular (context,engine). + */ +struct intel_sseu { + u8 slice_mask; + u8 subslice_mask; + u8 min_eus_per_subslice; + u8 max_eus_per_subslice; +}; + +static inline struct intel_sseu +intel_sseu_from_device_info(const struct sseu_dev_info *sseu) +{ + struct intel_sseu value = { + .slice_mask = sseu->slice_mask, + .subslice_mask = sseu->subslice_mask[0], + .min_eus_per_subslice = sseu->max_eus_per_subslice, + .max_eus_per_subslice = sseu->max_eus_per_subslice, + }; + + return value; +} + +u32 intel_sseu_make_rpcs(struct drm_i915_private *i915, + const struct intel_sseu *req_sseu); + +#endif /* __INTEL_SSEU_H__ */ diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c similarity index 86% rename from drivers/gpu/drm/i915/intel_workarounds.c rename to drivers/gpu/drm/i915/gt/intel_workarounds.c index 9682dd575152e4dbada54c62f637b87423964203..ce4bcca3f83ca5b9de1225fd7b441124e4c51c9b 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -122,6 +122,7 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa) wal->wa_count++; wa_->val |= wa->val; wa_->mask |= wa->mask; + wa_->read |= wa->read; return; } } @@ -146,9 +147,10 @@ wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val) { struct i915_wa wa = { - .reg = reg, + .reg = reg, .mask = mask, - .val = val + .val = val, + .read = mask, }; _wa_add(wal, &wa); @@ -172,6 +174,19 @@ wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val) wa_write_masked_or(wal, reg, val, val); } +static void +ignore_wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val) +{ + struct i915_wa wa = { + .reg = reg, + .mask = mask, + .val = val, + /* Bonkers HW, skip verifying */ + }; + + _wa_add(wal, &wa); +} + #define WA_SET_BIT_MASKED(addr, mask) \ wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask)) @@ -181,10 +196,9 @@ wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val) #define WA_SET_FIELD_MASKED(addr, mask, value) \ wa_write_masked_or(wal, (addr), (mask), _MASKED_FIELD((mask), (value))) -static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine) +static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) { - struct i915_wa_list *wal = &engine->ctx_wa_list; - WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING); /* WaDisableAsyncFlipPerfMode:bdw,chv */ @@ -230,12 +244,12 @@ static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine) GEN6_WIZ_HASHING_16x4); } -static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine) +static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) { struct drm_i915_private *i915 = engine->i915; - struct i915_wa_list *wal = &engine->ctx_wa_list; - gen8_ctx_workarounds_init(engine); + gen8_ctx_workarounds_init(engine, wal); /* WaDisableThreadStallDopClockGating:bdw (pre-production) */ WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); @@ -258,11 +272,10 @@ static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine) (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0)); } -static void chv_ctx_workarounds_init(struct intel_engine_cs *engine) +static void chv_ctx_workarounds_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) { - struct i915_wa_list *wal = &engine->ctx_wa_list; - - gen8_ctx_workarounds_init(engine); + gen8_ctx_workarounds_init(engine, wal); /* WaDisableThreadStallDopClockGating:chv */ WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); @@ -271,10 +284,10 @@ static void chv_ctx_workarounds_init(struct intel_engine_cs *engine) WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X); } -static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine) +static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) { struct drm_i915_private *i915 = engine->i915; - struct i915_wa_list *wal = &engine->ctx_wa_list; if (HAS_LLC(i915)) { /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl @@ -369,10 +382,10 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine) WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ); } -static void skl_tune_iz_hashing(struct intel_engine_cs *engine) +static void skl_tune_iz_hashing(struct intel_engine_cs *engine, + struct i915_wa_list *wal) { struct drm_i915_private *i915 = engine->i915; - struct i915_wa_list *wal = &engine->ctx_wa_list; u8 vals[3] = { 0, 0, 0 }; unsigned int i; @@ -409,17 +422,17 @@ static void skl_tune_iz_hashing(struct intel_engine_cs *engine) GEN9_IZ_HASHING(0, vals[0])); } -static void skl_ctx_workarounds_init(struct intel_engine_cs *engine) +static void skl_ctx_workarounds_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) { - gen9_ctx_workarounds_init(engine); - skl_tune_iz_hashing(engine); + gen9_ctx_workarounds_init(engine, wal); + skl_tune_iz_hashing(engine, wal); } -static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine) +static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) { - struct i915_wa_list *wal = &engine->ctx_wa_list; - - gen9_ctx_workarounds_init(engine); + gen9_ctx_workarounds_init(engine, wal); /* WaDisableThreadStallDopClockGating:bxt */ WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, @@ -430,12 +443,12 @@ static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine) GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); } -static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine) +static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) { struct drm_i915_private *i915 = engine->i915; - struct i915_wa_list *wal = &engine->ctx_wa_list; - gen9_ctx_workarounds_init(engine); + gen9_ctx_workarounds_init(engine, wal); /* WaToEnableHwFixForPushConstHWBug:kbl */ if (IS_KBL_REVID(i915, KBL_REVID_C0, REVID_FOREVER)) @@ -447,22 +460,20 @@ static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine) GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); } -static void glk_ctx_workarounds_init(struct intel_engine_cs *engine) +static void glk_ctx_workarounds_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) { - struct i915_wa_list *wal = &engine->ctx_wa_list; - - gen9_ctx_workarounds_init(engine); + gen9_ctx_workarounds_init(engine, wal); /* WaToEnableHwFixForPushConstHWBug:glk */ WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); } -static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine) +static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) { - struct i915_wa_list *wal = &engine->ctx_wa_list; - - gen9_ctx_workarounds_init(engine); + gen9_ctx_workarounds_init(engine, wal); /* WaToEnableHwFixForPushConstHWBug:cfl */ WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, @@ -473,10 +484,10 @@ static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine) GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); } -static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine) +static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) { struct drm_i915_private *i915 = engine->i915; - struct i915_wa_list *wal = &engine->ctx_wa_list; /* WaForceContextSaveRestoreNonCoherent:cnl */ WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0, @@ -513,10 +524,16 @@ static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine) WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT); } -static void icl_ctx_workarounds_init(struct intel_engine_cs *engine) +static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) { struct drm_i915_private *i915 = engine->i915; - struct i915_wa_list *wal = &engine->ctx_wa_list; + + /* WaDisableBankHangMode:icl */ + wa_write(wal, + GEN8_L3CNTLREG, + intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) | + GEN8_ERRDETBCTRL); /* Wa_1604370585:icl (pre-prod) * Formerly known as WaPushConstantDereferenceHoldDisable @@ -556,33 +573,42 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine) WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK, GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL); + + /* allow headerless messages for preemptible GPGPU context */ + WA_SET_BIT_MASKED(GEN10_SAMPLER_MODE, + GEN11_SAMPLER_ENABLE_HEADLESS_MSG); } -void intel_engine_init_ctx_wa(struct intel_engine_cs *engine) +static void +__intel_engine_init_ctx_wa(struct intel_engine_cs *engine, + struct i915_wa_list *wal, + const char *name) { struct drm_i915_private *i915 = engine->i915; - struct i915_wa_list *wal = &engine->ctx_wa_list; - wa_init_start(wal, "context"); + if (engine->class != RENDER_CLASS) + return; + + wa_init_start(wal, name); if (IS_GEN(i915, 11)) - icl_ctx_workarounds_init(engine); + icl_ctx_workarounds_init(engine, wal); else if (IS_CANNONLAKE(i915)) - cnl_ctx_workarounds_init(engine); + cnl_ctx_workarounds_init(engine, wal); else if (IS_COFFEELAKE(i915)) - cfl_ctx_workarounds_init(engine); + cfl_ctx_workarounds_init(engine, wal); else if (IS_GEMINILAKE(i915)) - glk_ctx_workarounds_init(engine); + glk_ctx_workarounds_init(engine, wal); else if (IS_KABYLAKE(i915)) - kbl_ctx_workarounds_init(engine); + kbl_ctx_workarounds_init(engine, wal); else if (IS_BROXTON(i915)) - bxt_ctx_workarounds_init(engine); + bxt_ctx_workarounds_init(engine, wal); else if (IS_SKYLAKE(i915)) - skl_ctx_workarounds_init(engine); + skl_ctx_workarounds_init(engine, wal); else if (IS_CHERRYVIEW(i915)) - chv_ctx_workarounds_init(engine); + chv_ctx_workarounds_init(engine, wal); else if (IS_BROADWELL(i915)) - bdw_ctx_workarounds_init(engine); + bdw_ctx_workarounds_init(engine, wal); else if (INTEL_GEN(i915) < 8) return; else @@ -591,6 +617,11 @@ void intel_engine_init_ctx_wa(struct intel_engine_cs *engine) wa_init_finish(wal); } +void intel_engine_init_ctx_wa(struct intel_engine_cs *engine) +{ + __intel_engine_init_ctx_wa(engine, &engine->ctx_wa_list, "context"); +} + int intel_engine_emit_ctx_wa(struct i915_request *rq) { struct i915_wa_list *wal = &rq->engine->ctx_wa_list; @@ -909,6 +940,21 @@ wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal) return fw; } +static bool +wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from) +{ + if ((cur ^ wa->val) & wa->read) { + DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n", + name, from, i915_mmio_reg_offset(wa->reg), + cur, cur & wa->read, + wa->val, wa->mask); + + return false; + } + + return true; +} + static void wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal) { @@ -927,6 +973,10 @@ wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal) for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { intel_uncore_rmw_fw(uncore, wa->reg, wa->mask, wa->val); + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + wa_verify(wa, + intel_uncore_read_fw(uncore, wa->reg), + wal->name, "application"); } intel_uncore_forcewake_put__locked(uncore, fw); @@ -938,20 +988,6 @@ void intel_gt_apply_workarounds(struct drm_i915_private *i915) wa_list_apply(&i915->uncore, &i915->gt_wa_list); } -static bool -wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from) -{ - if ((cur ^ wa->val) & wa->mask) { - DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n", - name, from, i915_mmio_reg_offset(wa->reg), cur, - cur & wa->mask, wa->val, wa->mask); - - return false; - } - - return true; -} - static bool wa_list_verify(struct intel_uncore *uncore, const struct i915_wa_list *wal, const char *from) @@ -1056,7 +1092,8 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine) struct drm_i915_private *i915 = engine->i915; struct i915_wa_list *w = &engine->whitelist; - GEM_BUG_ON(engine->id != RCS0); + if (engine->class != RENDER_CLASS) + return; wa_init_start(w, "whitelist"); @@ -1117,9 +1154,10 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE); /* WaPipelineFlushCoherentLines:icl */ - wa_write_or(wal, - GEN8_L3SQCREG4, - GEN8_LQSC_FLUSH_COHERENT_LINES); + ignore_wa_write_or(wal, + GEN8_L3SQCREG4, + GEN8_LQSC_FLUSH_COHERENT_LINES, + GEN8_LQSC_FLUSH_COHERENT_LINES); /* * Wa_1405543622:icl @@ -1146,9 +1184,10 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) * Wa_1405733216:icl * Formerly known as WaDisableCleanEvicts */ - wa_write_or(wal, - GEN8_L3SQCREG4, - GEN11_LQSC_CLEAN_EVICT_DISABLE); + ignore_wa_write_or(wal, + GEN8_L3SQCREG4, + GEN11_LQSC_CLEAN_EVICT_DISABLE, + GEN11_LQSC_CLEAN_EVICT_DISABLE); /* WaForwardProgressSoftReset:icl */ wa_write_or(wal, @@ -1254,6 +1293,128 @@ void intel_engine_apply_workarounds(struct intel_engine_cs *engine) wa_list_apply(engine->uncore, &engine->wa_list); } +static struct i915_vma * +create_scratch(struct i915_address_space *vm, int count) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + unsigned int size; + int err; + + size = round_up(count * sizeof(u32), PAGE_SIZE); + obj = i915_gem_object_create_internal(vm->i915, size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } + + err = i915_vma_pin(vma, 0, 0, + i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER); + if (err) + goto err_obj; + + return vma; + +err_obj: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static int +wa_list_srm(struct i915_request *rq, + const struct i915_wa_list *wal, + struct i915_vma *vma) +{ + const struct i915_wa *wa; + unsigned int i; + u32 srm, *cs; + + srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; + if (INTEL_GEN(rq->i915) >= 8) + srm++; + + cs = intel_ring_begin(rq, 4 * wal->count); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + for (i = 0, wa = wal->list; i < wal->count; i++, wa++) { + *cs++ = srm; + *cs++ = i915_mmio_reg_offset(wa->reg); + *cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i; + *cs++ = 0; + } + intel_ring_advance(rq, cs); + + return 0; +} + +static int engine_wa_list_verify(struct intel_context *ce, + const struct i915_wa_list * const wal, + const char *from) +{ + const struct i915_wa *wa; + struct i915_request *rq; + struct i915_vma *vma; + unsigned int i; + u32 *results; + int err; + + if (!wal->count) + return 0; + + vma = create_scratch(&ce->engine->i915->ggtt.vm, wal->count); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_vma; + } + + err = wa_list_srm(rq, wal, vma); + if (err) + goto err_vma; + + i915_request_add(rq); + if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) { + err = -ETIME; + goto err_vma; + } + + results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); + if (IS_ERR(results)) { + err = PTR_ERR(results); + goto err_vma; + } + + err = 0; + for (i = 0, wa = wal->list; i < wal->count; i++, wa++) + if (!wa_verify(wa, results[i], wal->name, from)) + err = -ENXIO; + + i915_gem_object_unpin_map(vma->obj); + +err_vma: + i915_vma_unpin(vma); + i915_vma_put(vma); + return err; +} + +int intel_engine_verify_workarounds(struct intel_engine_cs *engine, + const char *from) +{ + return engine_wa_list_verify(engine->kernel_context, + &engine->wa_list, + from); +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/intel_workarounds.c" +#include "selftest_workarounds.c" #endif diff --git a/drivers/gpu/drm/i915/intel_workarounds.h b/drivers/gpu/drm/i915/gt/intel_workarounds.h similarity index 79% rename from drivers/gpu/drm/i915/intel_workarounds.h rename to drivers/gpu/drm/i915/gt/intel_workarounds.h index 34eee5ec511e4aea19ecb8fe52575b0f625c5c50..3761a6ee58bb9ad6ca1a226745fdf06ff997daa9 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.h +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.h @@ -4,13 +4,17 @@ * Copyright © 2014-2018 Intel Corporation */ -#ifndef _I915_WORKAROUNDS_H_ -#define _I915_WORKAROUNDS_H_ +#ifndef _INTEL_WORKAROUNDS_H_ +#define _INTEL_WORKAROUNDS_H_ #include #include "intel_workarounds_types.h" +struct drm_i915_private; +struct i915_request; +struct intel_engine_cs; + static inline void intel_wa_list_free(struct i915_wa_list *wal) { kfree(wal->list); @@ -30,5 +34,7 @@ void intel_engine_apply_whitelist(struct intel_engine_cs *engine); void intel_engine_init_workarounds(struct intel_engine_cs *engine); void intel_engine_apply_workarounds(struct intel_engine_cs *engine); +int intel_engine_verify_workarounds(struct intel_engine_cs *engine, + const char *from); #endif diff --git a/drivers/gpu/drm/i915/intel_workarounds_types.h b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h similarity index 88% rename from drivers/gpu/drm/i915/intel_workarounds_types.h rename to drivers/gpu/drm/i915/gt/intel_workarounds_types.h index 30918da180ff2257e09b70e55b6c9f8beb75f788..42ac1fb9957281fe8398560bd117a4528b8afd89 100644 --- a/drivers/gpu/drm/i915/intel_workarounds_types.h +++ b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h @@ -12,9 +12,10 @@ #include "i915_reg.h" struct i915_wa { - i915_reg_t reg; - u32 mask; - u32 val; + i915_reg_t reg; + u32 mask; + u32 val; + u32 read; }; struct i915_wa_list { diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c similarity index 88% rename from drivers/gpu/drm/i915/selftests/mock_engine.c rename to drivers/gpu/drm/i915/gt/mock_engine.c index 61a8206ed6772cdbe7c6811d7d11b5d99d7e78c1..2941916b37bf5540295caae064ea3a7a75ca5990 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -22,8 +22,13 @@ * */ +#include "i915_drv.h" +#include "i915_gem_context.h" +#include "intel_context.h" +#include "intel_engine_pm.h" + #include "mock_engine.h" -#include "mock_request.h" +#include "selftests/mock_request.h" struct mock_ring { struct intel_ring base; @@ -154,6 +159,9 @@ static const struct intel_context_ops mock_context_ops = { .pin = mock_context_pin, .unpin = mock_context_unpin, + .enter = intel_context_enter_engine, + .exit = intel_context_exit_engine, + .destroy = mock_context_destroy, }; @@ -257,29 +265,44 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, engine->base.reset.finish = mock_reset_finish; engine->base.cancel_requests = mock_cancel_requests; - if (i915_timeline_init(i915, &engine->base.timeline, NULL)) - goto err_free; - i915_timeline_set_subclass(&engine->base.timeline, TIMELINE_ENGINE); - - intel_engine_init_breadcrumbs(&engine->base); - /* fake hw queue */ spin_lock_init(&engine->hw_lock); timer_setup(&engine->hw_delay, hw_delay_complete, 0); INIT_LIST_HEAD(&engine->hw_queue); - if (pin_context(i915->kernel_context, &engine->base, - &engine->base.kernel_context)) + return &engine->base; +} + +int mock_engine_init(struct intel_engine_cs *engine) +{ + struct drm_i915_private *i915 = engine->i915; + int err; + + intel_engine_init_breadcrumbs(engine); + intel_engine_init_execlists(engine); + intel_engine_init__pm(engine); + + if (i915_timeline_init(i915, &engine->timeline, NULL)) goto err_breadcrumbs; + i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE); - return &engine->base; + engine->kernel_context = + i915_gem_context_get_engine(i915->kernel_context, engine->id); + if (IS_ERR(engine->kernel_context)) + goto err_timeline; + + err = intel_context_pin(engine->kernel_context); + intel_context_put(engine->kernel_context); + if (err) + goto err_timeline; + + return 0; +err_timeline: + i915_timeline_fini(&engine->timeline); err_breadcrumbs: - intel_engine_fini_breadcrumbs(&engine->base); - i915_timeline_fini(&engine->base.timeline); -err_free: - kfree(engine); - return NULL; + intel_engine_fini_breadcrumbs(engine); + return -ENOMEM; } void mock_engine_flush(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.h b/drivers/gpu/drm/i915/gt/mock_engine.h similarity index 95% rename from drivers/gpu/drm/i915/selftests/mock_engine.h rename to drivers/gpu/drm/i915/gt/mock_engine.h index b9cc3a245f1684264074372de1340bf03c7371b1..3f9b698c49d2a9142b736a724b4b7e2dc3d26c1c 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.h +++ b/drivers/gpu/drm/i915/gt/mock_engine.h @@ -29,7 +29,7 @@ #include #include -#include "../intel_ringbuffer.h" +#include "gt/intel_engine.h" struct mock_engine { struct intel_engine_cs base; @@ -42,6 +42,8 @@ struct mock_engine { struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, const char *name, int id); +int mock_engine_init(struct intel_engine_cs *engine); + void mock_engine_flush(struct intel_engine_cs *engine); void mock_engine_reset(struct intel_engine_cs *engine); void mock_engine_free(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/selftests/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c similarity index 100% rename from drivers/gpu/drm/i915/selftests/intel_engine_cs.c rename to drivers/gpu/drm/i915/gt/selftest_engine_cs.c diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c similarity index 90% rename from drivers/gpu/drm/i915/selftests/intel_hangcheck.c rename to drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 050bd1e19e02ee772d93438d325ef14ca8b9cd25..48a51739b9265956a123f99cc54c512dc3327118 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -24,14 +24,18 @@ #include -#include "../i915_selftest.h" -#include "i915_random.h" -#include "igt_flush_test.h" -#include "igt_reset.h" -#include "igt_wedge_me.h" +#include "intel_engine_pm.h" -#include "mock_context.h" -#include "mock_drm.h" +#include "i915_selftest.h" +#include "selftests/i915_random.h" +#include "selftests/igt_flush_test.h" +#include "selftests/igt_gem_utils.h" +#include "selftests/igt_reset.h" +#include "selftests/igt_wedge_me.h" +#include "selftests/igt_atomic.h" + +#include "selftests/mock_context.h" +#include "selftests/mock_drm.h" #define IGT_IDLE_TIMEOUT 50 /* ms; time to wait after flushing between tests */ @@ -173,7 +177,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) if (err) goto unpin_vma; - rq = i915_request_alloc(engine, h->ctx); + rq = igt_request_alloc(h->ctx, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto unpin_hws; @@ -362,54 +366,6 @@ static int igt_hang_sanitycheck(void *arg) return err; } -static int igt_global_reset(void *arg) -{ - struct drm_i915_private *i915 = arg; - unsigned int reset_count; - int err = 0; - - /* Check that we can issue a global GPU reset */ - - igt_global_reset_lock(i915); - - reset_count = i915_reset_count(&i915->gpu_error); - - i915_reset(i915, ALL_ENGINES, NULL); - - if (i915_reset_count(&i915->gpu_error) == reset_count) { - pr_err("No GPU reset recorded!\n"); - err = -EINVAL; - } - - igt_global_reset_unlock(i915); - - if (i915_reset_failed(i915)) - err = -EIO; - - return err; -} - -static int igt_wedged_reset(void *arg) -{ - struct drm_i915_private *i915 = arg; - intel_wakeref_t wakeref; - - /* Check that we can recover a wedged device with a GPU reset */ - - igt_global_reset_lock(i915); - wakeref = intel_runtime_pm_get(i915); - - i915_gem_set_wedged(i915); - - GEM_BUG_ON(!i915_reset_failed(i915)); - i915_reset(i915, ALL_ENGINES, NULL); - - intel_runtime_pm_put(i915, wakeref); - igt_global_reset_unlock(i915); - - return i915_reset_failed(i915) ? -EIO : 0; -} - static bool wait_for_idle(struct intel_engine_cs *engine) { return wait_for(intel_engine_is_idle(engine), IGT_IDLE_TIMEOUT) == 0; @@ -453,7 +409,7 @@ static int igt_reset_nop(void *arg) for (i = 0; i < 16; i++) { struct i915_request *rq; - rq = i915_request_alloc(engine, ctx); + rq = igt_request_alloc(ctx, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); break; @@ -479,19 +435,6 @@ static int igt_reset_nop(void *arg) break; } - if (!i915_reset_flush(i915)) { - struct drm_printer p = - drm_info_printer(i915->drm.dev); - - pr_err("%s failed to idle after reset\n", - engine->name); - intel_engine_dump(engine, &p, - "%s\n", engine->name); - - err = -EIO; - break; - } - err = igt_flush_test(i915, 0); if (err) break; @@ -565,7 +508,7 @@ static int igt_reset_nop_engine(void *arg) for (i = 0; i < 16; i++) { struct i915_request *rq; - rq = i915_request_alloc(engine, ctx); + rq = igt_request_alloc(ctx, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); break; @@ -594,19 +537,6 @@ static int igt_reset_nop_engine(void *arg) err = -EINVAL; break; } - - if (!i915_reset_flush(i915)) { - struct drm_printer p = - drm_info_printer(i915->drm.dev); - - pr_err("%s failed to idle after reset\n", - engine->name); - intel_engine_dump(engine, &p, - "%s\n", engine->name); - - err = -EIO; - break; - } } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); pr_info("%s(%s): %d resets\n", __func__, engine->name, count); @@ -669,6 +599,7 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) reset_engine_count = i915_reset_engine_count(&i915->gpu_error, engine); + intel_engine_pm_get(engine); set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); do { if (active) { @@ -721,21 +652,9 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) err = -EINVAL; break; } - - if (!i915_reset_flush(i915)) { - struct drm_printer p = - drm_info_printer(i915->drm.dev); - - pr_err("%s failed to idle after reset\n", - engine->name); - intel_engine_dump(engine, &p, - "%s\n", engine->name); - - err = -EIO; - break; - } } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + intel_engine_pm_put(engine); if (err) break; @@ -835,7 +754,7 @@ static int active_engine(void *data) struct i915_request *new; mutex_lock(&engine->i915->drm.struct_mutex); - new = i915_request_alloc(engine, ctx[idx]); + new = igt_request_alloc(ctx[idx], engine); if (IS_ERR(new)) { mutex_unlock(&engine->i915->drm.struct_mutex); err = PTR_ERR(new); @@ -942,6 +861,7 @@ static int __igt_reset_engines(struct drm_i915_private *i915, get_task_struct(tsk); } + intel_engine_pm_get(engine); set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); do { struct i915_request *rq = NULL; @@ -1018,6 +938,7 @@ static int __igt_reset_engines(struct drm_i915_private *i915, } } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + intel_engine_pm_put(engine); pr_info("i915_reset_engine(%s:%s): %lu resets\n", engine->name, test_name, count); @@ -1069,7 +990,9 @@ static int __igt_reset_engines(struct drm_i915_private *i915, if (err) break; - err = igt_flush_test(i915, 0); + mutex_lock(&i915->drm.struct_mutex); + err = igt_flush_test(i915, I915_WAIT_LOCKED); + mutex_unlock(&i915->drm.struct_mutex); if (err) break; } @@ -1681,44 +1604,8 @@ static int igt_handle_error(void *arg) return err; } -static void __preempt_begin(void) -{ - preempt_disable(); -} - -static void __preempt_end(void) -{ - preempt_enable(); -} - -static void __softirq_begin(void) -{ - local_bh_disable(); -} - -static void __softirq_end(void) -{ - local_bh_enable(); -} - -static void __hardirq_begin(void) -{ - local_irq_disable(); -} - -static void __hardirq_end(void) -{ - local_irq_enable(); -} - -struct atomic_section { - const char *name; - void (*critical_section_begin)(void); - void (*critical_section_end)(void); -}; - static int __igt_atomic_reset_engine(struct intel_engine_cs *engine, - const struct atomic_section *p, + const struct igt_atomic_section *p, const char *mode) { struct tasklet_struct * const t = &engine->execlists.tasklet; @@ -1743,7 +1630,7 @@ static int __igt_atomic_reset_engine(struct intel_engine_cs *engine, } static int igt_atomic_reset_engine(struct intel_engine_cs *engine, - const struct atomic_section *p) + const struct igt_atomic_section *p) { struct drm_i915_private *i915 = engine->i915; struct i915_request *rq; @@ -1794,79 +1681,43 @@ static int igt_atomic_reset_engine(struct intel_engine_cs *engine, return err; } -static void force_reset(struct drm_i915_private *i915) -{ - i915_gem_set_wedged(i915); - i915_reset(i915, 0, NULL); -} - -static int igt_atomic_reset(void *arg) +static int igt_reset_engines_atomic(void *arg) { - static const struct atomic_section phases[] = { - { "preempt", __preempt_begin, __preempt_end }, - { "softirq", __softirq_begin, __softirq_end }, - { "hardirq", __hardirq_begin, __hardirq_end }, - { } - }; struct drm_i915_private *i915 = arg; - intel_wakeref_t wakeref; + const typeof(*igt_atomic_phases) *p; int err = 0; - /* Check that the resets are usable from atomic context */ + /* Check that the engines resets are usable from atomic context */ + + if (!intel_has_reset_engine(i915)) + return 0; if (USES_GUC_SUBMISSION(i915)) - return 0; /* guc is dead; long live the guc */ + return 0; igt_global_reset_lock(i915); mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(i915); /* Flush any requests before we get started and check basics */ - force_reset(i915); - if (i915_reset_failed(i915)) + if (!igt_force_reset(i915)) goto unlock; - if (intel_has_gpu_reset(i915)) { - const typeof(*phases) *p; - - for (p = phases; p->name; p++) { - GEM_TRACE("intel_gpu_reset under %s\n", p->name); - - p->critical_section_begin(); - err = intel_gpu_reset(i915, ALL_ENGINES); - p->critical_section_end(); - - if (err) { - pr_err("intel_gpu_reset failed under %s\n", - p->name); - goto out; - } - } - - force_reset(i915); - } - - if (intel_has_reset_engine(i915)) { + for (p = igt_atomic_phases; p->name; p++) { struct intel_engine_cs *engine; enum intel_engine_id id; for_each_engine(engine, i915, id) { - const typeof(*phases) *p; - - for (p = phases; p->name; p++) { - err = igt_atomic_reset_engine(engine, p); - if (err) - goto out; - } + err = igt_atomic_reset_engine(engine, p); + if (err) + goto out; } } out: /* As we poke around the guts, do a full reset before continuing. */ - force_reset(i915); + igt_force_reset(i915); unlock: - intel_runtime_pm_put(i915, wakeref); mutex_unlock(&i915->drm.struct_mutex); igt_global_reset_unlock(i915); @@ -1876,21 +1727,19 @@ static int igt_atomic_reset(void *arg) int intel_hangcheck_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { - SUBTEST(igt_global_reset), /* attempt to recover GPU first */ - SUBTEST(igt_wedged_reset), SUBTEST(igt_hang_sanitycheck), SUBTEST(igt_reset_nop), SUBTEST(igt_reset_nop_engine), SUBTEST(igt_reset_idle_engine), SUBTEST(igt_reset_active_engine), SUBTEST(igt_reset_engines), + SUBTEST(igt_reset_engines_atomic), SUBTEST(igt_reset_queue), SUBTEST(igt_reset_wait), SUBTEST(igt_reset_evict_ggtt), SUBTEST(igt_reset_evict_ppgtt), SUBTEST(igt_reset_evict_fence), SUBTEST(igt_handle_error), - SUBTEST(igt_atomic_reset), }; intel_wakeref_t wakeref; bool saved_hangcheck; diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c similarity index 71% rename from drivers/gpu/drm/i915/selftests/intel_lrc.c rename to drivers/gpu/drm/i915/gt/selftest_lrc.c index e8b0b5dbcb2cb44be448b9781d6d2cbd12ea9750..a8c50900e2d4613386784d6e67385d666bf2261f 100644 --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -6,15 +6,15 @@ #include -#include "../i915_reset.h" - -#include "../i915_selftest.h" -#include "igt_flush_test.h" -#include "igt_live_test.h" -#include "igt_spinner.h" -#include "i915_random.h" - -#include "mock_context.h" +#include "gt/intel_reset.h" +#include "i915_selftest.h" +#include "selftests/i915_random.h" +#include "selftests/igt_flush_test.h" +#include "selftests/igt_gem_utils.h" +#include "selftests/igt_live_test.h" +#include "selftests/igt_spinner.h" +#include "selftests/lib_sw_fence.h" +#include "selftests/mock_context.h" static int live_sanitycheck(void *arg) { @@ -152,7 +152,7 @@ static int live_busywait_preempt(void *arg) * fails, we hang instead. */ - lo = i915_request_alloc(engine, ctx_lo); + lo = igt_request_alloc(ctx_lo, engine); if (IS_ERR(lo)) { err = PTR_ERR(lo); goto err_vma; @@ -196,7 +196,7 @@ static int live_busywait_preempt(void *arg) goto err_vma; } - hi = i915_request_alloc(engine, ctx_hi); + hi = igt_request_alloc(ctx_hi, engine); if (IS_ERR(hi)) { err = PTR_ERR(hi); goto err_vma; @@ -641,14 +641,19 @@ static struct i915_request *dummy_request(struct intel_engine_cs *engine) GEM_BUG_ON(i915_request_completed(rq)); i915_sw_fence_init(&rq->submit, dummy_notify); - i915_sw_fence_commit(&rq->submit); + set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); return rq; } static void dummy_request_free(struct i915_request *dummy) { + /* We have to fake the CS interrupt to kick the next request */ + i915_sw_fence_commit(&dummy->submit); + i915_request_mark_complete(dummy); + dma_fence_signal(&dummy->fence); + i915_sched_node_fini(&dummy->sched); i915_sw_fence_fini(&dummy->submit); @@ -861,13 +866,13 @@ static int live_chain_preempt(void *arg) i915_request_add(rq); for (i = 0; i < count; i++) { - rq = i915_request_alloc(engine, lo.ctx); + rq = igt_request_alloc(lo.ctx, engine); if (IS_ERR(rq)) goto err_wedged; i915_request_add(rq); } - rq = i915_request_alloc(engine, hi.ctx); + rq = igt_request_alloc(hi.ctx, engine); if (IS_ERR(rq)) goto err_wedged; i915_request_add(rq); @@ -886,7 +891,7 @@ static int live_chain_preempt(void *arg) } igt_spinner_end(&lo.spin); - rq = i915_request_alloc(engine, lo.ctx); + rq = igt_request_alloc(lo.ctx, engine); if (IS_ERR(rq)) goto err_wedged; i915_request_add(rq); @@ -1093,7 +1098,7 @@ static int smoke_submit(struct preempt_smoke *smoke, ctx->sched.priority = prio; - rq = i915_request_alloc(smoke->engine, ctx); + rq = igt_request_alloc(ctx, smoke->engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto unpin; @@ -1306,6 +1311,504 @@ static int live_preempt_smoke(void *arg) return err; } +static int nop_virtual_engine(struct drm_i915_private *i915, + struct intel_engine_cs **siblings, + unsigned int nsibling, + unsigned int nctx, + unsigned int flags) +#define CHAIN BIT(0) +{ + IGT_TIMEOUT(end_time); + struct i915_request *request[16]; + struct i915_gem_context *ctx[16]; + struct intel_context *ve[16]; + unsigned long n, prime, nc; + struct igt_live_test t; + ktime_t times[2] = {}; + int err; + + GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ctx)); + + for (n = 0; n < nctx; n++) { + ctx[n] = kernel_context(i915); + if (!ctx[n]) { + err = -ENOMEM; + nctx = n; + goto out; + } + + ve[n] = intel_execlists_create_virtual(ctx[n], + siblings, nsibling); + if (IS_ERR(ve[n])) { + kernel_context_close(ctx[n]); + err = PTR_ERR(ve[n]); + nctx = n; + goto out; + } + + err = intel_context_pin(ve[n]); + if (err) { + intel_context_put(ve[n]); + kernel_context_close(ctx[n]); + nctx = n; + goto out; + } + } + + err = igt_live_test_begin(&t, i915, __func__, ve[0]->engine->name); + if (err) + goto out; + + for_each_prime_number_from(prime, 1, 8192) { + times[1] = ktime_get_raw(); + + if (flags & CHAIN) { + for (nc = 0; nc < nctx; nc++) { + for (n = 0; n < prime; n++) { + request[nc] = + i915_request_create(ve[nc]); + if (IS_ERR(request[nc])) { + err = PTR_ERR(request[nc]); + goto out; + } + + i915_request_add(request[nc]); + } + } + } else { + for (n = 0; n < prime; n++) { + for (nc = 0; nc < nctx; nc++) { + request[nc] = + i915_request_create(ve[nc]); + if (IS_ERR(request[nc])) { + err = PTR_ERR(request[nc]); + goto out; + } + + i915_request_add(request[nc]); + } + } + } + + for (nc = 0; nc < nctx; nc++) { + if (i915_request_wait(request[nc], + I915_WAIT_LOCKED, + HZ / 10) < 0) { + pr_err("%s(%s): wait for %llx:%lld timed out\n", + __func__, ve[0]->engine->name, + request[nc]->fence.context, + request[nc]->fence.seqno); + + GEM_TRACE("%s(%s) failed at request %llx:%lld\n", + __func__, ve[0]->engine->name, + request[nc]->fence.context, + request[nc]->fence.seqno); + GEM_TRACE_DUMP(); + i915_gem_set_wedged(i915); + break; + } + } + + times[1] = ktime_sub(ktime_get_raw(), times[1]); + if (prime == 1) + times[0] = times[1]; + + if (__igt_timeout(end_time, NULL)) + break; + } + + err = igt_live_test_end(&t); + if (err) + goto out; + + pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n", + nctx, ve[0]->engine->name, ktime_to_ns(times[0]), + prime, div64_u64(ktime_to_ns(times[1]), prime)); + +out: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + + for (nc = 0; nc < nctx; nc++) { + intel_context_unpin(ve[nc]); + intel_context_put(ve[nc]); + kernel_context_close(ctx[nc]); + } + return err; +} + +static int live_virtual_engine(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; + struct intel_engine_cs *engine; + enum intel_engine_id id; + unsigned int class, inst; + int err = -ENODEV; + + if (USES_GUC_SUBMISSION(i915)) + return 0; + + mutex_lock(&i915->drm.struct_mutex); + + for_each_engine(engine, i915, id) { + err = nop_virtual_engine(i915, &engine, 1, 1, 0); + if (err) { + pr_err("Failed to wrap engine %s: err=%d\n", + engine->name, err); + goto out_unlock; + } + } + + for (class = 0; class <= MAX_ENGINE_CLASS; class++) { + int nsibling, n; + + nsibling = 0; + for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { + if (!i915->engine_class[class][inst]) + continue; + + siblings[nsibling++] = i915->engine_class[class][inst]; + } + if (nsibling < 2) + continue; + + for (n = 1; n <= nsibling + 1; n++) { + err = nop_virtual_engine(i915, siblings, nsibling, + n, 0); + if (err) + goto out_unlock; + } + + err = nop_virtual_engine(i915, siblings, nsibling, n, CHAIN); + if (err) + goto out_unlock; + } + +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int mask_virtual_engine(struct drm_i915_private *i915, + struct intel_engine_cs **siblings, + unsigned int nsibling) +{ + struct i915_request *request[MAX_ENGINE_INSTANCE + 1]; + struct i915_gem_context *ctx; + struct intel_context *ve; + struct igt_live_test t; + unsigned int n; + int err; + + /* + * Check that by setting the execution mask on a request, we can + * restrict it to our desired engine within the virtual engine. + */ + + ctx = kernel_context(i915); + if (!ctx) + return -ENOMEM; + + ve = intel_execlists_create_virtual(ctx, siblings, nsibling); + if (IS_ERR(ve)) { + err = PTR_ERR(ve); + goto out_close; + } + + err = intel_context_pin(ve); + if (err) + goto out_put; + + err = igt_live_test_begin(&t, i915, __func__, ve->engine->name); + if (err) + goto out_unpin; + + for (n = 0; n < nsibling; n++) { + request[n] = i915_request_create(ve); + if (IS_ERR(request)) { + err = PTR_ERR(request); + nsibling = n; + goto out; + } + + /* Reverse order as it's more likely to be unnatural */ + request[n]->execution_mask = siblings[nsibling - n - 1]->mask; + + i915_request_get(request[n]); + i915_request_add(request[n]); + } + + for (n = 0; n < nsibling; n++) { + if (i915_request_wait(request[n], I915_WAIT_LOCKED, HZ / 10) < 0) { + pr_err("%s(%s): wait for %llx:%lld timed out\n", + __func__, ve->engine->name, + request[n]->fence.context, + request[n]->fence.seqno); + + GEM_TRACE("%s(%s) failed at request %llx:%lld\n", + __func__, ve->engine->name, + request[n]->fence.context, + request[n]->fence.seqno); + GEM_TRACE_DUMP(); + i915_gem_set_wedged(i915); + err = -EIO; + goto out; + } + + if (request[n]->engine != siblings[nsibling - n - 1]) { + pr_err("Executed on wrong sibling '%s', expected '%s'\n", + request[n]->engine->name, + siblings[nsibling - n - 1]->name); + err = -EINVAL; + goto out; + } + } + + err = igt_live_test_end(&t); + if (err) + goto out; + +out: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + + for (n = 0; n < nsibling; n++) + i915_request_put(request[n]); + +out_unpin: + intel_context_unpin(ve); +out_put: + intel_context_put(ve); +out_close: + kernel_context_close(ctx); + return err; +} + +static int live_virtual_mask(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; + unsigned int class, inst; + int err = 0; + + if (USES_GUC_SUBMISSION(i915)) + return 0; + + mutex_lock(&i915->drm.struct_mutex); + + for (class = 0; class <= MAX_ENGINE_CLASS; class++) { + unsigned int nsibling; + + nsibling = 0; + for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { + if (!i915->engine_class[class][inst]) + break; + + siblings[nsibling++] = i915->engine_class[class][inst]; + } + if (nsibling < 2) + continue; + + err = mask_virtual_engine(i915, siblings, nsibling); + if (err) + goto out_unlock; + } + +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int bond_virtual_engine(struct drm_i915_private *i915, + unsigned int class, + struct intel_engine_cs **siblings, + unsigned int nsibling, + unsigned int flags) +#define BOND_SCHEDULE BIT(0) +{ + struct intel_engine_cs *master; + struct i915_gem_context *ctx; + struct i915_request *rq[16]; + enum intel_engine_id id; + unsigned long n; + int err; + + GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1); + + ctx = kernel_context(i915); + if (!ctx) + return -ENOMEM; + + err = 0; + rq[0] = ERR_PTR(-ENOMEM); + for_each_engine(master, i915, id) { + struct i915_sw_fence fence = {}; + + if (master->class == class) + continue; + + memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq)); + + rq[0] = igt_request_alloc(ctx, master); + if (IS_ERR(rq[0])) { + err = PTR_ERR(rq[0]); + goto out; + } + i915_request_get(rq[0]); + + if (flags & BOND_SCHEDULE) { + onstack_fence_init(&fence); + err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit, + &fence, + GFP_KERNEL); + } + i915_request_add(rq[0]); + if (err < 0) + goto out; + + for (n = 0; n < nsibling; n++) { + struct intel_context *ve; + + ve = intel_execlists_create_virtual(ctx, + siblings, + nsibling); + if (IS_ERR(ve)) { + err = PTR_ERR(ve); + onstack_fence_fini(&fence); + goto out; + } + + err = intel_virtual_engine_attach_bond(ve->engine, + master, + siblings[n]); + if (err) { + intel_context_put(ve); + onstack_fence_fini(&fence); + goto out; + } + + err = intel_context_pin(ve); + intel_context_put(ve); + if (err) { + onstack_fence_fini(&fence); + goto out; + } + + rq[n + 1] = i915_request_create(ve); + intel_context_unpin(ve); + if (IS_ERR(rq[n + 1])) { + err = PTR_ERR(rq[n + 1]); + onstack_fence_fini(&fence); + goto out; + } + i915_request_get(rq[n + 1]); + + err = i915_request_await_execution(rq[n + 1], + &rq[0]->fence, + ve->engine->bond_execute); + i915_request_add(rq[n + 1]); + if (err < 0) { + onstack_fence_fini(&fence); + goto out; + } + } + onstack_fence_fini(&fence); + + if (i915_request_wait(rq[0], + I915_WAIT_LOCKED, + HZ / 10) < 0) { + pr_err("Master request did not execute (on %s)!\n", + rq[0]->engine->name); + err = -EIO; + goto out; + } + + for (n = 0; n < nsibling; n++) { + if (i915_request_wait(rq[n + 1], + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT) < 0) { + err = -EIO; + goto out; + } + + if (rq[n + 1]->engine != siblings[n]) { + pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n", + siblings[n]->name, + rq[n + 1]->engine->name, + rq[0]->engine->name); + err = -EINVAL; + goto out; + } + } + + for (n = 0; !IS_ERR(rq[n]); n++) + i915_request_put(rq[n]); + rq[0] = ERR_PTR(-ENOMEM); + } + +out: + for (n = 0; !IS_ERR(rq[n]); n++) + i915_request_put(rq[n]); + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + + kernel_context_close(ctx); + return err; +} + +static int live_virtual_bond(void *arg) +{ + static const struct phase { + const char *name; + unsigned int flags; + } phases[] = { + { "", 0 }, + { "schedule", BOND_SCHEDULE }, + { }, + }; + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; + unsigned int class, inst; + int err = 0; + + if (USES_GUC_SUBMISSION(i915)) + return 0; + + mutex_lock(&i915->drm.struct_mutex); + + for (class = 0; class <= MAX_ENGINE_CLASS; class++) { + const struct phase *p; + int nsibling; + + nsibling = 0; + for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { + if (!i915->engine_class[class][inst]) + break; + + GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings)); + siblings[nsibling++] = i915->engine_class[class][inst]; + } + if (nsibling < 2) + continue; + + for (p = phases; p->name; p++) { + err = bond_virtual_engine(i915, + class, siblings, nsibling, + p->flags); + if (err) { + pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n", + __func__, p->name, class, nsibling, err); + goto out_unlock; + } + } + } + +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + int intel_execlists_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { @@ -1318,6 +1821,9 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_chain_preempt), SUBTEST(live_preempt_hang), SUBTEST(live_preempt_smoke), + SUBTEST(live_virtual_engine), + SUBTEST(live_virtual_mask), + SUBTEST(live_virtual_bond), }; if (!HAS_EXECLISTS(i915)) diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c new file mode 100644 index 0000000000000000000000000000000000000000..607473439eb0da9b37825b333801d770157685e2 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2018 Intel Corporation + */ + +#include "i915_selftest.h" +#include "selftests/igt_reset.h" +#include "selftests/igt_atomic.h" + +static int igt_global_reset(void *arg) +{ + struct drm_i915_private *i915 = arg; + unsigned int reset_count; + int err = 0; + + /* Check that we can issue a global GPU reset */ + + igt_global_reset_lock(i915); + + reset_count = i915_reset_count(&i915->gpu_error); + + i915_reset(i915, ALL_ENGINES, NULL); + + if (i915_reset_count(&i915->gpu_error) == reset_count) { + pr_err("No GPU reset recorded!\n"); + err = -EINVAL; + } + + igt_global_reset_unlock(i915); + + if (i915_reset_failed(i915)) + err = -EIO; + + return err; +} + +static int igt_wedged_reset(void *arg) +{ + struct drm_i915_private *i915 = arg; + intel_wakeref_t wakeref; + + /* Check that we can recover a wedged device with a GPU reset */ + + igt_global_reset_lock(i915); + wakeref = intel_runtime_pm_get(i915); + + i915_gem_set_wedged(i915); + + GEM_BUG_ON(!i915_reset_failed(i915)); + i915_reset(i915, ALL_ENGINES, NULL); + + intel_runtime_pm_put(i915, wakeref); + igt_global_reset_unlock(i915); + + return i915_reset_failed(i915) ? -EIO : 0; +} + +static int igt_atomic_reset(void *arg) +{ + struct drm_i915_private *i915 = arg; + const typeof(*igt_atomic_phases) *p; + int err = 0; + + /* Check that the resets are usable from atomic context */ + + igt_global_reset_lock(i915); + mutex_lock(&i915->drm.struct_mutex); + + /* Flush any requests before we get started and check basics */ + if (!igt_force_reset(i915)) + goto unlock; + + for (p = igt_atomic_phases; p->name; p++) { + GEM_TRACE("intel_gpu_reset under %s\n", p->name); + + p->critical_section_begin(); + reset_prepare(i915); + err = intel_gpu_reset(i915, ALL_ENGINES); + reset_finish(i915); + p->critical_section_end(); + + if (err) { + pr_err("intel_gpu_reset failed under %s\n", p->name); + break; + } + } + + /* As we poke around the guts, do a full reset before continuing. */ + igt_force_reset(i915); + +unlock: + mutex_unlock(&i915->drm.struct_mutex); + igt_global_reset_unlock(i915); + + return err; +} + +int intel_reset_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_global_reset), /* attempt to recover GPU first */ + SUBTEST(igt_wedged_reset), + SUBTEST(igt_atomic_reset), + }; + intel_wakeref_t wakeref; + int err = 0; + + if (!intel_has_gpu_reset(i915)) + return 0; + + if (i915_terminally_wedged(i915)) + return -EIO; /* we're long past hope of a successful reset */ + + with_intel_runtime_pm(i915, wakeref) + err = i915_subtests(tests, i915); + + return err; +} diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c similarity index 66% rename from drivers/gpu/drm/i915/selftests/intel_workarounds.c rename to drivers/gpu/drm/i915/gt/selftest_workarounds.c index 567b6f8dae861aa088c9832becb5db97191936a1..f9c9e7291187c7297d7b7e764c225b8cf0d24ea2 100644 --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -4,15 +4,16 @@ * Copyright © 2018 Intel Corporation */ -#include "../i915_selftest.h" -#include "../i915_reset.h" +#include "i915_selftest.h" +#include "intel_reset.h" -#include "igt_flush_test.h" -#include "igt_reset.h" -#include "igt_spinner.h" -#include "igt_wedge_me.h" -#include "mock_context.h" -#include "mock_drm.h" +#include "selftests/igt_flush_test.h" +#include "selftests/igt_gem_utils.h" +#include "selftests/igt_reset.h" +#include "selftests/igt_spinner.h" +#include "selftests/igt_wedge_me.h" +#include "selftests/mock_context.h" +#include "selftests/mock_drm.h" static const struct wo_register { enum intel_platform platform; @@ -21,12 +22,13 @@ static const struct wo_register { { INTEL_GEMINILAKE, 0x731c } }; -#define REF_NAME_MAX (INTEL_ENGINE_CS_MAX_NAME + 4) +#define REF_NAME_MAX (INTEL_ENGINE_CS_MAX_NAME + 8) struct wa_lists { struct i915_wa_list gt_wa_list; struct { char name[REF_NAME_MAX]; struct i915_wa_list wa_list; + struct i915_wa_list ctx_wa_list; } engine[I915_NUM_ENGINES]; }; @@ -51,6 +53,12 @@ reference_lists_init(struct drm_i915_private *i915, struct wa_lists *lists) wa_init_start(wal, name); engine_init_workarounds(engine, wal); wa_init_finish(wal); + + snprintf(name, REF_NAME_MAX, "%s_CTX_REF", engine->name); + + __intel_engine_init_ctx_wa(engine, + &lists->engine[id].ctx_wa_list, + name); } } @@ -71,7 +79,6 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { const u32 base = engine->mmio_base; struct drm_i915_gem_object *result; - intel_wakeref_t wakeref; struct i915_request *rq; struct i915_vma *vma; u32 srm, *cs; @@ -103,9 +110,7 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine) if (err) goto err_obj; - rq = ERR_PTR(-ENODEV); - with_intel_runtime_pm(engine->i915, wakeref) - rq = i915_request_alloc(engine, ctx); + rq = igt_request_alloc(ctx, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_pin; @@ -340,49 +345,6 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine, return err; } -static struct i915_vma *create_scratch(struct i915_gem_context *ctx) -{ - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - void *ptr; - int err; - - obj = i915_gem_object_create_internal(ctx->i915, PAGE_SIZE); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); - - ptr = i915_gem_object_pin_map(obj, I915_MAP_WB); - if (IS_ERR(ptr)) { - err = PTR_ERR(ptr); - goto err_obj; - } - memset(ptr, 0xc5, PAGE_SIZE); - i915_gem_object_flush_map(obj); - i915_gem_object_unpin_map(obj); - - vma = i915_vma_instance(obj, &ctx->ppgtt->vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err_obj; - } - - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) - goto err_obj; - - err = i915_gem_object_set_to_cpu_domain(obj, false); - if (err) - goto err_obj; - - return vma; - -err_obj: - i915_gem_object_put(obj); - return ERR_PTR(err); -} - static struct i915_vma *create_batch(struct i915_gem_context *ctx) { struct drm_i915_gem_object *obj; @@ -475,7 +437,7 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx, int err = 0, i, v; u32 *cs, *results; - scratch = create_scratch(ctx); + scratch = create_scratch(&ctx->ppgtt->vm, 2 * ARRAY_SIZE(values) + 1); if (IS_ERR(scratch)) return PTR_ERR(scratch); @@ -557,7 +519,7 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx, i915_gem_object_unpin_map(batch->obj); i915_gem_chipset_flush(ctx->i915); - rq = i915_request_alloc(engine, ctx); + rq = igt_request_alloc(ctx, engine); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto out_batch; @@ -743,26 +705,343 @@ static int live_reset_whitelist(void *arg) return err; } -static bool verify_gt_engine_wa(struct drm_i915_private *i915, - struct wa_lists *lists, const char *str) +static int read_whitelisted_registers(struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + struct i915_vma *results) +{ + struct i915_request *rq; + int i, err = 0; + u32 srm, *cs; + + rq = igt_request_alloc(ctx, engine); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + srm = MI_STORE_REGISTER_MEM; + if (INTEL_GEN(ctx->i915) >= 8) + srm++; + + cs = intel_ring_begin(rq, 4 * engine->whitelist.count); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_req; + } + + for (i = 0; i < engine->whitelist.count; i++) { + u64 offset = results->node.start + sizeof(u32) * i; + + *cs++ = srm; + *cs++ = i915_mmio_reg_offset(engine->whitelist.list[i].reg); + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + } + intel_ring_advance(rq, cs); + +err_req: + i915_request_add(rq); + + if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) + err = -EIO; + + return err; +} + +static int scrub_whitelisted_registers(struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + struct i915_request *rq; + struct i915_vma *batch; + int i, err = 0; + u32 *cs; + + batch = create_batch(ctx); + if (IS_ERR(batch)) + return PTR_ERR(batch); + + cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_batch; + } + + *cs++ = MI_LOAD_REGISTER_IMM(engine->whitelist.count); + for (i = 0; i < engine->whitelist.count; i++) { + *cs++ = i915_mmio_reg_offset(engine->whitelist.list[i].reg); + *cs++ = 0xffffffff; + } + *cs++ = MI_BATCH_BUFFER_END; + + i915_gem_object_flush_map(batch->obj); + i915_gem_chipset_flush(ctx->i915); + + rq = igt_request_alloc(ctx, engine); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_unpin; + } + + if (engine->emit_init_breadcrumb) { /* Be nice if we hang */ + err = engine->emit_init_breadcrumb(rq); + if (err) + goto err_request; + } + + /* Perform the writes from an unprivileged "user" batch */ + err = engine->emit_bb_start(rq, batch->node.start, 0, 0); + +err_request: + i915_request_add(rq); + if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) + err = -EIO; + +err_unpin: + i915_gem_object_unpin_map(batch->obj); +err_batch: + i915_vma_unpin_and_release(&batch, 0); + return err; +} + +struct regmask { + i915_reg_t reg; + unsigned long gen_mask; +}; + +static bool find_reg(struct drm_i915_private *i915, + i915_reg_t reg, + const struct regmask *tbl, + unsigned long count) +{ + u32 offset = i915_mmio_reg_offset(reg); + + while (count--) { + if (INTEL_INFO(i915)->gen_mask & tbl->gen_mask && + i915_mmio_reg_offset(tbl->reg) == offset) + return true; + tbl++; + } + + return false; +} + +static bool pardon_reg(struct drm_i915_private *i915, i915_reg_t reg) +{ + /* Alas, we must pardon some whitelists. Mistakes already made */ + static const struct regmask pardon[] = { + { GEN9_CTX_PREEMPT_REG, INTEL_GEN_MASK(9, 9) }, + { GEN8_L3SQCREG4, INTEL_GEN_MASK(9, 9) }, + }; + + return find_reg(i915, reg, pardon, ARRAY_SIZE(pardon)); +} + +static bool result_eq(struct intel_engine_cs *engine, + u32 a, u32 b, i915_reg_t reg) +{ + if (a != b && !pardon_reg(engine->i915, reg)) { + pr_err("Whitelisted register 0x%4x not context saved: A=%08x, B=%08x\n", + i915_mmio_reg_offset(reg), a, b); + return false; + } + + return true; +} + +static bool writeonly_reg(struct drm_i915_private *i915, i915_reg_t reg) +{ + /* Some registers do not seem to behave and our writes unreadable */ + static const struct regmask wo[] = { + { GEN9_SLICE_COMMON_ECO_CHICKEN1, INTEL_GEN_MASK(9, 9) }, + }; + + return find_reg(i915, reg, wo, ARRAY_SIZE(wo)); +} + +static bool result_neq(struct intel_engine_cs *engine, + u32 a, u32 b, i915_reg_t reg) +{ + if (a == b && !writeonly_reg(engine->i915, reg)) { + pr_err("Whitelist register 0x%4x:%08x was unwritable\n", + i915_mmio_reg_offset(reg), a); + return false; + } + + return true; +} + +static int +check_whitelisted_registers(struct intel_engine_cs *engine, + struct i915_vma *A, + struct i915_vma *B, + bool (*fn)(struct intel_engine_cs *engine, + u32 a, u32 b, + i915_reg_t reg)) +{ + u32 *a, *b; + int i, err; + + a = i915_gem_object_pin_map(A->obj, I915_MAP_WB); + if (IS_ERR(a)) + return PTR_ERR(a); + + b = i915_gem_object_pin_map(B->obj, I915_MAP_WB); + if (IS_ERR(b)) { + err = PTR_ERR(b); + goto err_a; + } + + err = 0; + for (i = 0; i < engine->whitelist.count; i++) { + if (!fn(engine, a[i], b[i], engine->whitelist.list[i].reg)) + err = -EINVAL; + } + + i915_gem_object_unpin_map(B->obj); +err_a: + i915_gem_object_unpin_map(A->obj); + return err; +} + +static int live_isolated_whitelist(void *arg) { + struct drm_i915_private *i915 = arg; + struct { + struct i915_gem_context *ctx; + struct i915_vma *scratch[2]; + } client[2] = {}; struct intel_engine_cs *engine; enum intel_engine_id id; + int i, err = 0; + + /* + * Check that a write into a whitelist register works, but + * invisible to a second context. + */ + + if (!intel_engines_has_context_isolation(i915)) + return 0; + + if (!i915->kernel_context->ppgtt) + return 0; + + for (i = 0; i < ARRAY_SIZE(client); i++) { + struct i915_gem_context *c; + + c = kernel_context(i915); + if (IS_ERR(c)) { + err = PTR_ERR(c); + goto err; + } + + client[i].scratch[0] = create_scratch(&c->ppgtt->vm, 1024); + if (IS_ERR(client[i].scratch[0])) { + err = PTR_ERR(client[i].scratch[0]); + kernel_context_close(c); + goto err; + } + + client[i].scratch[1] = create_scratch(&c->ppgtt->vm, 1024); + if (IS_ERR(client[i].scratch[1])) { + err = PTR_ERR(client[i].scratch[1]); + i915_vma_unpin_and_release(&client[i].scratch[0], 0); + kernel_context_close(c); + goto err; + } + + client[i].ctx = c; + } + + for_each_engine(engine, i915, id) { + if (!engine->whitelist.count) + continue; + + /* Read default values */ + err = read_whitelisted_registers(client[0].ctx, engine, + client[0].scratch[0]); + if (err) + goto err; + + /* Try to overwrite registers (should only affect ctx0) */ + err = scrub_whitelisted_registers(client[0].ctx, engine); + if (err) + goto err; + + /* Read values from ctx1, we expect these to be defaults */ + err = read_whitelisted_registers(client[1].ctx, engine, + client[1].scratch[0]); + if (err) + goto err; + + /* Verify that both reads return the same default values */ + err = check_whitelisted_registers(engine, + client[0].scratch[0], + client[1].scratch[0], + result_eq); + if (err) + goto err; + + /* Read back the updated values in ctx0 */ + err = read_whitelisted_registers(client[0].ctx, engine, + client[0].scratch[1]); + if (err) + goto err; + + /* User should be granted privilege to overwhite regs */ + err = check_whitelisted_registers(engine, + client[0].scratch[0], + client[0].scratch[1], + result_neq); + if (err) + goto err; + } + +err: + for (i = 0; i < ARRAY_SIZE(client); i++) { + if (!client[i].ctx) + break; + + i915_vma_unpin_and_release(&client[i].scratch[1], 0); + i915_vma_unpin_and_release(&client[i].scratch[0], 0); + kernel_context_close(client[i].ctx); + } + + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + + return err; +} + +static bool +verify_wa_lists(struct i915_gem_context *ctx, struct wa_lists *lists, + const char *str) +{ + struct drm_i915_private *i915 = ctx->i915; + struct i915_gem_engines_iter it; + struct intel_context *ce; bool ok = true; ok &= wa_list_verify(&i915->uncore, &lists->gt_wa_list, str); - for_each_engine(engine, i915, id) - ok &= wa_list_verify(engine->uncore, - &lists->engine[id].wa_list, str); + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + enum intel_engine_id id = ce->engine->id; + + ok &= engine_wa_list_verify(ce, + &lists->engine[id].wa_list, + str) == 0; + + ok &= engine_wa_list_verify(ce, + &lists->engine[id].ctx_wa_list, + str) == 0; + } + i915_gem_context_unlock_engines(ctx); return ok; } static int -live_gpu_reset_gt_engine_workarounds(void *arg) +live_gpu_reset_workarounds(void *arg) { struct drm_i915_private *i915 = arg; + struct i915_gem_context *ctx; intel_wakeref_t wakeref; struct wa_lists lists; bool ok; @@ -770,6 +1049,10 @@ live_gpu_reset_gt_engine_workarounds(void *arg) if (!intel_has_gpu_reset(i915)) return 0; + ctx = kernel_context(i915); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + pr_info("Verifying after GPU reset...\n"); igt_global_reset_lock(i915); @@ -777,15 +1060,16 @@ live_gpu_reset_gt_engine_workarounds(void *arg) reference_lists_init(i915, &lists); - ok = verify_gt_engine_wa(i915, &lists, "before reset"); + ok = verify_wa_lists(ctx, &lists, "before reset"); if (!ok) goto out; i915_reset(i915, ALL_ENGINES, "live_workarounds"); - ok = verify_gt_engine_wa(i915, &lists, "after reset"); + ok = verify_wa_lists(ctx, &lists, "after reset"); out: + kernel_context_close(ctx); reference_lists_fini(i915, &lists); intel_runtime_pm_put(i915, wakeref); igt_global_reset_unlock(i915); @@ -794,7 +1078,7 @@ live_gpu_reset_gt_engine_workarounds(void *arg) } static int -live_engine_reset_gt_engine_workarounds(void *arg) +live_engine_reset_workarounds(void *arg) { struct drm_i915_private *i915 = arg; struct intel_engine_cs *engine; @@ -823,7 +1107,7 @@ live_engine_reset_gt_engine_workarounds(void *arg) pr_info("Verifying after %s reset...\n", engine->name); - ok = verify_gt_engine_wa(i915, &lists, "before reset"); + ok = verify_wa_lists(ctx, &lists, "before reset"); if (!ok) { ret = -ESRCH; goto err; @@ -831,7 +1115,7 @@ live_engine_reset_gt_engine_workarounds(void *arg) i915_reset_engine(engine, "live_workarounds"); - ok = verify_gt_engine_wa(i915, &lists, "after idle reset"); + ok = verify_wa_lists(ctx, &lists, "after idle reset"); if (!ok) { ret = -ESRCH; goto err; @@ -862,7 +1146,7 @@ live_engine_reset_gt_engine_workarounds(void *arg) igt_spinner_end(&spin); igt_spinner_fini(&spin); - ok = verify_gt_engine_wa(i915, &lists, "after busy reset"); + ok = verify_wa_lists(ctx, &lists, "after busy reset"); if (!ok) { ret = -ESRCH; goto err; @@ -885,8 +1169,9 @@ int intel_workarounds_live_selftests(struct drm_i915_private *i915) static const struct i915_subtest tests[] = { SUBTEST(live_dirty_whitelist), SUBTEST(live_reset_whitelist), - SUBTEST(live_gpu_reset_gt_engine_workarounds), - SUBTEST(live_engine_reset_gt_engine_workarounds), + SUBTEST(live_isolated_whitelist), + SUBTEST(live_gpu_reset_workarounds), + SUBTEST(live_engine_reset_workarounds), }; int err; diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index f5a328b5290a59d9e9e77866cb6742762ae6083e..b54f2bdc13a400c5995463c6f0e375b97c5d10b8 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -149,9 +149,9 @@ struct intel_vgpu_submission_ops { struct intel_vgpu_submission { struct intel_vgpu_execlist execlist[I915_NUM_ENGINES]; struct list_head workload_q_head[I915_NUM_ENGINES]; + struct intel_context *shadow[I915_NUM_ENGINES]; struct kmem_cache *workloads; atomic_t running_workload_num; - struct i915_gem_context *shadow_ctx; union { u64 i915_context_pml4; u64 i915_context_pdps[GEN8_3LVL_PDPES]; diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index a68addf95c230f2edcc9b5b21860e9aee406bc27..144301b778df275e1327217de3eb497ac5317252 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -1576,7 +1576,7 @@ hw_id_show(struct device *dev, struct device_attribute *attr, struct intel_vgpu *vgpu = (struct intel_vgpu *) mdev_get_drvdata(mdev); return sprintf(buf, "%u\n", - vgpu->submission.shadow_ctx->hw_id); + vgpu->submission.shadow[0]->gem_context->hw_id); } return sprintf(buf, "\n"); } diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index 90bb3df0db5035214d66b57d52b92b9685f5512c..96e1edf21b3f73f4d1605f525f5a1e7b6b580305 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -493,8 +493,7 @@ static void switch_mmio(struct intel_vgpu *pre, * itself. */ if (mmio->in_context && - !is_inhibit_context(intel_context_lookup(s->shadow_ctx, - dev_priv->engine[ring_id]))) + !is_inhibit_context(s->shadow[ring_id])) continue; if (mmio->mask) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 13632dba8b2afb516effcfa1ec79bdabb1e692d6..38897d241f5f161aa22af929a17f25ab8373d0b1 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -36,6 +36,7 @@ #include #include "i915_drv.h" +#include "i915_gem_pm.h" #include "gvt.h" #define RING_CTX_OFF(x) \ @@ -277,18 +278,23 @@ static int shadow_context_status_change(struct notifier_block *nb, return NOTIFY_OK; } -static void shadow_context_descriptor_update(struct intel_context *ce) +static void +shadow_context_descriptor_update(struct intel_context *ce, + struct intel_vgpu_workload *workload) { - u64 desc = 0; - - desc = ce->lrc_desc; + u64 desc = ce->lrc_desc; - /* Update bits 0-11 of the context descriptor which includes flags + /* + * Update bits 0-11 of the context descriptor which includes flags * like GEN8_CTX_* cached in desc_template */ desc &= U64_MAX << 12; desc |= ce->gem_context->desc_template & ((1ULL << 12) - 1); + desc &= ~(0x3 << GEN8_CTX_ADDRESSING_MODE_SHIFT); + desc |= workload->ctx_desc.addressing_mode << + GEN8_CTX_ADDRESSING_MODE_SHIFT; + ce->lrc_desc = desc; } @@ -382,26 +388,22 @@ intel_gvt_workload_req_alloc(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; struct intel_vgpu_submission *s = &vgpu->submission; - struct i915_gem_context *shadow_ctx = s->shadow_ctx; struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - struct intel_engine_cs *engine = dev_priv->engine[workload->ring_id]; struct i915_request *rq; - int ret = 0; lockdep_assert_held(&dev_priv->drm.struct_mutex); if (workload->req) - goto out; + return 0; - rq = i915_request_alloc(engine, shadow_ctx); + rq = i915_request_create(s->shadow[workload->ring_id]); if (IS_ERR(rq)) { gvt_vgpu_err("fail to allocate gem request\n"); - ret = PTR_ERR(rq); - goto out; + return PTR_ERR(rq); } + workload->req = i915_request_get(rq); -out: - return ret; + return 0; } /** @@ -416,10 +418,7 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; struct intel_vgpu_submission *s = &vgpu->submission; - struct i915_gem_context *shadow_ctx = s->shadow_ctx; struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - struct intel_engine_cs *engine = dev_priv->engine[workload->ring_id]; - struct intel_context *ce; int ret; lockdep_assert_held(&dev_priv->drm.struct_mutex); @@ -427,29 +426,13 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) if (workload->shadow) return 0; - /* pin shadow context by gvt even the shadow context will be pinned - * when i915 alloc request. That is because gvt will update the guest - * context from shadow context when workload is completed, and at that - * moment, i915 may already unpined the shadow context to make the - * shadow_ctx pages invalid. So gvt need to pin itself. After update - * the guest context, gvt can unpin the shadow_ctx safely. - */ - ce = intel_context_pin(shadow_ctx, engine); - if (IS_ERR(ce)) { - gvt_vgpu_err("fail to pin shadow context\n"); - return PTR_ERR(ce); - } - - shadow_ctx->desc_template &= ~(0x3 << GEN8_CTX_ADDRESSING_MODE_SHIFT); - shadow_ctx->desc_template |= workload->ctx_desc.addressing_mode << - GEN8_CTX_ADDRESSING_MODE_SHIFT; - if (!test_and_set_bit(workload->ring_id, s->shadow_ctx_desc_updated)) - shadow_context_descriptor_update(ce); + shadow_context_descriptor_update(s->shadow[workload->ring_id], + workload); ret = intel_gvt_scan_and_shadow_ringbuffer(workload); if (ret) - goto err_unpin; + return ret; if (workload->ring_id == RCS0 && workload->wa_ctx.indirect_ctx.size) { ret = intel_gvt_scan_and_shadow_wa_ctx(&workload->wa_ctx); @@ -461,8 +444,6 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) return 0; err_shadow: release_shadow_wa_ctx(&workload->wa_ctx); -err_unpin: - intel_context_unpin(ce); return ret; } @@ -689,7 +670,6 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) struct intel_vgpu *vgpu = workload->vgpu; struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; struct intel_vgpu_submission *s = &vgpu->submission; - struct i915_gem_context *shadow_ctx = s->shadow_ctx; struct i915_request *rq; int ring_id = workload->ring_id; int ret; @@ -700,7 +680,8 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) mutex_lock(&vgpu->vgpu_lock); mutex_lock(&dev_priv->drm.struct_mutex); - ret = set_context_ppgtt_from_shadow(workload, shadow_ctx); + ret = set_context_ppgtt_from_shadow(workload, + s->shadow[ring_id]->gem_context); if (ret < 0) { gvt_vgpu_err("workload shadow ppgtt isn't ready\n"); goto err_req; @@ -928,11 +909,6 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) intel_vgpu_trigger_virtual_event(vgpu, event); } - /* unpin shadow ctx as the shadow_ctx update is done */ - mutex_lock(&rq->i915->drm.struct_mutex); - intel_context_unpin(rq->hw_context); - mutex_unlock(&rq->i915->drm.struct_mutex); - i915_request_put(fetch_and_zero(&workload->req)); } @@ -1011,8 +987,6 @@ static int workload_thread(void *priv) workload->ring_id, workload, workload->vgpu->id); - intel_runtime_pm_get(gvt->dev_priv); - gvt_dbg_sched("ring id %d will dispatch workload %p\n", workload->ring_id, workload); @@ -1042,7 +1016,6 @@ static int workload_thread(void *priv) intel_uncore_forcewake_put(&gvt->dev_priv->uncore, FORCEWAKE_ALL); - intel_runtime_pm_put_unchecked(gvt->dev_priv); if (ret && (vgpu_is_vm_unhealthy(ret))) enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR); } @@ -1125,17 +1098,17 @@ int intel_gvt_init_workload_scheduler(struct intel_gvt *gvt) } static void -i915_context_ppgtt_root_restore(struct intel_vgpu_submission *s) +i915_context_ppgtt_root_restore(struct intel_vgpu_submission *s, + struct i915_hw_ppgtt *ppgtt) { - struct i915_hw_ppgtt *i915_ppgtt = s->shadow_ctx->ppgtt; int i; - if (i915_vm_is_4lvl(&i915_ppgtt->vm)) { - px_dma(&i915_ppgtt->pml4) = s->i915_context_pml4; + if (i915_vm_is_4lvl(&ppgtt->vm)) { + px_dma(&ppgtt->pml4) = s->i915_context_pml4; } else { for (i = 0; i < GEN8_3LVL_PDPES; i++) - px_dma(i915_ppgtt->pdp.page_directory[i]) = - s->i915_context_pdps[i]; + px_dma(ppgtt->pdp.page_directory[i]) = + s->i915_context_pdps[i]; } } @@ -1149,10 +1122,15 @@ i915_context_ppgtt_root_restore(struct intel_vgpu_submission *s) void intel_vgpu_clean_submission(struct intel_vgpu *vgpu) { struct intel_vgpu_submission *s = &vgpu->submission; + struct intel_engine_cs *engine; + enum intel_engine_id id; intel_vgpu_select_submission_ops(vgpu, ALL_ENGINES, 0); - i915_context_ppgtt_root_restore(s); - i915_gem_context_put(s->shadow_ctx); + + i915_context_ppgtt_root_restore(s, s->shadow[0]->gem_context->ppgtt); + for_each_engine(engine, vgpu->gvt->dev_priv, id) + intel_context_unpin(s->shadow[id]); + kmem_cache_destroy(s->workloads); } @@ -1178,17 +1156,17 @@ void intel_vgpu_reset_submission(struct intel_vgpu *vgpu, } static void -i915_context_ppgtt_root_save(struct intel_vgpu_submission *s) +i915_context_ppgtt_root_save(struct intel_vgpu_submission *s, + struct i915_hw_ppgtt *ppgtt) { - struct i915_hw_ppgtt *i915_ppgtt = s->shadow_ctx->ppgtt; int i; - if (i915_vm_is_4lvl(&i915_ppgtt->vm)) - s->i915_context_pml4 = px_dma(&i915_ppgtt->pml4); - else { + if (i915_vm_is_4lvl(&ppgtt->vm)) { + s->i915_context_pml4 = px_dma(&ppgtt->pml4); + } else { for (i = 0; i < GEN8_3LVL_PDPES; i++) s->i915_context_pdps[i] = - px_dma(i915_ppgtt->pdp.page_directory[i]); + px_dma(ppgtt->pdp.page_directory[i]); } } @@ -1205,16 +1183,36 @@ i915_context_ppgtt_root_save(struct intel_vgpu_submission *s) int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) { struct intel_vgpu_submission *s = &vgpu->submission; - enum intel_engine_id i; struct intel_engine_cs *engine; + struct i915_gem_context *ctx; + enum intel_engine_id i; int ret; - s->shadow_ctx = i915_gem_context_create_gvt( - &vgpu->gvt->dev_priv->drm); - if (IS_ERR(s->shadow_ctx)) - return PTR_ERR(s->shadow_ctx); + ctx = i915_gem_context_create_gvt(&vgpu->gvt->dev_priv->drm); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + i915_context_ppgtt_root_save(s, ctx->ppgtt); + + for_each_engine(engine, vgpu->gvt->dev_priv, i) { + struct intel_context *ce; + + INIT_LIST_HEAD(&s->workload_q_head[i]); + s->shadow[i] = ERR_PTR(-EINVAL); + + ce = i915_gem_context_get_engine(ctx, i); + if (IS_ERR(ce)) { + ret = PTR_ERR(ce); + goto out_shadow_ctx; + } - i915_context_ppgtt_root_save(s); + ret = intel_context_pin(ce); + intel_context_put(ce); + if (ret) + goto out_shadow_ctx; + + s->shadow[i] = ce; + } bitmap_zero(s->shadow_ctx_desc_updated, I915_NUM_ENGINES); @@ -1230,16 +1228,21 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) goto out_shadow_ctx; } - for_each_engine(engine, vgpu->gvt->dev_priv, i) - INIT_LIST_HEAD(&s->workload_q_head[i]); - atomic_set(&s->running_workload_num, 0); bitmap_zero(s->tlb_handle_pending, I915_NUM_ENGINES); + i915_gem_context_put(ctx); return 0; out_shadow_ctx: - i915_gem_context_put(s->shadow_ctx); + i915_context_ppgtt_root_restore(s, ctx->ppgtt); + for_each_engine(engine, vgpu->gvt->dev_priv, i) { + if (IS_ERR(s->shadow[i])) + break; + + intel_context_unpin(s->shadow[i]); + } + i915_gem_context_put(ctx); return ret; } diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 503d548a55f7d5003b3f676e12be6da0b81e4721..e9fadcb4d59248ae34e0bbb7c3d6c4a47e92ee1a 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -25,8 +25,9 @@ * */ +#include "gt/intel_engine.h" + #include "i915_drv.h" -#include "intel_ringbuffer.h" /** * DOC: batch buffer command parser diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 5823ffb17821d78939087d8dee8f4df6797d0268..633a08c0f9079f65051b41557bf8946602d4c6a2 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -32,7 +32,12 @@ #include #include -#include "i915_reset.h" +#include "gt/intel_reset.h" + +#include "i915_debugfs.h" +#include "i915_gem_context.h" +#include "i915_irq.h" +#include "intel_csr.h" #include "intel_dp.h" #include "intel_drv.h" #include "intel_fbc.h" @@ -41,6 +46,7 @@ #include "intel_hdmi.h" #include "intel_pm.h" #include "intel_psr.h" +#include "intel_sideband.h" static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node) { @@ -206,6 +212,18 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) vma->ggtt_view.rotated.plane[1].offset); break; + case I915_GGTT_VIEW_REMAPPED: + seq_printf(m, ", remapped [(%ux%u, stride=%u, offset=%u), (%ux%u, stride=%u, offset=%u)]", + vma->ggtt_view.remapped.plane[0].width, + vma->ggtt_view.remapped.plane[0].height, + vma->ggtt_view.remapped.plane[0].stride, + vma->ggtt_view.remapped.plane[0].offset, + vma->ggtt_view.remapped.plane[1].width, + vma->ggtt_view.remapped.plane[1].height, + vma->ggtt_view.remapped.plane[1].stride, + vma->ggtt_view.remapped.plane[1].offset); + break; + default: MISSING_CASE(vma->ggtt_view.type); break; @@ -395,14 +413,17 @@ static void print_context_stats(struct seq_file *m, struct i915_gem_context *ctx; list_for_each_entry(ctx, &i915->contexts.list, link) { + struct i915_gem_engines_iter it; struct intel_context *ce; - list_for_each_entry(ce, &ctx->active_engines, active_link) { + for_each_gem_engine(ce, + i915_gem_context_lock_engines(ctx), it) { if (ce->state) per_file_stats(0, ce->state->obj, &kstats); if (ce->ring) per_file_stats(0, ce->ring->vma->obj, &kstats); } + i915_gem_context_unlock_engines(ctx); if (!IS_ERR_OR_NULL(ctx->file_priv)) { struct file_stats stats = { .vm = &ctx->ppgtt->vm, }; @@ -1045,8 +1066,6 @@ static int i915_frequency_info(struct seq_file *m, void *unused) } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { u32 rpmodectl, freq_sts; - mutex_lock(&dev_priv->pcu_lock); - rpmodectl = I915_READ(GEN6_RP_CONTROL); seq_printf(m, "Video Turbo Mode: %s\n", yesno(rpmodectl & GEN6_RP_MEDIA_TURBO)); @@ -1056,7 +1075,10 @@ static int i915_frequency_info(struct seq_file *m, void *unused) yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == GEN6_RP_MEDIA_SW_MODE)); + vlv_punit_get(dev_priv); freq_sts = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); + vlv_punit_put(dev_priv); + seq_printf(m, "PUNIT_REG_GPU_FREQ_STS: 0x%08x\n", freq_sts); seq_printf(m, "DDR freq: %d MHz\n", dev_priv->mem_freq); @@ -1078,7 +1100,6 @@ static int i915_frequency_info(struct seq_file *m, void *unused) seq_printf(m, "efficient (RPe) frequency: %d MHz\n", intel_gpu_freq(dev_priv, rps->efficient_freq)); - mutex_unlock(&dev_priv->pcu_lock); } else if (INTEL_GEN(dev_priv) >= 6) { u32 rp_state_limits; u32 gt_perf_status; @@ -1279,7 +1300,6 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) struct drm_i915_private *dev_priv = node_to_i915(m->private); struct intel_engine_cs *engine; u64 acthd[I915_NUM_ENGINES]; - u32 seqno[I915_NUM_ENGINES]; struct intel_instdone instdone; intel_wakeref_t wakeref; enum intel_engine_id id; @@ -1296,10 +1316,8 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) } with_intel_runtime_pm(dev_priv, wakeref) { - for_each_engine(engine, dev_priv, id) { + for_each_engine(engine, dev_priv, id) acthd[id] = intel_engine_get_active_head(engine); - seqno[id] = intel_engine_get_hangcheck_seqno(engine); - } intel_engine_get_instdone(dev_priv->engine[RCS0], &instdone); } @@ -1316,11 +1334,8 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) seq_printf(m, "GT active? %s\n", yesno(dev_priv->gt.awake)); for_each_engine(engine, dev_priv, id) { - seq_printf(m, "%s:\n", engine->name); - seq_printf(m, "\tseqno = %x [current %x, last %x], %dms ago\n", - engine->hangcheck.last_seqno, - seqno[id], - engine->hangcheck.next_seqno, + seq_printf(m, "%s: %d ms ago\n", + engine->name, jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp)); @@ -1483,12 +1498,9 @@ static int gen6_drpc_info(struct seq_file *m) gen9_powergate_status = I915_READ(GEN9_PWRGT_DOMAIN_STATUS); } - if (INTEL_GEN(dev_priv) <= 7) { - mutex_lock(&dev_priv->pcu_lock); + if (INTEL_GEN(dev_priv) <= 7) sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids); - mutex_unlock(&dev_priv->pcu_lock); - } seq_printf(m, "RC1e Enabled: %s\n", yesno(rcctl1 & GEN6_RC_CTL_RC1e_ENABLE)); @@ -1752,17 +1764,10 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) unsigned int max_gpu_freq, min_gpu_freq; intel_wakeref_t wakeref; int gpu_freq, ia_freq; - int ret; if (!HAS_LLC(dev_priv)) return -ENODEV; - wakeref = intel_runtime_pm_get(dev_priv); - - ret = mutex_lock_interruptible(&dev_priv->pcu_lock); - if (ret) - goto out; - min_gpu_freq = rps->min_freq; max_gpu_freq = rps->max_freq; if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) { @@ -1773,6 +1778,7 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) seq_puts(m, "GPU freq (MHz)\tEffective CPU freq (MHz)\tEffective Ring freq (MHz)\n"); + wakeref = intel_runtime_pm_get(dev_priv); for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) { ia_freq = gpu_freq; sandybridge_pcode_read(dev_priv, @@ -1786,12 +1792,9 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) ((ia_freq >> 0) & 0xff) * 100, ((ia_freq >> 8) & 0xff) * 100); } - - mutex_unlock(&dev_priv->pcu_lock); - -out: intel_runtime_pm_put(dev_priv, wakeref); - return ret; + + return 0; } static int i915_opregion(struct seq_file *m, void *unused) @@ -1892,6 +1895,7 @@ static int i915_context_status(struct seq_file *m, void *unused) return ret; list_for_each_entry(ctx, &dev_priv->contexts.list, link) { + struct i915_gem_engines_iter it; struct intel_context *ce; seq_puts(m, "HW context "); @@ -1916,7 +1920,8 @@ static int i915_context_status(struct seq_file *m, void *unused) seq_putc(m, ctx->remap_slice ? 'R' : 'r'); seq_putc(m, '\n'); - list_for_each_entry(ce, &ctx->active_engines, active_link) { + for_each_gem_engine(ce, + i915_gem_context_lock_engines(ctx), it) { seq_printf(m, "%s: ", ce->engine->name); if (ce->state) describe_obj(m, ce->state->obj); @@ -1924,6 +1929,7 @@ static int i915_context_status(struct seq_file *m, void *unused) describe_ctx_ring(m, ce->ring); seq_putc(m, '\n'); } + i915_gem_context_unlock_engines(ctx); seq_putc(m, '\n'); } @@ -2028,11 +2034,11 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) with_intel_runtime_pm_if_in_use(dev_priv, wakeref) { if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { - mutex_lock(&dev_priv->pcu_lock); + vlv_punit_get(dev_priv); act_freq = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); + vlv_punit_put(dev_priv); act_freq = (act_freq >> 8) & 0xff; - mutex_unlock(&dev_priv->pcu_lock); } else { act_freq = intel_get_cagf(dev_priv, I915_READ(GEN6_RPSTAT1)); @@ -2040,8 +2046,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) } seq_printf(m, "RPS enabled? %d\n", rps->enabled); - seq_printf(m, "GPU busy? %s [%d requests]\n", - yesno(dev_priv->gt.awake), dev_priv->gt.active_requests); + seq_printf(m, "GPU busy? %s\n", yesno(dev_priv->gt.awake)); seq_printf(m, "Boosts outstanding? %d\n", atomic_read(&rps->num_waiters)); seq_printf(m, "Interactive? %d\n", READ_ONCE(rps->power.interactive)); @@ -2060,9 +2065,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) seq_printf(m, "Wait boosts: %d\n", atomic_read(&rps->boosts)); - if (INTEL_GEN(dev_priv) >= 6 && - rps->enabled && - dev_priv->gt.active_requests) { + if (INTEL_GEN(dev_priv) >= 6 && rps->enabled && dev_priv->gt.awake) { u32 rpup, rpupei; u32 rpdown, rpdownei; @@ -3091,9 +3094,9 @@ static int i915_engine_info(struct seq_file *m, void *unused) wakeref = intel_runtime_pm_get(dev_priv); - seq_printf(m, "GT awake? %s\n", yesno(dev_priv->gt.awake)); - seq_printf(m, "Global active requests: %d\n", - dev_priv->gt.active_requests); + seq_printf(m, "GT awake? %s [%d]\n", + yesno(dev_priv->gt.awake), + atomic_read(&dev_priv->gt.wakeref.count)); seq_printf(m, "CS timestamp frequency: %u kHz\n", RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz); @@ -3904,14 +3907,26 @@ i915_drop_caches_set(void *data, u64 val) /* No need to check and wait for gpu resets, only libdrm auto-restarts * on ioctls on -EAGAIN. */ - if (val & (DROP_ACTIVE | DROP_RETIRE | DROP_RESET_SEQNO)) { + if (val & (DROP_ACTIVE | DROP_IDLE | DROP_RETIRE | DROP_RESET_SEQNO)) { int ret; ret = mutex_lock_interruptible(&i915->drm.struct_mutex); if (ret) return ret; - if (val & DROP_ACTIVE) + /* + * To finish the flush of the idle_worker, we must complete + * the switch-to-kernel-context, which requires a double + * pass through wait_for_idle: first queues the switch, + * second waits for the switch. + */ + if (ret == 0 && val & (DROP_IDLE | DROP_ACTIVE)) + ret = i915_gem_wait_for_idle(i915, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + + if (ret == 0 && val & DROP_IDLE) ret = i915_gem_wait_for_idle(i915, I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED, @@ -3938,11 +3953,8 @@ i915_drop_caches_set(void *data, u64 val) fs_reclaim_release(GFP_KERNEL); if (val & DROP_IDLE) { - do { - if (READ_ONCE(i915->gt.active_requests)) - flush_delayed_work(&i915->gt.retire_work); - drain_delayed_work(&i915->gt.idle_work); - } while (READ_ONCE(i915->gt.awake)); + flush_delayed_work(&i915->gem.retire_work); + flush_work(&i915->gem.idle_work); } if (val & DROP_FREED) @@ -4757,6 +4769,7 @@ static int i915_hdcp_sink_capability_show(struct seq_file *m, void *data) { struct drm_connector *connector = m->private; struct intel_connector *intel_connector = to_intel_connector(connector); + bool hdcp_cap, hdcp2_cap; if (connector->status != connector_status_connected) return -ENODEV; @@ -4767,8 +4780,16 @@ static int i915_hdcp_sink_capability_show(struct seq_file *m, void *data) seq_printf(m, "%s:%d HDCP version: ", connector->name, connector->base.id); - seq_printf(m, "%s ", !intel_hdcp_capable(intel_connector) ? - "None" : "HDCP1.4"); + hdcp_cap = intel_hdcp_capable(intel_connector); + hdcp2_cap = intel_hdcp2_capable(intel_connector); + + if (hdcp_cap) + seq_puts(m, "HDCP1.4 "); + if (hdcp2_cap) + seq_puts(m, "HDCP2.2 "); + + if (!hdcp_cap && !hdcp2_cap) + seq_puts(m, "None"); seq_puts(m, "\n"); return 0; diff --git a/drivers/gpu/drm/i915/i915_debugfs.h b/drivers/gpu/drm/i915/i915_debugfs.h new file mode 100644 index 0000000000000000000000000000000000000000..c0cd22eb916dd4a64418edfe556ae914afac8782 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_debugfs.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __I915_DEBUGFS_H__ +#define __I915_DEBUGFS_H__ + +struct drm_i915_private; +struct drm_connector; + +#ifdef CONFIG_DEBUG_FS +int i915_debugfs_register(struct drm_i915_private *dev_priv); +int i915_debugfs_connector_add(struct drm_connector *connector); +#else +static inline int i915_debugfs_register(struct drm_i915_private *dev_priv) { return 0; } +static inline int i915_debugfs_connector_add(struct drm_connector *connector) { return 0; } +#endif + +#endif /* __I915_DEBUGFS_H__ */ diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 1ad88e6d7c0444dbf814ccf013e95b9d8162cebb..83d2eb9e74cb72bb47198bd89bb8766a0b081c2a 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -47,22 +47,31 @@ #include #include +#include "gt/intel_gt_pm.h" +#include "gt/intel_reset.h" +#include "gt/intel_workarounds.h" + +#include "i915_debugfs.h" #include "i915_drv.h" +#include "i915_irq.h" #include "i915_pmu.h" #include "i915_query.h" -#include "i915_reset.h" #include "i915_trace.h" #include "i915_vgpu.h" +#include "intel_acpi.h" #include "intel_audio.h" #include "intel_cdclk.h" #include "intel_csr.h" #include "intel_dp.h" #include "intel_drv.h" #include "intel_fbdev.h" +#include "intel_gmbus.h" +#include "intel_hotplug.h" +#include "intel_overlay.h" +#include "intel_pipe_crc.h" #include "intel_pm.h" #include "intel_sprite.h" #include "intel_uc.h" -#include "intel_workarounds.h" static struct drm_driver driver; @@ -186,7 +195,8 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id) DRM_DEBUG_KMS("Found Kaby Lake PCH (KBP)\n"); WARN_ON(!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv) && !IS_COFFEELAKE(dev_priv)); - return PCH_KBP; + /* KBP is SPT compatible */ + return PCH_SPT; case INTEL_PCH_CNP_DEVICE_ID_TYPE: DRM_DEBUG_KMS("Found Cannon Lake PCH (CNP)\n"); WARN_ON(!IS_CANNONLAKE(dev_priv) && !IS_COFFEELAKE(dev_priv)); @@ -433,6 +443,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data, case I915_PARAM_HAS_EXEC_CAPTURE: case I915_PARAM_HAS_EXEC_BATCH_FIRST: case I915_PARAM_HAS_EXEC_FENCE_ARRAY: + case I915_PARAM_HAS_EXEC_SUBMIT_FENCE: /* For the time being all of these are always true; * if some supported hardware does not have one of these * features this value needs to be provided from @@ -697,7 +708,7 @@ static int i915_load_modeset_init(struct drm_device *dev) if (ret) goto cleanup_csr; - intel_setup_gmbus(dev_priv); + intel_gmbus_setup(dev_priv); /* Important: The output setup functions called by modeset_init need * working irqs for e.g. gmbus and dp aux transfers. */ @@ -732,7 +743,7 @@ static int i915_load_modeset_init(struct drm_device *dev) intel_modeset_cleanup(dev); cleanup_irq: drm_irq_uninstall(dev); - intel_teardown_gmbus(dev_priv); + intel_gmbus_teardown(dev_priv); cleanup_csr: intel_csr_ucode_fini(dev_priv); intel_power_domains_fini_hw(dev_priv); @@ -884,6 +895,9 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv) mutex_init(&dev_priv->backlight_lock); mutex_init(&dev_priv->sb_lock); + pm_qos_add_request(&dev_priv->sb_qos, + PM_QOS_CPU_DMA_LATENCY, PM_QOS_DEFAULT_VALUE); + mutex_init(&dev_priv->av_mutex); mutex_init(&dev_priv->wm.wm_mutex); mutex_init(&dev_priv->pps_mutex); @@ -943,6 +957,9 @@ static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv) i915_gem_cleanup_early(dev_priv); i915_workqueues_cleanup(dev_priv); i915_engines_cleanup(dev_priv); + + pm_qos_remove_request(&dev_priv->sb_qos); + mutex_destroy(&dev_priv->sb_lock); } /** @@ -1760,7 +1777,7 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv) i915_pmu_unregister(dev_priv); i915_teardown_sysfs(dev_priv); - drm_dev_unregister(&dev_priv->drm); + drm_dev_unplug(&dev_priv->drm); i915_gem_shrinker_unregister(dev_priv); } @@ -2322,7 +2339,7 @@ static int i915_drm_resume_early(struct drm_device *dev) intel_power_domains_resume(dev_priv); - intel_engines_sanitize(dev_priv, true); + intel_gt_sanitize(dev_priv, true); enable_rpm_wakeref_asserts(dev_priv); @@ -2875,7 +2892,7 @@ static int intel_runtime_suspend(struct device *kdev) */ i915_gem_runtime_suspend(dev_priv); - intel_uc_suspend(dev_priv); + intel_uc_runtime_suspend(dev_priv); intel_runtime_pm_disable_interrupts(dev_priv); @@ -3098,7 +3115,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_BATCHBUFFER, drm_noop, DRM_AUTH), DRM_IOCTL_DEF_DRV(I915_IRQ_EMIT, drm_noop, DRM_AUTH), DRM_IOCTL_DEF_DRV(I915_IRQ_WAIT, drm_noop, DRM_AUTH), - DRM_IOCTL_DEF_DRV(I915_GETPARAM, i915_getparam_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_GETPARAM, i915_getparam_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_SETPARAM, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_ALLOC, drm_noop, DRM_AUTH), DRM_IOCTL_DEF_DRV(I915_FREE, drm_noop, DRM_AUTH), @@ -3111,13 +3128,13 @@ static const struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_HWS_ADDR, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_INIT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER, i915_gem_execbuffer_ioctl, DRM_AUTH), - DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2_WR, i915_gem_execbuffer2_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2_WR, i915_gem_execbuffer2_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_PIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_UNPIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY), - DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_SET_CACHING, i915_gem_set_caching_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_GET_CACHING, i915_gem_get_caching_ioctl, DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(I915_GEM_THROTTLE, i915_gem_throttle_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_GEM_THROTTLE, i915_gem_throttle_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_ENTERVT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_LEAVEVT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_CREATE, i915_gem_create_ioctl, DRM_RENDER_ALLOW), @@ -3136,7 +3153,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_OVERLAY_ATTRS, intel_overlay_attrs_ioctl, DRM_MASTER), DRM_IOCTL_DEF_DRV(I915_SET_SPRITE_COLORKEY, intel_sprite_set_colorkey_ioctl, DRM_MASTER), DRM_IOCTL_DEF_DRV(I915_GET_SPRITE_COLORKEY, drm_noop, DRM_MASTER), - DRM_IOCTL_DEF_DRV(I915_GEM_WAIT, i915_gem_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_GEM_WAIT, i915_gem_wait_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE_EXT, i915_gem_context_create_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_RENDER_ALLOW), @@ -3148,6 +3165,8 @@ static const struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_PERF_ADD_CONFIG, i915_perf_add_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_PERF_REMOVE_CONFIG, i915_perf_remove_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_QUERY, i915_query_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_GEM_VM_CREATE, i915_gem_vm_create_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_GEM_VM_DESTROY, i915_gem_vm_destroy_ioctl, DRM_RENDER_ALLOW), }; static struct drm_driver driver = { diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 066fd2a1285197eda197f53cd5d12603bc9debbd..a2664ea1395b54927e9b85b8982fa619440d6385 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -62,18 +62,21 @@ #include "i915_reg.h" #include "i915_utils.h" +#include "gt/intel_lrc.h" +#include "gt/intel_engine.h" +#include "gt/intel_workarounds.h" + #include "intel_bios.h" #include "intel_device_info.h" #include "intel_display.h" #include "intel_dpll_mgr.h" #include "intel_frontbuffer.h" -#include "intel_lrc.h" #include "intel_opregion.h" -#include "intel_ringbuffer.h" +#include "intel_runtime_pm.h" #include "intel_uc.h" #include "intel_uncore.h" +#include "intel_wakeref.h" #include "intel_wopcm.h" -#include "intel_workarounds.h" #include "i915_gem.h" #include "i915_gem_context.h" @@ -93,8 +96,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20190417" -#define DRIVER_TIMESTAMP 1555492067 +#define DRIVER_DATE "20190524" +#define DRIVER_TIMESTAMP 1558719322 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and * WARN_ON()) for hw state sanity checks to check for unexpected conditions @@ -133,8 +136,6 @@ bool i915_error_injected(void); __i915_printk(i915, i915_error_injected() ? KERN_DEBUG : KERN_ERR, \ fmt, ##__VA_ARGS__) -typedef depot_stack_handle_t intel_wakeref_t; - enum hpd_pin { HPD_NONE = 0, HPD_TV = HPD_NONE, /* TV is known to be unreliable */ @@ -344,10 +345,6 @@ struct drm_i915_display_funcs { void (*load_luts)(const struct intel_crtc_state *crtc_state); }; -#define CSR_VERSION(major, minor) ((major) << 16 | (minor)) -#define CSR_VERSION_MAJOR(version) ((version) >> 16) -#define CSR_VERSION_MINOR(version) ((version) & 0xffff) - struct intel_csr { struct work_struct work; const char *fw_path; @@ -535,17 +532,11 @@ enum intel_pch { PCH_IBX, /* Ibexpeak PCH */ PCH_CPT, /* Cougarpoint/Pantherpoint PCH */ PCH_LPT, /* Lynxpoint/Wildcatpoint PCH */ - PCH_SPT, /* Sunrisepoint PCH */ - PCH_KBP, /* Kaby Lake PCH */ + PCH_SPT, /* Sunrisepoint/Kaby Lake PCH */ PCH_CNP, /* Cannon/Comet Lake PCH */ PCH_ICP, /* Ice Lake PCH */ }; -enum intel_sbi_destination { - SBI_ICLK, - SBI_MPHY, -}; - #define QUIRK_LVDS_SSC_DISABLE (1<<1) #define QUIRK_INVERT_BRIGHTNESS (1<<2) #define QUIRK_BACKLIGHT_PRESENT (1<<3) @@ -648,6 +639,8 @@ struct intel_rps_ei { }; struct intel_rps { + struct mutex lock; /* protects enabling and the worker */ + /* * work, interrupts_enabled and pm_iir are protected by * dev_priv->irq_lock @@ -841,6 +834,11 @@ struct i915_power_domains { struct mutex lock; int domain_use_count[POWER_DOMAIN_NUM]; + + struct delayed_work async_put_work; + intel_wakeref_t async_put_wakeref; + u64 async_put_domains[2]; + struct i915_power_well *power_wells; }; @@ -1561,6 +1559,7 @@ struct drm_i915_private { /* Sideband mailbox protection */ struct mutex sb_lock; + struct pm_qos_request sb_qos; /** Cached value of IMR to avoid reads in updating the bitfield */ union { @@ -1709,14 +1708,6 @@ struct drm_i915_private { */ u32 edram_size_mb; - /* - * Protects RPS/RC6 register access and PCU communication. - * Must be taken after struct_mutex if nested. Note that - * this lock may be held for long periods of time when - * talking to hw - so only take it when talking to hw! - */ - struct mutex pcu_lock; - /* gen6+ GT PM state */ struct intel_gen6_power_mgmt gt_pm; @@ -1995,8 +1986,6 @@ struct drm_i915_private { /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */ struct { - void (*cleanup_engine)(struct intel_engine_cs *engine); - struct i915_gt_timelines { struct mutex mutex; /* protects list, tainted by GPU */ struct list_head active_list; @@ -2006,10 +1995,10 @@ struct drm_i915_private { struct list_head hwsp_free_list; } timelines; - intel_engine_mask_t active_engines; struct list_head active_rings; struct list_head closed_vma; - u32 active_requests; + + struct intel_wakeref wakeref; /** * Is the GPU currently considered idle, or busy executing @@ -2020,6 +2009,16 @@ struct drm_i915_private { */ intel_wakeref_t awake; + struct blocking_notifier_head pm_notifications; + + ktime_t last_init_time; + + struct i915_vma *scratch; + } gt; + + struct { + struct notifier_block pm_notifier; + /** * We leave the user IRQ off as much as possible, * but this means that requests will finish and never @@ -2036,12 +2035,8 @@ struct drm_i915_private { * arrive within a small period of time, we fire * off the idle_work. */ - struct delayed_work idle_work; - - ktime_t last_init_time; - - struct i915_vma *scratch; - } gt; + struct work_struct idle_work; + } gem; /* For i945gm vblank irq vs. C3 workaround */ struct { @@ -2585,6 +2580,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_RC6p(dev_priv) (INTEL_INFO(dev_priv)->has_rc6p) #define HAS_RC6pp(dev_priv) (false) /* HW was never validated */ +#define HAS_RPS(dev_priv) (INTEL_INFO(dev_priv)->has_rps) + #define HAS_CSR(dev_priv) (INTEL_INFO(dev_priv)->display.has_csr) #define HAS_RUNTIME_PM(dev_priv) (INTEL_INFO(dev_priv)->has_runtime_pm) @@ -2636,7 +2633,6 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define INTEL_PCH_ID(dev_priv) ((dev_priv)->pch_id) #define HAS_PCH_ICP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_ICP) #define HAS_PCH_CNP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_CNP) -#define HAS_PCH_KBP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_KBP) #define HAS_PCH_SPT(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_SPT) #define HAS_PCH_LPT(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_LPT) #define HAS_PCH_LPT_LP(dev_priv) \ @@ -2714,23 +2710,8 @@ extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv); extern void i915_update_gfx_val(struct drm_i915_private *dev_priv); int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on); -int intel_engines_init_mmio(struct drm_i915_private *dev_priv); -int intel_engines_init(struct drm_i915_private *dev_priv); - u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv); -/* intel_hotplug.c */ -void intel_hpd_irq_handler(struct drm_i915_private *dev_priv, - u32 pin_mask, u32 long_mask); -void intel_hpd_init(struct drm_i915_private *dev_priv); -void intel_hpd_init_work(struct drm_i915_private *dev_priv); -void intel_hpd_cancel_work(struct drm_i915_private *dev_priv); -enum hpd_pin intel_hpd_pin_default(struct drm_i915_private *dev_priv, - enum port port); -bool intel_hpd_disable(struct drm_i915_private *dev_priv, enum hpd_pin pin); -void intel_hpd_enable(struct drm_i915_private *dev_priv, enum hpd_pin pin); - -/* i915_irq.c */ static inline void i915_queue_hangcheck(struct drm_i915_private *dev_priv) { unsigned long delay; @@ -2748,11 +2729,6 @@ static inline void i915_queue_hangcheck(struct drm_i915_private *dev_priv) &dev_priv->gpu_error.hangcheck_work, delay); } -extern void intel_irq_init(struct drm_i915_private *dev_priv); -extern void intel_irq_fini(struct drm_i915_private *dev_priv); -int intel_irq_install(struct drm_i915_private *dev_priv); -void intel_irq_uninstall(struct drm_i915_private *dev_priv); - static inline bool intel_gvt_active(struct drm_i915_private *dev_priv) { return dev_priv->gvt; @@ -2763,62 +2739,6 @@ static inline bool intel_vgpu_active(struct drm_i915_private *dev_priv) return dev_priv->vgpu.active; } -u32 i915_pipestat_enable_mask(struct drm_i915_private *dev_priv, - enum pipe pipe); -void -i915_enable_pipestat(struct drm_i915_private *dev_priv, enum pipe pipe, - u32 status_mask); - -void -i915_disable_pipestat(struct drm_i915_private *dev_priv, enum pipe pipe, - u32 status_mask); - -void valleyview_enable_display_irqs(struct drm_i915_private *dev_priv); -void valleyview_disable_display_irqs(struct drm_i915_private *dev_priv); -void i915_hotplug_interrupt_update(struct drm_i915_private *dev_priv, - u32 mask, - u32 bits); -void ilk_update_display_irq(struct drm_i915_private *dev_priv, - u32 interrupt_mask, - u32 enabled_irq_mask); -static inline void -ilk_enable_display_irq(struct drm_i915_private *dev_priv, u32 bits) -{ - ilk_update_display_irq(dev_priv, bits, bits); -} -static inline void -ilk_disable_display_irq(struct drm_i915_private *dev_priv, u32 bits) -{ - ilk_update_display_irq(dev_priv, bits, 0); -} -void bdw_update_pipe_irq(struct drm_i915_private *dev_priv, - enum pipe pipe, - u32 interrupt_mask, - u32 enabled_irq_mask); -static inline void bdw_enable_pipe_irq(struct drm_i915_private *dev_priv, - enum pipe pipe, u32 bits) -{ - bdw_update_pipe_irq(dev_priv, pipe, bits, bits); -} -static inline void bdw_disable_pipe_irq(struct drm_i915_private *dev_priv, - enum pipe pipe, u32 bits) -{ - bdw_update_pipe_irq(dev_priv, pipe, bits, 0); -} -void ibx_display_interrupt_update(struct drm_i915_private *dev_priv, - u32 interrupt_mask, - u32 enabled_irq_mask); -static inline void -ibx_enable_display_interrupt(struct drm_i915_private *dev_priv, u32 bits) -{ - ibx_display_interrupt_update(dev_priv, bits, bits); -} -static inline void -ibx_disable_display_interrupt(struct drm_i915_private *dev_priv, u32 bits) -{ - ibx_display_interrupt_update(dev_priv, bits, 0); -} - /* i915_gem.c */ int i915_gem_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); @@ -2903,15 +2823,15 @@ static inline void i915_gem_drain_workqueue(struct drm_i915_private *i915) * grace period so that we catch work queued via RCU from the first * pass. As neither drain_workqueue() nor flush_workqueue() report * a result, we make an assumption that we only don't require more - * than 2 passes to catch all recursive RCU delayed work. + * than 3 passes to catch all _recursive_ RCU delayed work. * */ - int pass = 2; + int pass = 3; do { rcu_barrier(); i915_gem_drain_freed_objects(i915); - drain_workqueue(i915->wq); } while (--pass); + drain_workqueue(i915->wq); } struct i915_vma * __must_check @@ -2944,6 +2864,10 @@ i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, unsigned int n); dma_addr_t +i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj, + unsigned long n, + unsigned int *len); +dma_addr_t i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, unsigned long n); @@ -3005,7 +2929,7 @@ enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */ int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, enum i915_mm_subclass subclass); -void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj); +void __i915_gem_object_truncate(struct drm_i915_gem_object *obj); enum i915_map_type { I915_MAP_WB = 0, @@ -3124,7 +3048,6 @@ int __must_check i915_gem_init(struct drm_i915_private *dev_priv); int __must_check i915_gem_init_hw(struct drm_i915_private *dev_priv); void i915_gem_init_swizzling(struct drm_i915_private *dev_priv); void i915_gem_fini(struct drm_i915_private *dev_priv); -void i915_gem_cleanup_engines(struct drm_i915_private *dev_priv); int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, unsigned int flags, long timeout); void i915_gem_suspend(struct drm_i915_private *dev_priv); @@ -3266,11 +3189,12 @@ unsigned long i915_gem_shrink(struct drm_i915_private *i915, unsigned long target, unsigned long *nr_scanned, unsigned flags); -#define I915_SHRINK_PURGEABLE 0x1 -#define I915_SHRINK_UNBOUND 0x2 -#define I915_SHRINK_BOUND 0x4 -#define I915_SHRINK_ACTIVE 0x8 -#define I915_SHRINK_VMAPS 0x10 +#define I915_SHRINK_PURGEABLE BIT(0) +#define I915_SHRINK_UNBOUND BIT(1) +#define I915_SHRINK_BOUND BIT(2) +#define I915_SHRINK_ACTIVE BIT(3) +#define I915_SHRINK_VMAPS BIT(4) +#define I915_SHRINK_WRITEBACK BIT(5) unsigned long i915_gem_shrink_all(struct drm_i915_private *i915); void i915_gem_shrinker_register(struct drm_i915_private *i915); void i915_gem_shrinker_unregister(struct drm_i915_private *i915); @@ -3291,18 +3215,6 @@ u32 i915_gem_fence_size(struct drm_i915_private *dev_priv, u32 size, u32 i915_gem_fence_alignment(struct drm_i915_private *dev_priv, u32 size, unsigned int tiling, unsigned int stride); -/* i915_debugfs.c */ -#ifdef CONFIG_DEBUG_FS -int i915_debugfs_register(struct drm_i915_private *dev_priv); -int i915_debugfs_connector_add(struct drm_connector *connector); -void intel_display_crc_init(struct drm_i915_private *dev_priv); -#else -static inline int i915_debugfs_register(struct drm_i915_private *dev_priv) {return 0;} -static inline int i915_debugfs_connector_add(struct drm_connector *connector) -{ return 0; } -static inline void intel_display_crc_init(struct drm_i915_private *dev_priv) {} -#endif - const char *i915_cache_level_str(struct drm_i915_private *i915, int type); /* i915_cmd_parser.c */ @@ -3330,56 +3242,6 @@ extern int i915_restore_state(struct drm_i915_private *dev_priv); void i915_setup_sysfs(struct drm_i915_private *dev_priv); void i915_teardown_sysfs(struct drm_i915_private *dev_priv); -/* intel_lpe_audio.c */ -int intel_lpe_audio_init(struct drm_i915_private *dev_priv); -void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv); -void intel_lpe_audio_irq_handler(struct drm_i915_private *dev_priv); -void intel_lpe_audio_notify(struct drm_i915_private *dev_priv, - enum pipe pipe, enum port port, - const void *eld, int ls_clock, bool dp_output); - -/* intel_i2c.c */ -extern int intel_setup_gmbus(struct drm_i915_private *dev_priv); -extern void intel_teardown_gmbus(struct drm_i915_private *dev_priv); -extern bool intel_gmbus_is_valid_pin(struct drm_i915_private *dev_priv, - unsigned int pin); -extern int intel_gmbus_output_aksv(struct i2c_adapter *adapter); - -extern struct i2c_adapter * -intel_gmbus_get_adapter(struct drm_i915_private *dev_priv, unsigned int pin); -extern void intel_gmbus_set_speed(struct i2c_adapter *adapter, int speed); -extern void intel_gmbus_force_bit(struct i2c_adapter *adapter, bool force_bit); -static inline bool intel_gmbus_is_forced_bit(struct i2c_adapter *adapter) -{ - return container_of(adapter, struct intel_gmbus, adapter)->force_bit; -} -extern void intel_i2c_reset(struct drm_i915_private *dev_priv); - -/* intel_bios.c */ -void intel_bios_init(struct drm_i915_private *dev_priv); -void intel_bios_cleanup(struct drm_i915_private *dev_priv); -bool intel_bios_is_valid_vbt(const void *buf, size_t size); -bool intel_bios_is_tv_present(struct drm_i915_private *dev_priv); -bool intel_bios_is_lvds_present(struct drm_i915_private *dev_priv, u8 *i2c_pin); -bool intel_bios_is_port_present(struct drm_i915_private *dev_priv, enum port port); -bool intel_bios_is_port_edp(struct drm_i915_private *dev_priv, enum port port); -bool intel_bios_is_port_dp_dual_mode(struct drm_i915_private *dev_priv, enum port port); -bool intel_bios_is_dsi_present(struct drm_i915_private *dev_priv, enum port *port); -bool intel_bios_is_port_hpd_inverted(struct drm_i915_private *dev_priv, - enum port port); -bool intel_bios_is_lspcon_present(struct drm_i915_private *dev_priv, - enum port port); -enum aux_ch intel_bios_port_aux_ch(struct drm_i915_private *dev_priv, enum port port); - -/* intel_acpi.c */ -#ifdef CONFIG_ACPI -extern void intel_register_dsm_handler(void); -extern void intel_unregister_dsm_handler(void); -#else -static inline void intel_register_dsm_handler(void) { return; } -static inline void intel_unregister_dsm_handler(void) { return; } -#endif /* CONFIG_ACPI */ - /* intel_device_info.c */ static inline struct intel_device_info * mkwrite_device_info(struct drm_i915_private *dev_priv) @@ -3387,20 +3249,6 @@ mkwrite_device_info(struct drm_i915_private *dev_priv) return (struct intel_device_info *)INTEL_INFO(dev_priv); } -static inline struct intel_sseu -intel_device_default_sseu(struct drm_i915_private *i915) -{ - const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu; - struct intel_sseu value = { - .slice_mask = sseu->slice_mask, - .subslice_mask = sseu->subslice_mask[0], - .min_eus_per_subslice = sseu->max_eus_per_subslice, - .max_eus_per_subslice = sseu->max_eus_per_subslice, - }; - - return value; -} - /* modesetting */ extern void intel_modeset_init_hw(struct drm_device *dev); extern int intel_modeset_init(struct drm_device *dev); @@ -3417,115 +3265,15 @@ extern void intel_rps_mark_interactive(struct drm_i915_private *i915, bool interactive); extern bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable); -void intel_dsc_enable(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state); -void intel_dsc_disable(const struct intel_crtc_state *crtc_state); int i915_reg_read_ioctl(struct drm_device *dev, void *data, struct drm_file *file); -/* overlay */ -extern struct intel_overlay_error_state * -intel_overlay_capture_error_state(struct drm_i915_private *dev_priv); -extern void intel_overlay_print_error_state(struct drm_i915_error_state_buf *e, - struct intel_overlay_error_state *error); - extern struct intel_display_error_state * intel_display_capture_error_state(struct drm_i915_private *dev_priv); extern void intel_display_print_error_state(struct drm_i915_error_state_buf *e, struct intel_display_error_state *error); -int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val); -int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv, u32 mbox, - u32 val, int fast_timeout_us, - int slow_timeout_ms); -#define sandybridge_pcode_write(dev_priv, mbox, val) \ - sandybridge_pcode_write_timeout(dev_priv, mbox, val, 500, 0) - -int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request, - u32 reply_mask, u32 reply, int timeout_base_ms); - -/* intel_sideband.c */ -u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr); -int vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val); -u32 vlv_nc_read(struct drm_i915_private *dev_priv, u8 addr); -u32 vlv_iosf_sb_read(struct drm_i915_private *dev_priv, u8 port, u32 reg); -void vlv_iosf_sb_write(struct drm_i915_private *dev_priv, u8 port, u32 reg, u32 val); -u32 vlv_cck_read(struct drm_i915_private *dev_priv, u32 reg); -void vlv_cck_write(struct drm_i915_private *dev_priv, u32 reg, u32 val); -u32 vlv_ccu_read(struct drm_i915_private *dev_priv, u32 reg); -void vlv_ccu_write(struct drm_i915_private *dev_priv, u32 reg, u32 val); -u32 vlv_bunit_read(struct drm_i915_private *dev_priv, u32 reg); -void vlv_bunit_write(struct drm_i915_private *dev_priv, u32 reg, u32 val); -u32 vlv_dpio_read(struct drm_i915_private *dev_priv, enum pipe pipe, int reg); -void vlv_dpio_write(struct drm_i915_private *dev_priv, enum pipe pipe, int reg, u32 val); -u32 intel_sbi_read(struct drm_i915_private *dev_priv, u16 reg, - enum intel_sbi_destination destination); -void intel_sbi_write(struct drm_i915_private *dev_priv, u16 reg, u32 value, - enum intel_sbi_destination destination); -u32 vlv_flisdsi_read(struct drm_i915_private *dev_priv, u32 reg); -void vlv_flisdsi_write(struct drm_i915_private *dev_priv, u32 reg, u32 val); - -/* intel_dpio_phy.c */ -void bxt_port_to_phy_channel(struct drm_i915_private *dev_priv, enum port port, - enum dpio_phy *phy, enum dpio_channel *ch); -void bxt_ddi_phy_set_signal_level(struct drm_i915_private *dev_priv, - enum port port, u32 margin, u32 scale, - u32 enable, u32 deemphasis); -void bxt_ddi_phy_init(struct drm_i915_private *dev_priv, enum dpio_phy phy); -void bxt_ddi_phy_uninit(struct drm_i915_private *dev_priv, enum dpio_phy phy); -bool bxt_ddi_phy_is_enabled(struct drm_i915_private *dev_priv, - enum dpio_phy phy); -bool bxt_ddi_phy_verify_state(struct drm_i915_private *dev_priv, - enum dpio_phy phy); -u8 bxt_ddi_phy_calc_lane_lat_optim_mask(u8 lane_count); -void bxt_ddi_phy_set_lane_optim_mask(struct intel_encoder *encoder, - u8 lane_lat_optim_mask); -u8 bxt_ddi_phy_get_lane_lat_optim_mask(struct intel_encoder *encoder); - -void chv_set_phy_signal_level(struct intel_encoder *encoder, - u32 deemph_reg_value, u32 margin_reg_value, - bool uniq_trans_scale); -void chv_data_lane_soft_reset(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state, - bool reset); -void chv_phy_pre_pll_enable(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state); -void chv_phy_pre_encoder_enable(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state); -void chv_phy_release_cl2_override(struct intel_encoder *encoder); -void chv_phy_post_pll_disable(struct intel_encoder *encoder, - const struct intel_crtc_state *old_crtc_state); - -void vlv_set_phy_signal_level(struct intel_encoder *encoder, - u32 demph_reg_value, u32 preemph_reg_value, - u32 uniqtranscale_reg_value, u32 tx3_demph); -void vlv_phy_pre_pll_enable(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state); -void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state); -void vlv_phy_reset_lanes(struct intel_encoder *encoder, - const struct intel_crtc_state *old_crtc_state); - -/* intel_combo_phy.c */ -void icl_combo_phys_init(struct drm_i915_private *dev_priv); -void icl_combo_phys_uninit(struct drm_i915_private *dev_priv); -void cnl_combo_phys_init(struct drm_i915_private *dev_priv); -void cnl_combo_phys_uninit(struct drm_i915_private *dev_priv); - -int intel_gpu_freq(struct drm_i915_private *dev_priv, int val); -int intel_freq_opcode(struct drm_i915_private *dev_priv, int val); -u64 intel_rc6_residency_ns(struct drm_i915_private *dev_priv, - const i915_reg_t reg); - -u32 intel_get_cagf(struct drm_i915_private *dev_priv, u32 rpstat1); - -static inline u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv, - const i915_reg_t reg) -{ - return DIV_ROUND_UP_ULL(intel_rc6_residency_ns(dev_priv, reg), 1000); -} - #define __I915_REG_OP(op__, dev_priv__, ...) \ intel_uncore_##op__(&(dev_priv__)->uncore, __VA_ARGS__) @@ -3599,60 +3347,6 @@ static inline u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv, #define INTEL_BROADCAST_RGB_FULL 1 #define INTEL_BROADCAST_RGB_LIMITED 2 -static inline i915_reg_t i915_vgacntrl_reg(struct drm_i915_private *dev_priv) -{ - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - return VLV_VGACNTRL; - else if (INTEL_GEN(dev_priv) >= 5) - return CPU_VGACNTRL; - else - return VGACNTRL; -} - -static inline unsigned long msecs_to_jiffies_timeout(const unsigned int m) -{ - unsigned long j = msecs_to_jiffies(m); - - return min_t(unsigned long, MAX_JIFFY_OFFSET, j + 1); -} - -static inline unsigned long nsecs_to_jiffies_timeout(const u64 n) -{ - /* nsecs_to_jiffies64() does not guard against overflow */ - if (NSEC_PER_SEC % HZ && - div_u64(n, NSEC_PER_SEC) >= MAX_JIFFY_OFFSET / HZ) - return MAX_JIFFY_OFFSET; - - return min_t(u64, MAX_JIFFY_OFFSET, nsecs_to_jiffies64(n) + 1); -} - -/* - * If you need to wait X milliseconds between events A and B, but event B - * doesn't happen exactly after event A, you record the timestamp (jiffies) of - * when event A happened, then just before event B you call this function and - * pass the timestamp as the first argument, and X as the second argument. - */ -static inline void -wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms) -{ - unsigned long target_jiffies, tmp_jiffies, remaining_jiffies; - - /* - * Don't re-read the value of "jiffies" every time since it may change - * behind our back and break the math. - */ - tmp_jiffies = jiffies; - target_jiffies = timestamp_jiffies + - msecs_to_jiffies_timeout(to_wait_ms); - - if (time_after(target_jiffies, tmp_jiffies)) { - remaining_jiffies = target_jiffies - tmp_jiffies; - while (remaining_jiffies) - remaining_jiffies = - schedule_timeout_uninterruptible(remaining_jiffies); - } -} - void i915_memcpy_init_early(struct drm_i915_private *dev_priv); bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len); @@ -3690,4 +3384,15 @@ static inline u32 i915_scratch_offset(const struct drm_i915_private *i915) return i915_ggtt_offset(i915->gt.scratch); } +static inline void add_taint_for_CI(unsigned int taint) +{ + /* + * The system is "ok", just about surviving for the user, but + * CI results are now unreliable as the HW is very suspect. + * CI checks the taint state after every test and will reboot + * the machine if the kernel is tainted. + */ + add_taint(taint, LOCKDEP_STILL_OK); +} + #endif diff --git a/drivers/gpu/drm/i915/i915_fixed.h b/drivers/gpu/drm/i915/i915_fixed.h index 591dd89ba7aff251846d19bc2fb02147ee92e3fc..6621595fe74ca5a3c0f2217caa8021ff3b6d2a16 100644 --- a/drivers/gpu/drm/i915/i915_fixed.h +++ b/drivers/gpu/drm/i915/i915_fixed.h @@ -71,7 +71,7 @@ static inline u32 mul_round_up_u32_fixed16(u32 val, uint_fixed_16_16_t mul) { u64 tmp; - tmp = (u64)val * mul.val; + tmp = mul_u32_u32(val, mul.val); tmp = DIV_ROUND_UP_ULL(tmp, 1 << 16); WARN_ON(tmp > U32_MAX); @@ -83,7 +83,7 @@ static inline uint_fixed_16_16_t mul_fixed16(uint_fixed_16_16_t val, { u64 tmp; - tmp = (u64)val.val * mul.val; + tmp = mul_u32_u32(val.val, mul.val); tmp = tmp >> 16; return clamp_u64_to_fixed16(tmp); @@ -114,7 +114,7 @@ static inline uint_fixed_16_16_t mul_u32_fixed16(u32 val, uint_fixed_16_16_t mul { u64 tmp; - tmp = (u64)val * mul.val; + tmp = mul_u32_u32(val, mul.val); return clamp_u64_to_fixed16(tmp); } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ad01c92aaf74881aae3cff9740359be36063732c..d3b7dac527dc7783a5d87bca0fb3fdac3c22d3ad 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -39,19 +39,23 @@ #include #include +#include "gt/intel_engine_pm.h" +#include "gt/intel_gt_pm.h" +#include "gt/intel_mocs.h" +#include "gt/intel_reset.h" +#include "gt/intel_workarounds.h" + #include "i915_drv.h" #include "i915_gem_clflush.h" #include "i915_gemfs.h" -#include "i915_globals.h" -#include "i915_reset.h" +#include "i915_gem_pm.h" #include "i915_trace.h" #include "i915_vgpu.h" +#include "intel_display.h" #include "intel_drv.h" #include "intel_frontbuffer.h" -#include "intel_mocs.h" #include "intel_pm.h" -#include "intel_workarounds.h" static void i915_gem_flush_free_objects(struct drm_i915_private *i915); @@ -102,105 +106,6 @@ static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, spin_unlock(&dev_priv->mm.object_stat_lock); } -static void __i915_gem_park(struct drm_i915_private *i915) -{ - intel_wakeref_t wakeref; - - GEM_TRACE("\n"); - - lockdep_assert_held(&i915->drm.struct_mutex); - GEM_BUG_ON(i915->gt.active_requests); - GEM_BUG_ON(!list_empty(&i915->gt.active_rings)); - - if (!i915->gt.awake) - return; - - /* - * Be paranoid and flush a concurrent interrupt to make sure - * we don't reactivate any irq tasklets after parking. - * - * FIXME: Note that even though we have waited for execlists to be idle, - * there may still be an in-flight interrupt even though the CSB - * is now empty. synchronize_irq() makes sure that a residual interrupt - * is completed before we continue, but it doesn't prevent the HW from - * raising a spurious interrupt later. To complete the shield we should - * coordinate disabling the CS irq with flushing the interrupts. - */ - synchronize_irq(i915->drm.irq); - - intel_engines_park(i915); - i915_timelines_park(i915); - - i915_pmu_gt_parked(i915); - i915_vma_parked(i915); - - wakeref = fetch_and_zero(&i915->gt.awake); - GEM_BUG_ON(!wakeref); - - if (INTEL_GEN(i915) >= 6) - gen6_rps_idle(i915); - - intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref); - - i915_globals_park(); -} - -void i915_gem_park(struct drm_i915_private *i915) -{ - GEM_TRACE("\n"); - - lockdep_assert_held(&i915->drm.struct_mutex); - GEM_BUG_ON(i915->gt.active_requests); - - if (!i915->gt.awake) - return; - - /* Defer the actual call to __i915_gem_park() to prevent ping-pongs */ - mod_delayed_work(i915->wq, &i915->gt.idle_work, msecs_to_jiffies(100)); -} - -void i915_gem_unpark(struct drm_i915_private *i915) -{ - GEM_TRACE("\n"); - - lockdep_assert_held(&i915->drm.struct_mutex); - GEM_BUG_ON(!i915->gt.active_requests); - assert_rpm_wakelock_held(i915); - - if (i915->gt.awake) - return; - - /* - * It seems that the DMC likes to transition between the DC states a lot - * when there are no connected displays (no active power domains) during - * command submission. - * - * This activity has negative impact on the performance of the chip with - * huge latencies observed in the interrupt handler and elsewhere. - * - * Work around it by grabbing a GT IRQ power domain whilst there is any - * GT activity, preventing any DC state transitions. - */ - i915->gt.awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); - GEM_BUG_ON(!i915->gt.awake); - - i915_globals_unpark(); - - intel_enable_gt_powersave(i915); - i915_update_gfx_val(i915); - if (INTEL_GEN(i915) >= 6) - gen6_rps_busy(i915); - i915_pmu_gt_unparked(i915); - - intel_engines_unpark(i915); - - i915_queue_hangcheck(i915); - - queue_delayed_work(i915->wq, - &i915->gt.retire_work, - round_jiffies_up_relative(HZ)); -} - int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, struct drm_file *file) @@ -656,8 +561,31 @@ i915_gem_dumb_create(struct drm_file *file, struct drm_device *dev, struct drm_mode_create_dumb *args) { + int cpp = DIV_ROUND_UP(args->bpp, 8); + u32 format; + + switch (cpp) { + case 1: + format = DRM_FORMAT_C8; + break; + case 2: + format = DRM_FORMAT_RGB565; + break; + case 4: + format = DRM_FORMAT_XRGB8888; + break; + default: + return -EINVAL; + } + /* have to work out size/pitch and return them */ - args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); + args->pitch = ALIGN(args->width * cpp, 64); + + /* align stride to page size so that we can remap */ + if (args->pitch > intel_plane_fb_max_stride(to_i915(dev), format, + DRM_FORMAT_MOD_LINEAR)) + args->pitch = ALIGN(args->pitch, 4096); + args->size = args->pitch * args->height; return i915_gem_create(file, to_i915(dev), &args->size, &args->handle); @@ -2087,7 +2015,7 @@ static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) if (!err) break; - } while (flush_delayed_work(&dev_priv->gt.retire_work)); + } while (flush_delayed_work(&dev_priv->gem.retire_work)); return err; } @@ -2143,8 +2071,7 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, } /* Immediately discard the backing storage */ -static void -i915_gem_object_truncate(struct drm_i915_gem_object *obj) +void __i915_gem_object_truncate(struct drm_i915_gem_object *obj) { i915_gem_object_free_mmap_offset(obj); @@ -2161,28 +2088,6 @@ i915_gem_object_truncate(struct drm_i915_gem_object *obj) obj->mm.pages = ERR_PTR(-EFAULT); } -/* Try to discard unwanted pages */ -void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj) -{ - struct address_space *mapping; - - lockdep_assert_held(&obj->mm.lock); - GEM_BUG_ON(i915_gem_object_has_pages(obj)); - - switch (obj->mm.madv) { - case I915_MADV_DONTNEED: - i915_gem_object_truncate(obj); - case __I915_MADV_PURGED: - return; - } - - if (obj->base.filp == NULL) - return; - - mapping = obj->base.filp->f_mapping, - invalidate_mapping_pages(mapping, 0, (loff_t)-1); -} - /* * Move pages to appropriate lru and release the pagevec, decrementing the * ref count of those pages. @@ -2870,132 +2775,6 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj, return 0; } -static void -i915_gem_retire_work_handler(struct work_struct *work) -{ - struct drm_i915_private *dev_priv = - container_of(work, typeof(*dev_priv), gt.retire_work.work); - struct drm_device *dev = &dev_priv->drm; - - /* Come back later if the device is busy... */ - if (mutex_trylock(&dev->struct_mutex)) { - i915_retire_requests(dev_priv); - mutex_unlock(&dev->struct_mutex); - } - - /* - * Keep the retire handler running until we are finally idle. - * We do not need to do this test under locking as in the worst-case - * we queue the retire worker once too often. - */ - if (READ_ONCE(dev_priv->gt.awake)) - queue_delayed_work(dev_priv->wq, - &dev_priv->gt.retire_work, - round_jiffies_up_relative(HZ)); -} - -static bool switch_to_kernel_context_sync(struct drm_i915_private *i915, - unsigned long mask) -{ - bool result = true; - - /* - * Even if we fail to switch, give whatever is running a small chance - * to save itself before we report the failure. Yes, this may be a - * false positive due to e.g. ENOMEM, caveat emptor! - */ - if (i915_gem_switch_to_kernel_context(i915, mask)) - result = false; - - if (i915_gem_wait_for_idle(i915, - I915_WAIT_LOCKED | - I915_WAIT_FOR_IDLE_BOOST, - I915_GEM_IDLE_TIMEOUT)) - result = false; - - if (!result) { - if (i915_modparams.reset) { /* XXX hide warning from gem_eio */ - dev_err(i915->drm.dev, - "Failed to idle engines, declaring wedged!\n"); - GEM_TRACE_DUMP(); - } - - /* Forcibly cancel outstanding work and leave the gpu quiet. */ - i915_gem_set_wedged(i915); - } - - i915_retire_requests(i915); /* ensure we flush after wedging */ - return result; -} - -static bool load_power_context(struct drm_i915_private *i915) -{ - /* Force loading the kernel context on all engines */ - if (!switch_to_kernel_context_sync(i915, ALL_ENGINES)) - return false; - - /* - * Immediately park the GPU so that we enable powersaving and - * treat it as idle. The next time we issue a request, we will - * unpark and start using the engine->pinned_default_state, otherwise - * it is in limbo and an early reset may fail. - */ - __i915_gem_park(i915); - - return true; -} - -static void -i915_gem_idle_work_handler(struct work_struct *work) -{ - struct drm_i915_private *i915 = - container_of(work, typeof(*i915), gt.idle_work.work); - bool rearm_hangcheck; - - if (!READ_ONCE(i915->gt.awake)) - return; - - if (READ_ONCE(i915->gt.active_requests)) - return; - - rearm_hangcheck = - cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); - - if (!mutex_trylock(&i915->drm.struct_mutex)) { - /* Currently busy, come back later */ - mod_delayed_work(i915->wq, - &i915->gt.idle_work, - msecs_to_jiffies(50)); - goto out_rearm; - } - - /* - * Flush out the last user context, leaving only the pinned - * kernel context resident. Should anything unfortunate happen - * while we are idle (such as the GPU being power cycled), no users - * will be harmed. - */ - if (!work_pending(&i915->gt.idle_work.work) && - !i915->gt.active_requests) { - ++i915->gt.active_requests; /* don't requeue idle */ - - switch_to_kernel_context_sync(i915, i915->gt.active_engines); - - if (!--i915->gt.active_requests) { - __i915_gem_park(i915); - rearm_hangcheck = false; - } - } - - mutex_unlock(&i915->drm.struct_mutex); - -out_rearm: - if (rearm_hangcheck) { - GEM_BUG_ON(!i915->gt.awake); - i915_queue_hangcheck(i915); - } -} - void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) { struct drm_i915_private *i915 = to_i915(gem->dev); @@ -3135,9 +2914,6 @@ wait_for_timelines(struct drm_i915_private *i915, struct i915_gt_timelines *gt = &i915->gt.timelines; struct i915_timeline *tl; - if (!READ_ONCE(i915->gt.active_requests)) - return timeout; - mutex_lock(>->mutex); list_for_each_entry(tl, >->active_list, link) { struct i915_request *rq; @@ -3177,9 +2953,10 @@ wait_for_timelines(struct drm_i915_private *i915, int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags, long timeout) { - GEM_TRACE("flags=%x (%s), timeout=%ld%s\n", + GEM_TRACE("flags=%x (%s), timeout=%ld%s, awake?=%s\n", flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked", - timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : ""); + timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : "", + yesno(i915->gt.awake)); /* If the device is asleep, we have no requests outstanding */ if (!READ_ONCE(i915->gt.awake)) @@ -4023,7 +3800,7 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, /* if the object is no longer attached, discard its backing storage */ if (obj->mm.madv == I915_MADV_DONTNEED && !i915_gem_object_has_pages(obj)) - i915_gem_object_truncate(obj); + __i915_gem_object_truncate(obj); args->retained = obj->mm.madv != __I915_MADV_PURGED; mutex_unlock(&obj->mm.lock); @@ -4401,7 +4178,7 @@ void i915_gem_sanitize(struct drm_i915_private *i915) * it may impact the display and we are uncertain about the stability * of the reset, so this could be applied to even earlier gen. */ - intel_engines_sanitize(i915, false); + intel_gt_sanitize(i915, false); intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); intel_runtime_pm_put(i915, wakeref); @@ -4411,133 +4188,6 @@ void i915_gem_sanitize(struct drm_i915_private *i915) mutex_unlock(&i915->drm.struct_mutex); } -void i915_gem_suspend(struct drm_i915_private *i915) -{ - intel_wakeref_t wakeref; - - GEM_TRACE("\n"); - - wakeref = intel_runtime_pm_get(i915); - - flush_workqueue(i915->wq); - - mutex_lock(&i915->drm.struct_mutex); - - /* - * We have to flush all the executing contexts to main memory so - * that they can saved in the hibernation image. To ensure the last - * context image is coherent, we have to switch away from it. That - * leaves the i915->kernel_context still active when - * we actually suspend, and its image in memory may not match the GPU - * state. Fortunately, the kernel_context is disposable and we do - * not rely on its state. - */ - switch_to_kernel_context_sync(i915, i915->gt.active_engines); - - mutex_unlock(&i915->drm.struct_mutex); - i915_reset_flush(i915); - - drain_delayed_work(&i915->gt.retire_work); - - /* - * As the idle_work is rearming if it detects a race, play safe and - * repeat the flush until it is definitely idle. - */ - drain_delayed_work(&i915->gt.idle_work); - - /* - * Assert that we successfully flushed all the work and - * reset the GPU back to its idle, low power state. - */ - GEM_BUG_ON(i915->gt.awake); - - intel_uc_suspend(i915); - - intel_runtime_pm_put(i915, wakeref); -} - -void i915_gem_suspend_late(struct drm_i915_private *i915) -{ - struct drm_i915_gem_object *obj; - struct list_head *phases[] = { - &i915->mm.unbound_list, - &i915->mm.bound_list, - NULL - }, **phase; - - /* - * Neither the BIOS, ourselves or any other kernel - * expects the system to be in execlists mode on startup, - * so we need to reset the GPU back to legacy mode. And the only - * known way to disable logical contexts is through a GPU reset. - * - * So in order to leave the system in a known default configuration, - * always reset the GPU upon unload and suspend. Afterwards we then - * clean up the GEM state tracking, flushing off the requests and - * leaving the system in a known idle state. - * - * Note that is of the upmost importance that the GPU is idle and - * all stray writes are flushed *before* we dismantle the backing - * storage for the pinned objects. - * - * However, since we are uncertain that resetting the GPU on older - * machines is a good idea, we don't - just in case it leaves the - * machine in an unusable condition. - */ - - mutex_lock(&i915->drm.struct_mutex); - for (phase = phases; *phase; phase++) { - list_for_each_entry(obj, *phase, mm.link) - WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false)); - } - mutex_unlock(&i915->drm.struct_mutex); - - intel_uc_sanitize(i915); - i915_gem_sanitize(i915); -} - -void i915_gem_resume(struct drm_i915_private *i915) -{ - GEM_TRACE("\n"); - - WARN_ON(i915->gt.awake); - - mutex_lock(&i915->drm.struct_mutex); - intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); - - i915_gem_restore_gtt_mappings(i915); - i915_gem_restore_fences(i915); - - /* - * As we didn't flush the kernel context before suspend, we cannot - * guarantee that the context image is complete. So let's just reset - * it and start again. - */ - intel_gt_resume(i915); - - if (i915_gem_init_hw(i915)) - goto err_wedged; - - intel_uc_resume(i915); - - /* Always reload a context for powersaving. */ - if (!load_power_context(i915)) - goto err_wedged; - -out_unlock: - intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); - mutex_unlock(&i915->drm.struct_mutex); - return; - -err_wedged: - if (!i915_reset_failed(i915)) { - dev_err(i915->drm.dev, - "Failed to re-initialize GPU, declaring it wedged!\n"); - i915_gem_set_wedged(i915); - } - goto out_unlock; -} - void i915_gem_init_swizzling(struct drm_i915_private *dev_priv) { if (INTEL_GEN(dev_priv) < 5 || @@ -4586,27 +4236,6 @@ static void init_unused_rings(struct drm_i915_private *dev_priv) } } -static int __i915_gem_restart_engines(void *data) -{ - struct drm_i915_private *i915 = data; - struct intel_engine_cs *engine; - enum intel_engine_id id; - int err; - - for_each_engine(engine, i915, id) { - err = engine->init_hw(engine); - if (err) { - DRM_ERROR("Failed to restart %s (%d)\n", - engine->name, err); - return err; - } - } - - intel_engines_set_scheduler_caps(i915); - - return 0; -} - int i915_gem_init_hw(struct drm_i915_private *dev_priv) { int ret; @@ -4665,12 +4294,13 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv) intel_mocs_init_l3cc_table(dev_priv); /* Only when the HW is re-initialised, can we replay the requests */ - ret = __i915_gem_restart_engines(dev_priv); + ret = intel_engines_resume(dev_priv); if (ret) goto cleanup_uc; intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); + intel_engines_set_scheduler_caps(dev_priv); return 0; cleanup_uc: @@ -4683,8 +4313,9 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv) static int __intel_engines_record_defaults(struct drm_i915_private *i915) { - struct i915_gem_context *ctx; struct intel_engine_cs *engine; + struct i915_gem_context *ctx; + struct i915_gem_engines *e; enum intel_engine_id id; int err = 0; @@ -4701,18 +4332,21 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) if (IS_ERR(ctx)) return PTR_ERR(ctx); + e = i915_gem_context_lock_engines(ctx); + for_each_engine(engine, i915, id) { + struct intel_context *ce = e->engines[id]; struct i915_request *rq; - rq = i915_request_alloc(engine, ctx); + rq = intel_context_create_request(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); - goto out_ctx; + goto err_active; } err = 0; - if (engine->init_context) - err = engine->init_context(rq); + if (rq->engine->init_context) + err = rq->engine->init_context(rq); i915_request_add(rq); if (err) @@ -4720,21 +4354,16 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) } /* Flush the default context image to memory, and enable powersaving. */ - if (!load_power_context(i915)) { + if (!i915_gem_load_power_context(i915)) { err = -EIO; goto err_active; } for_each_engine(engine, i915, id) { - struct intel_context *ce; - struct i915_vma *state; + struct intel_context *ce = e->engines[id]; + struct i915_vma *state = ce->state; void *vaddr; - ce = intel_context_lookup(ctx, engine); - if (!ce) - continue; - - state = ce->state; if (!state) continue; @@ -4790,6 +4419,7 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) } out_ctx: + i915_gem_context_unlock_engines(ctx); i915_gem_context_set_closed(ctx); i915_gem_context_put(ctx); return err; @@ -4842,6 +4472,23 @@ static void i915_gem_fini_scratch(struct drm_i915_private *i915) i915_vma_unpin_and_release(&i915->gt.scratch, 0); } +static int intel_engines_verify_workarounds(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + return 0; + + for_each_engine(engine, i915, id) { + if (intel_engine_verify_workarounds(engine, "load")) + err = -EIO; + } + + return err; +} + int i915_gem_init(struct drm_i915_private *dev_priv) { int ret; @@ -4853,11 +4500,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv) dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1); - if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) - dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup; - else - dev_priv->gt.cleanup_engine = intel_engine_cleanup; - i915_timelines_init(dev_priv); ret = i915_gem_init_userptr(dev_priv); @@ -4894,6 +4536,12 @@ int i915_gem_init(struct drm_i915_private *dev_priv) goto err_ggtt; } + ret = intel_engines_setup(dev_priv); + if (ret) { + GEM_BUG_ON(ret == -EIO); + goto err_unlock; + } + ret = i915_gem_contexts_init(dev_priv); if (ret) { GEM_BUG_ON(ret == -EIO); @@ -4927,6 +4575,10 @@ int i915_gem_init(struct drm_i915_private *dev_priv) */ intel_init_clock_gating(dev_priv); + ret = intel_engines_verify_workarounds(dev_priv); + if (ret) + goto err_init_hw; + ret = __intel_engines_record_defaults(dev_priv); if (ret) goto err_init_hw; @@ -4955,6 +4607,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv) err_init_hw: mutex_unlock(&dev_priv->drm.struct_mutex); + i915_gem_set_wedged(dev_priv); i915_gem_suspend(dev_priv); i915_gem_suspend_late(dev_priv); @@ -4967,7 +4620,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv) err_pm: if (ret != -EIO) { intel_cleanup_gt_powersave(dev_priv); - i915_gem_cleanup_engines(dev_priv); + intel_engines_cleanup(dev_priv); } err_context: if (ret != -EIO) @@ -5016,6 +4669,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv) void i915_gem_fini(struct drm_i915_private *dev_priv) { + GEM_BUG_ON(dev_priv->gt.awake); + i915_gem_suspend_late(dev_priv); intel_disable_gt_powersave(dev_priv); @@ -5025,7 +4680,7 @@ void i915_gem_fini(struct drm_i915_private *dev_priv) mutex_lock(&dev_priv->drm.struct_mutex); intel_uc_fini_hw(dev_priv); intel_uc_fini(dev_priv); - i915_gem_cleanup_engines(dev_priv); + intel_engines_cleanup(dev_priv); i915_gem_contexts_fini(dev_priv); i915_gem_fini_scratch(dev_priv); mutex_unlock(&dev_priv->drm.struct_mutex); @@ -5048,16 +4703,6 @@ void i915_gem_init_mmio(struct drm_i915_private *i915) i915_gem_sanitize(i915); } -void -i915_gem_cleanup_engines(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine(engine, dev_priv, id) - dev_priv->gt.cleanup_engine(engine); -} - void i915_gem_load_init_fences(struct drm_i915_private *dev_priv) { @@ -5110,15 +4755,14 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv) { int err; + intel_gt_pm_init(dev_priv); + INIT_LIST_HEAD(&dev_priv->gt.active_rings); INIT_LIST_HEAD(&dev_priv->gt.closed_vma); i915_gem_init__mm(dev_priv); + i915_gem_init__pm(dev_priv); - INIT_DELAYED_WORK(&dev_priv->gt.retire_work, - i915_gem_retire_work_handler); - INIT_DELAYED_WORK(&dev_priv->gt.idle_work, - i915_gem_idle_work_handler); init_waitqueue_head(&dev_priv->gpu_error.wait_queue); init_waitqueue_head(&dev_priv->gpu_error.reset_queue); mutex_init(&dev_priv->gpu_error.wedge_mutex); @@ -5461,16 +5105,29 @@ i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, } dma_addr_t -i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, - unsigned long n) +i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj, + unsigned long n, + unsigned int *len) { struct scatterlist *sg; unsigned int offset; sg = i915_gem_object_get_sg(obj, n, &offset); + + if (len) + *len = sg_dma_len(sg) - (offset << PAGE_SHIFT); + return sg_dma_address(sg) + (offset << PAGE_SHIFT); } +dma_addr_t +i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, + unsigned long n) +{ + return i915_gem_object_get_dma_address_len(obj, n, NULL); +} + + int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align) { struct sg_table *pages; diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index 9074eb1e843f20bcfc3e80958de792ace320a2b6..fe82d3571072b0bbdcba564f0af6e0ea53b4018f 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -75,9 +75,6 @@ struct drm_i915_private; #define I915_GEM_IDLE_TIMEOUT (HZ / 5) -void i915_gem_park(struct drm_i915_private *i915); -void i915_gem_unpark(struct drm_i915_private *i915); - static inline void __tasklet_disable_sync_once(struct tasklet_struct *t) { if (!atomic_fetch_inc(&t->count)) @@ -94,4 +91,9 @@ static inline bool __tasklet_enable(struct tasklet_struct *t) return atomic_dec_and_test(&t->count); } +static inline bool __tasklet_is_scheduled(struct tasklet_struct *t) +{ + return test_bit(TASKLET_STATE_SCHED, &t->state); +} + #endif /* __I915_GEM_H__ */ diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index dd728b26b5aab135e110fc61eca40ca84d4456e2..5d2f8ba92b59708f69ed5c0f52a2ef81d0550ec1 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -86,16 +86,16 @@ */ #include +#include + #include + +#include "gt/intel_lrc_reg.h" + #include "i915_drv.h" #include "i915_globals.h" #include "i915_trace.h" #include "i915_user_extensions.h" -#include "intel_lrc_reg.h" -#include "intel_workarounds.h" - -#define I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE (1 << 1) -#define I915_CONTEXT_PARAM_VM 0x9 #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1 @@ -138,6 +138,34 @@ static void lut_close(struct i915_gem_context *ctx) rcu_read_unlock(); } +static struct intel_context * +lookup_user_engine(struct i915_gem_context *ctx, + unsigned long flags, + const struct i915_engine_class_instance *ci) +#define LOOKUP_USER_INDEX BIT(0) +{ + int idx; + + if (!!(flags & LOOKUP_USER_INDEX) != i915_gem_context_user_engines(ctx)) + return ERR_PTR(-EINVAL); + + if (!i915_gem_context_user_engines(ctx)) { + struct intel_engine_cs *engine; + + engine = intel_engine_lookup_user(ctx->i915, + ci->engine_class, + ci->engine_instance); + if (!engine) + return ERR_PTR(-EINVAL); + + idx = engine->id; + } else { + idx = ci->engine_instance; + } + + return i915_gem_context_get_engine(ctx, idx); +} + static inline int new_hw_id(struct drm_i915_private *i915, gfp_t gfp) { unsigned int max; @@ -227,19 +255,70 @@ static void release_hw_id(struct i915_gem_context *ctx) mutex_unlock(&i915->contexts.mutex); } -static void i915_gem_context_free(struct i915_gem_context *ctx) +static void __free_engines(struct i915_gem_engines *e, unsigned int count) +{ + while (count--) { + if (!e->engines[count]) + continue; + + intel_context_put(e->engines[count]); + } + kfree(e); +} + +static void free_engines(struct i915_gem_engines *e) +{ + __free_engines(e, e->num_engines); +} + +static void free_engines_rcu(struct work_struct *wrk) { - struct intel_context *it, *n; + struct i915_gem_engines *e = + container_of(wrk, struct i915_gem_engines, rcu.work); + struct drm_i915_private *i915 = e->i915; + mutex_lock(&i915->drm.struct_mutex); + free_engines(e); + mutex_unlock(&i915->drm.struct_mutex); +} + +static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) +{ + struct intel_engine_cs *engine; + struct i915_gem_engines *e; + enum intel_engine_id id; + + e = kzalloc(struct_size(e, engines, I915_NUM_ENGINES), GFP_KERNEL); + if (!e) + return ERR_PTR(-ENOMEM); + + e->i915 = ctx->i915; + for_each_engine(engine, ctx->i915, id) { + struct intel_context *ce; + + ce = intel_context_create(ctx, engine); + if (IS_ERR(ce)) { + __free_engines(e, id); + return ERR_CAST(ce); + } + + e->engines[id] = ce; + } + e->num_engines = id; + + return e; +} + +static void i915_gem_context_free(struct i915_gem_context *ctx) +{ lockdep_assert_held(&ctx->i915->drm.struct_mutex); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); - GEM_BUG_ON(!list_empty(&ctx->active_engines)); release_hw_id(ctx); i915_ppgtt_put(ctx->ppgtt); - rbtree_postorder_for_each_entry_safe(it, n, &ctx->hw_contexts, node) - intel_context_put(it); + free_engines(rcu_access_pointer(ctx->engines)); + mutex_destroy(&ctx->engines_mutex); if (ctx->timeline) i915_timeline_put(ctx->timeline); @@ -348,6 +427,8 @@ static struct i915_gem_context * __create_context(struct drm_i915_private *dev_priv) { struct i915_gem_context *ctx; + struct i915_gem_engines *e; + int err; int i; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); @@ -358,11 +439,15 @@ __create_context(struct drm_i915_private *dev_priv) list_add_tail(&ctx->link, &dev_priv->contexts.list); ctx->i915 = dev_priv; ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL); - INIT_LIST_HEAD(&ctx->active_engines); mutex_init(&ctx->mutex); - ctx->hw_contexts = RB_ROOT; - spin_lock_init(&ctx->hw_contexts_lock); + mutex_init(&ctx->engines_mutex); + e = default_engines(ctx); + if (IS_ERR(e)) { + err = PTR_ERR(e); + goto err_free; + } + RCU_INIT_POINTER(ctx->engines, e); INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); INIT_LIST_HEAD(&ctx->handles_list); @@ -384,6 +469,10 @@ __create_context(struct drm_i915_private *dev_priv) ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; return ctx; + +err_free: + kfree(ctx); + return ERR_PTR(err); } static struct i915_hw_ppgtt * @@ -415,8 +504,6 @@ i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags) lockdep_assert_held(&dev_priv->drm.struct_mutex); - BUILD_BUG_ON(I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE & - ~I915_CONTEXT_CREATE_FLAGS_UNKNOWN); if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE && !HAS_EXECLISTS(dev_priv)) return ERR_PTR(-EINVAL); @@ -769,8 +856,7 @@ int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data, if (err < 0) goto err_unlock; - GEM_BUG_ON(err == 0); /* reserved for default/unassigned ppgtt */ - ppgtt->user_handle = err; + GEM_BUG_ON(err == 0); /* reserved for invalid/unassigned ppgtt */ mutex_unlock(&file_priv->vm_idr_lock); @@ -808,10 +894,6 @@ int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data, return err; ppgtt = idr_remove(&file_priv->vm_idr, id); - if (ppgtt) { - GEM_BUG_ON(ppgtt->user_handle != id); - ppgtt->user_handle = 0; - } mutex_unlock(&file_priv->vm_idr_lock); if (!ppgtt) @@ -821,26 +903,6 @@ int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data, return 0; } -static struct i915_request * -last_request_on_engine(struct i915_timeline *timeline, - struct intel_engine_cs *engine) -{ - struct i915_request *rq; - - GEM_BUG_ON(timeline == &engine->timeline); - - rq = i915_active_request_raw(&timeline->last_request, - &engine->i915->drm.struct_mutex); - if (rq && rq->engine->mask & engine->mask) { - GEM_TRACE("last request on engine %s: %llx:%llu\n", - engine->name, rq->fence.context, rq->fence.seqno); - GEM_BUG_ON(rq->timeline != timeline); - return rq; - } - - return NULL; -} - struct context_barrier_task { struct i915_active base; void (*task)(void *data); @@ -867,8 +929,8 @@ static int context_barrier_task(struct i915_gem_context *ctx, { struct drm_i915_private *i915 = ctx->i915; struct context_barrier_task *cb; - struct intel_context *ce, *next; - intel_wakeref_t wakeref; + struct i915_gem_engines_iter it; + struct intel_context *ce; int err = 0; lockdep_assert_held(&i915->drm.struct_mutex); @@ -881,21 +943,19 @@ static int context_barrier_task(struct i915_gem_context *ctx, i915_active_init(i915, &cb->base, cb_retire); i915_active_acquire(&cb->base); - wakeref = intel_runtime_pm_get(i915); - rbtree_postorder_for_each_entry_safe(ce, next, &ctx->hw_contexts, node) { - struct intel_engine_cs *engine = ce->engine; + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { struct i915_request *rq; - if (!(engine->mask & engines)) - continue; - if (I915_SELFTEST_ONLY(context_barrier_inject_fault & - engine->mask)) { + ce->engine->mask)) { err = -ENXIO; break; } - rq = i915_request_alloc(engine, ctx); + if (!(ce->engine->mask & engines) || !ce->state) + continue; + + rq = intel_context_create_request(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); break; @@ -911,7 +971,7 @@ static int context_barrier_task(struct i915_gem_context *ctx, if (err) break; } - intel_runtime_pm_put(i915, wakeref); + i915_gem_context_unlock_engines(ctx); cb->task = err ? NULL : task; /* caller needs to unwind instead */ cb->data = data; @@ -921,54 +981,6 @@ static int context_barrier_task(struct i915_gem_context *ctx, return err; } -int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915, - intel_engine_mask_t mask) -{ - struct intel_engine_cs *engine; - - GEM_TRACE("awake?=%s\n", yesno(i915->gt.awake)); - - lockdep_assert_held(&i915->drm.struct_mutex); - GEM_BUG_ON(!i915->kernel_context); - - /* Inoperable, so presume the GPU is safely pointing into the void! */ - if (i915_terminally_wedged(i915)) - return 0; - - for_each_engine_masked(engine, i915, mask, mask) { - struct intel_ring *ring; - struct i915_request *rq; - - rq = i915_request_alloc(engine, i915->kernel_context); - if (IS_ERR(rq)) - return PTR_ERR(rq); - - /* Queue this switch after all other activity */ - list_for_each_entry(ring, &i915->gt.active_rings, active_link) { - struct i915_request *prev; - - prev = last_request_on_engine(ring->timeline, engine); - if (!prev) - continue; - - if (prev->gem_context == i915->kernel_context) - continue; - - GEM_TRACE("add barrier on %s for %llx:%lld\n", - engine->name, - prev->fence.context, - prev->fence.seqno); - i915_sw_fence_await_sw_fence_gfp(&rq->submit, - &prev->submit, - I915_FENCE_GFP); - } - - i915_request_add(rq); - } - - return 0; -} - static int get_ppgtt(struct drm_i915_file_private *file_priv, struct i915_gem_context *ctx, struct drm_i915_gem_context_param *args) @@ -976,8 +988,6 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv, struct i915_hw_ppgtt *ppgtt; int ret; - return -EINVAL; /* nothing to see here; please move along */ - if (!ctx->ppgtt) return -ENODEV; @@ -993,18 +1003,15 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv, if (ret) goto err_put; - if (!ppgtt->user_handle) { - ret = idr_alloc(&file_priv->vm_idr, ppgtt, 0, 0, GFP_KERNEL); - GEM_BUG_ON(!ret); - if (ret < 0) - goto err_unlock; + ret = idr_alloc(&file_priv->vm_idr, ppgtt, 0, 0, GFP_KERNEL); + GEM_BUG_ON(!ret); + if (ret < 0) + goto err_unlock; - ppgtt->user_handle = ret; - i915_ppgtt_get(ppgtt); - } + i915_ppgtt_get(ppgtt); args->size = 0; - args->value = ppgtt->user_handle; + args->value = ret; ret = 0; err_unlock: @@ -1079,8 +1086,6 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv, struct i915_hw_ppgtt *ppgtt, *old; int err; - return -EINVAL; /* nothing to see here; please move along */ - if (args->size) return -EINVAL; @@ -1095,10 +1100,8 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv, return err; ppgtt = idr_find(&file_priv->vm_idr, args->value); - if (ppgtt) { - GEM_BUG_ON(ppgtt->user_handle != args->value); + if (ppgtt) i915_ppgtt_get(ppgtt); - } mutex_unlock(&file_priv->vm_idr_lock); if (!ppgtt) return -ENOENT; @@ -1156,7 +1159,7 @@ static int gen8_emit_rpcs_config(struct i915_request *rq, *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; *cs++ = lower_32_bits(offset); *cs++ = upper_32_bits(offset); - *cs++ = gen8_make_rpcs(rq->i915, &sseu); + *cs++ = intel_sseu_make_rpcs(rq->i915, &sseu); intel_ring_advance(rq, cs); @@ -1166,9 +1169,7 @@ static int gen8_emit_rpcs_config(struct i915_request *rq, static int gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu) { - struct drm_i915_private *i915 = ce->engine->i915; struct i915_request *rq; - intel_wakeref_t wakeref; int ret; lockdep_assert_held(&ce->pin_mutex); @@ -1182,14 +1183,9 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu) if (!intel_context_is_pinned(ce)) return 0; - /* Submitting requests etc needs the hw awake. */ - wakeref = intel_runtime_pm_get(i915); - - rq = i915_request_alloc(ce->engine, i915->kernel_context); - if (IS_ERR(rq)) { - ret = PTR_ERR(rq); - goto out_put; - } + rq = i915_request_create(ce->engine->kernel_context); + if (IS_ERR(rq)) + return PTR_ERR(rq); /* Queue this switch after all other activity by this context. */ ret = i915_active_request_set(&ce->ring->timeline->last_request, rq); @@ -1213,26 +1209,20 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu) out_add: i915_request_add(rq); -out_put: - intel_runtime_pm_put(i915, wakeref); - return ret; } static int -__i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx, - struct intel_engine_cs *engine, - struct intel_sseu sseu) +__intel_context_reconfigure_sseu(struct intel_context *ce, + struct intel_sseu sseu) { - struct intel_context *ce; - int ret = 0; + int ret; - GEM_BUG_ON(INTEL_GEN(ctx->i915) < 8); - GEM_BUG_ON(engine->id != RCS0); + GEM_BUG_ON(INTEL_GEN(ce->gem_context->i915) < 8); - ce = intel_context_pin_lock(ctx, engine); - if (IS_ERR(ce)) - return PTR_ERR(ce); + ret = intel_context_lock_pinned(ce); + if (ret) + return ret; /* Nothing to do if unmodified. */ if (!memcmp(&ce->sseu, &sseu, sizeof(sseu))) @@ -1243,24 +1233,23 @@ __i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx, ce->sseu = sseu; unlock: - intel_context_pin_unlock(ce); + intel_context_unlock_pinned(ce); return ret; } static int -i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx, - struct intel_engine_cs *engine, - struct intel_sseu sseu) +intel_context_reconfigure_sseu(struct intel_context *ce, struct intel_sseu sseu) { + struct drm_i915_private *i915 = ce->gem_context->i915; int ret; - ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex); + ret = mutex_lock_interruptible(&i915->drm.struct_mutex); if (ret) return ret; - ret = __i915_gem_context_reconfigure_sseu(ctx, engine, sseu); + ret = __intel_context_reconfigure_sseu(ce, sseu); - mutex_unlock(&ctx->i915->drm.struct_mutex); + mutex_unlock(&i915->drm.struct_mutex); return ret; } @@ -1368,8 +1357,9 @@ static int set_sseu(struct i915_gem_context *ctx, { struct drm_i915_private *i915 = ctx->i915; struct drm_i915_gem_context_param_sseu user_sseu; - struct intel_engine_cs *engine; + struct intel_context *ce; struct intel_sseu sseu; + unsigned long lookup; int ret; if (args->size < sizeof(user_sseu)) @@ -1382,32 +1372,429 @@ static int set_sseu(struct i915_gem_context *ctx, sizeof(user_sseu))) return -EFAULT; - if (user_sseu.flags || user_sseu.rsvd) + if (user_sseu.rsvd) return -EINVAL; - engine = intel_engine_lookup_user(i915, - user_sseu.engine.engine_class, - user_sseu.engine.engine_instance); - if (!engine) + if (user_sseu.flags & ~(I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX)) return -EINVAL; + lookup = 0; + if (user_sseu.flags & I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX) + lookup |= LOOKUP_USER_INDEX; + + ce = lookup_user_engine(ctx, lookup, &user_sseu.engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + /* Only render engine supports RPCS configuration. */ - if (engine->class != RENDER_CLASS) - return -ENODEV; + if (ce->engine->class != RENDER_CLASS) { + ret = -ENODEV; + goto out_ce; + } ret = user_to_context_sseu(i915, &user_sseu, &sseu); if (ret) - return ret; + goto out_ce; - ret = i915_gem_context_reconfigure_sseu(ctx, engine, sseu); + ret = intel_context_reconfigure_sseu(ce, sseu); if (ret) - return ret; + goto out_ce; args->size = sizeof(user_sseu); +out_ce: + intel_context_put(ce); + return ret; +} + +struct set_engines { + struct i915_gem_context *ctx; + struct i915_gem_engines *engines; +}; + +static int +set_engines__load_balance(struct i915_user_extension __user *base, void *data) +{ + struct i915_context_engines_load_balance __user *ext = + container_of_user(base, typeof(*ext), base); + const struct set_engines *set = data; + struct intel_engine_cs *stack[16]; + struct intel_engine_cs **siblings; + struct intel_context *ce; + u16 num_siblings, idx; + unsigned int n; + int err; + + if (!HAS_EXECLISTS(set->ctx->i915)) + return -ENODEV; + + if (USES_GUC_SUBMISSION(set->ctx->i915)) + return -ENODEV; /* not implement yet */ + + if (get_user(idx, &ext->engine_index)) + return -EFAULT; + + if (idx >= set->engines->num_engines) { + DRM_DEBUG("Invalid placement value, %d >= %d\n", + idx, set->engines->num_engines); + return -EINVAL; + } + + idx = array_index_nospec(idx, set->engines->num_engines); + if (set->engines->engines[idx]) { + DRM_DEBUG("Invalid placement[%d], already occupied\n", idx); + return -EEXIST; + } + + if (get_user(num_siblings, &ext->num_siblings)) + return -EFAULT; + + err = check_user_mbz(&ext->flags); + if (err) + return err; + + err = check_user_mbz(&ext->mbz64); + if (err) + return err; + + siblings = stack; + if (num_siblings > ARRAY_SIZE(stack)) { + siblings = kmalloc_array(num_siblings, + sizeof(*siblings), + GFP_KERNEL); + if (!siblings) + return -ENOMEM; + } + + for (n = 0; n < num_siblings; n++) { + struct i915_engine_class_instance ci; + + if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) { + err = -EFAULT; + goto out_siblings; + } + + siblings[n] = intel_engine_lookup_user(set->ctx->i915, + ci.engine_class, + ci.engine_instance); + if (!siblings[n]) { + DRM_DEBUG("Invalid sibling[%d]: { class:%d, inst:%d }\n", + n, ci.engine_class, ci.engine_instance); + err = -EINVAL; + goto out_siblings; + } + } + + ce = intel_execlists_create_virtual(set->ctx, siblings, n); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto out_siblings; + } + + if (cmpxchg(&set->engines->engines[idx], NULL, ce)) { + intel_context_put(ce); + err = -EEXIST; + goto out_siblings; + } + +out_siblings: + if (siblings != stack) + kfree(siblings); + + return err; +} + +static int +set_engines__bond(struct i915_user_extension __user *base, void *data) +{ + struct i915_context_engines_bond __user *ext = + container_of_user(base, typeof(*ext), base); + const struct set_engines *set = data; + struct i915_engine_class_instance ci; + struct intel_engine_cs *virtual; + struct intel_engine_cs *master; + u16 idx, num_bonds; + int err, n; + + if (get_user(idx, &ext->virtual_index)) + return -EFAULT; + + if (idx >= set->engines->num_engines) { + DRM_DEBUG("Invalid index for virtual engine: %d >= %d\n", + idx, set->engines->num_engines); + return -EINVAL; + } + + idx = array_index_nospec(idx, set->engines->num_engines); + if (!set->engines->engines[idx]) { + DRM_DEBUG("Invalid engine at %d\n", idx); + return -EINVAL; + } + virtual = set->engines->engines[idx]->engine; + + err = check_user_mbz(&ext->flags); + if (err) + return err; + + for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) { + err = check_user_mbz(&ext->mbz64[n]); + if (err) + return err; + } + + if (copy_from_user(&ci, &ext->master, sizeof(ci))) + return -EFAULT; + + master = intel_engine_lookup_user(set->ctx->i915, + ci.engine_class, ci.engine_instance); + if (!master) { + DRM_DEBUG("Unrecognised master engine: { class:%u, instance:%u }\n", + ci.engine_class, ci.engine_instance); + return -EINVAL; + } + + if (get_user(num_bonds, &ext->num_bonds)) + return -EFAULT; + + for (n = 0; n < num_bonds; n++) { + struct intel_engine_cs *bond; + + if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) + return -EFAULT; + + bond = intel_engine_lookup_user(set->ctx->i915, + ci.engine_class, + ci.engine_instance); + if (!bond) { + DRM_DEBUG("Unrecognised engine[%d] for bonding: { class:%d, instance: %d }\n", + n, ci.engine_class, ci.engine_instance); + return -EINVAL; + } + + /* + * A non-virtual engine has no siblings to choose between; and + * a submit fence will always be directed to the one engine. + */ + if (intel_engine_is_virtual(virtual)) { + err = intel_virtual_engine_attach_bond(virtual, + master, + bond); + if (err) + return err; + } + } + + return 0; +} + +static const i915_user_extension_fn set_engines__extensions[] = { + [I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_engines__load_balance, + [I915_CONTEXT_ENGINES_EXT_BOND] = set_engines__bond, +}; + +static int +set_engines(struct i915_gem_context *ctx, + const struct drm_i915_gem_context_param *args) +{ + struct i915_context_param_engines __user *user = + u64_to_user_ptr(args->value); + struct set_engines set = { .ctx = ctx }; + unsigned int num_engines, n; + u64 extensions; + int err; + + if (!args->size) { /* switch back to legacy user_ring_map */ + if (!i915_gem_context_user_engines(ctx)) + return 0; + + set.engines = default_engines(ctx); + if (IS_ERR(set.engines)) + return PTR_ERR(set.engines); + + goto replace; + } + + BUILD_BUG_ON(!IS_ALIGNED(sizeof(*user), sizeof(*user->engines))); + if (args->size < sizeof(*user) || + !IS_ALIGNED(args->size, sizeof(*user->engines))) { + DRM_DEBUG("Invalid size for engine array: %d\n", + args->size); + return -EINVAL; + } + + /* + * Note that I915_EXEC_RING_MASK limits execbuf to only using the + * first 64 engines defined here. + */ + num_engines = (args->size - sizeof(*user)) / sizeof(*user->engines); + + set.engines = kmalloc(struct_size(set.engines, engines, num_engines), + GFP_KERNEL); + if (!set.engines) + return -ENOMEM; + + set.engines->i915 = ctx->i915; + for (n = 0; n < num_engines; n++) { + struct i915_engine_class_instance ci; + struct intel_engine_cs *engine; + + if (copy_from_user(&ci, &user->engines[n], sizeof(ci))) { + __free_engines(set.engines, n); + return -EFAULT; + } + + if (ci.engine_class == (u16)I915_ENGINE_CLASS_INVALID && + ci.engine_instance == (u16)I915_ENGINE_CLASS_INVALID_NONE) { + set.engines->engines[n] = NULL; + continue; + } + + engine = intel_engine_lookup_user(ctx->i915, + ci.engine_class, + ci.engine_instance); + if (!engine) { + DRM_DEBUG("Invalid engine[%d]: { class:%d, instance:%d }\n", + n, ci.engine_class, ci.engine_instance); + __free_engines(set.engines, n); + return -ENOENT; + } + + set.engines->engines[n] = intel_context_create(ctx, engine); + if (!set.engines->engines[n]) { + __free_engines(set.engines, n); + return -ENOMEM; + } + } + set.engines->num_engines = num_engines; + + err = -EFAULT; + if (!get_user(extensions, &user->extensions)) + err = i915_user_extensions(u64_to_user_ptr(extensions), + set_engines__extensions, + ARRAY_SIZE(set_engines__extensions), + &set); + if (err) { + free_engines(set.engines); + return err; + } + +replace: + mutex_lock(&ctx->engines_mutex); + if (args->size) + i915_gem_context_set_user_engines(ctx); + else + i915_gem_context_clear_user_engines(ctx); + rcu_swap_protected(ctx->engines, set.engines, 1); + mutex_unlock(&ctx->engines_mutex); + + INIT_RCU_WORK(&set.engines->rcu, free_engines_rcu); + queue_rcu_work(system_wq, &set.engines->rcu); + return 0; } +static struct i915_gem_engines * +__copy_engines(struct i915_gem_engines *e) +{ + struct i915_gem_engines *copy; + unsigned int n; + + copy = kmalloc(struct_size(e, engines, e->num_engines), GFP_KERNEL); + if (!copy) + return ERR_PTR(-ENOMEM); + + copy->i915 = e->i915; + for (n = 0; n < e->num_engines; n++) { + if (e->engines[n]) + copy->engines[n] = intel_context_get(e->engines[n]); + else + copy->engines[n] = NULL; + } + copy->num_engines = n; + + return copy; +} + +static int +get_engines(struct i915_gem_context *ctx, + struct drm_i915_gem_context_param *args) +{ + struct i915_context_param_engines __user *user; + struct i915_gem_engines *e; + size_t n, count, size; + int err = 0; + + err = mutex_lock_interruptible(&ctx->engines_mutex); + if (err) + return err; + + e = NULL; + if (i915_gem_context_user_engines(ctx)) + e = __copy_engines(i915_gem_context_engines(ctx)); + mutex_unlock(&ctx->engines_mutex); + if (IS_ERR_OR_NULL(e)) { + args->size = 0; + return PTR_ERR_OR_ZERO(e); + } + + count = e->num_engines; + + /* Be paranoid in case we have an impedance mismatch */ + if (!check_struct_size(user, engines, count, &size)) { + err = -EINVAL; + goto err_free; + } + if (overflows_type(size, args->size)) { + err = -EINVAL; + goto err_free; + } + + if (!args->size) { + args->size = size; + goto err_free; + } + + if (args->size < size) { + err = -EINVAL; + goto err_free; + } + + user = u64_to_user_ptr(args->value); + if (!access_ok(user, size)) { + err = -EFAULT; + goto err_free; + } + + if (put_user(0, &user->extensions)) { + err = -EFAULT; + goto err_free; + } + + for (n = 0; n < count; n++) { + struct i915_engine_class_instance ci = { + .engine_class = I915_ENGINE_CLASS_INVALID, + .engine_instance = I915_ENGINE_CLASS_INVALID_NONE, + }; + + if (e->engines[n]) { + ci.engine_class = e->engines[n]->engine->uabi_class; + ci.engine_instance = e->engines[n]->engine->instance; + } + + if (copy_to_user(&user->engines[n], &ci, sizeof(ci))) { + err = -EFAULT; + goto err_free; + } + } + + args->size = size; + +err_free: + INIT_RCU_WORK(&e->rcu, free_engines_rcu); + queue_rcu_work(system_wq, &e->rcu); + return err; +} + static int ctx_setparam(struct drm_i915_file_private *fpriv, struct i915_gem_context *ctx, struct drm_i915_gem_context_param *args) @@ -1481,6 +1868,10 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, ret = set_ppgtt(fpriv, ctx, args); break; + case I915_CONTEXT_PARAM_ENGINES: + ret = set_engines(ctx, args); + break; + case I915_CONTEXT_PARAM_BAN_PERIOD: default: ret = -EINVAL; @@ -1509,8 +1900,229 @@ static int create_setparam(struct i915_user_extension __user *ext, void *data) return ctx_setparam(arg->fpriv, arg->ctx, &local.param); } +static int clone_engines(struct i915_gem_context *dst, + struct i915_gem_context *src) +{ + struct i915_gem_engines *e = i915_gem_context_lock_engines(src); + struct i915_gem_engines *clone; + bool user_engines; + unsigned long n; + + clone = kmalloc(struct_size(e, engines, e->num_engines), GFP_KERNEL); + if (!clone) + goto err_unlock; + + clone->i915 = dst->i915; + for (n = 0; n < e->num_engines; n++) { + struct intel_engine_cs *engine; + + if (!e->engines[n]) { + clone->engines[n] = NULL; + continue; + } + engine = e->engines[n]->engine; + + /* + * Virtual engines are singletons; they can only exist + * inside a single context, because they embed their + * HW context... As each virtual context implies a single + * timeline (each engine can only dequeue a single request + * at any time), it would be surprising for two contexts + * to use the same engine. So let's create a copy of + * the virtual engine instead. + */ + if (intel_engine_is_virtual(engine)) + clone->engines[n] = + intel_execlists_clone_virtual(dst, engine); + else + clone->engines[n] = intel_context_create(dst, engine); + if (IS_ERR_OR_NULL(clone->engines[n])) { + __free_engines(clone, n); + goto err_unlock; + } + } + clone->num_engines = n; + + user_engines = i915_gem_context_user_engines(src); + i915_gem_context_unlock_engines(src); + + free_engines(dst->engines); + RCU_INIT_POINTER(dst->engines, clone); + if (user_engines) + i915_gem_context_set_user_engines(dst); + else + i915_gem_context_clear_user_engines(dst); + return 0; + +err_unlock: + i915_gem_context_unlock_engines(src); + return -ENOMEM; +} + +static int clone_flags(struct i915_gem_context *dst, + struct i915_gem_context *src) +{ + dst->user_flags = src->user_flags; + return 0; +} + +static int clone_schedattr(struct i915_gem_context *dst, + struct i915_gem_context *src) +{ + dst->sched = src->sched; + return 0; +} + +static int clone_sseu(struct i915_gem_context *dst, + struct i915_gem_context *src) +{ + struct i915_gem_engines *e = i915_gem_context_lock_engines(src); + struct i915_gem_engines *clone; + unsigned long n; + int err; + + clone = dst->engines; /* no locking required; sole access */ + if (e->num_engines != clone->num_engines) { + err = -EINVAL; + goto unlock; + } + + for (n = 0; n < e->num_engines; n++) { + struct intel_context *ce = e->engines[n]; + + if (clone->engines[n]->engine->class != ce->engine->class) { + /* Must have compatible engine maps! */ + err = -EINVAL; + goto unlock; + } + + /* serialises with set_sseu */ + err = intel_context_lock_pinned(ce); + if (err) + goto unlock; + + clone->engines[n]->sseu = ce->sseu; + intel_context_unlock_pinned(ce); + } + + err = 0; +unlock: + i915_gem_context_unlock_engines(src); + return err; +} + +static int clone_timeline(struct i915_gem_context *dst, + struct i915_gem_context *src) +{ + if (src->timeline) { + GEM_BUG_ON(src->timeline == dst->timeline); + + if (dst->timeline) + i915_timeline_put(dst->timeline); + dst->timeline = i915_timeline_get(src->timeline); + } + + return 0; +} + +static int clone_vm(struct i915_gem_context *dst, + struct i915_gem_context *src) +{ + struct i915_hw_ppgtt *ppgtt; + + rcu_read_lock(); + do { + ppgtt = READ_ONCE(src->ppgtt); + if (!ppgtt) + break; + + if (!kref_get_unless_zero(&ppgtt->ref)) + continue; + + /* + * This ppgtt may have be reallocated between + * the read and the kref, and reassigned to a third + * context. In order to avoid inadvertent sharing + * of this ppgtt with that third context (and not + * src), we have to confirm that we have the same + * ppgtt after passing through the strong memory + * barrier implied by a successful + * kref_get_unless_zero(). + * + * Once we have acquired the current ppgtt of src, + * we no longer care if it is released from src, as + * it cannot be reallocated elsewhere. + */ + + if (ppgtt == READ_ONCE(src->ppgtt)) + break; + + i915_ppgtt_put(ppgtt); + } while (1); + rcu_read_unlock(); + + if (ppgtt) { + __assign_ppgtt(dst, ppgtt); + i915_ppgtt_put(ppgtt); + } + + return 0; +} + +static int create_clone(struct i915_user_extension __user *ext, void *data) +{ + static int (* const fn[])(struct i915_gem_context *dst, + struct i915_gem_context *src) = { +#define MAP(x, y) [ilog2(I915_CONTEXT_CLONE_##x)] = y + MAP(ENGINES, clone_engines), + MAP(FLAGS, clone_flags), + MAP(SCHEDATTR, clone_schedattr), + MAP(SSEU, clone_sseu), + MAP(TIMELINE, clone_timeline), + MAP(VM, clone_vm), +#undef MAP + }; + struct drm_i915_gem_context_create_ext_clone local; + const struct create_ext *arg = data; + struct i915_gem_context *dst = arg->ctx; + struct i915_gem_context *src; + int err, bit; + + if (copy_from_user(&local, ext, sizeof(local))) + return -EFAULT; + + BUILD_BUG_ON(GENMASK(BITS_PER_TYPE(local.flags) - 1, ARRAY_SIZE(fn)) != + I915_CONTEXT_CLONE_UNKNOWN); + + if (local.flags & I915_CONTEXT_CLONE_UNKNOWN) + return -EINVAL; + + if (local.rsvd) + return -EINVAL; + + rcu_read_lock(); + src = __i915_gem_context_lookup_rcu(arg->fpriv, local.clone_id); + rcu_read_unlock(); + if (!src) + return -ENOENT; + + GEM_BUG_ON(src == dst); + + for (bit = 0; bit < ARRAY_SIZE(fn); bit++) { + if (!(local.flags & BIT(bit))) + continue; + + err = fn[bit](dst, src); + if (err) + return err; + } + + return 0; +} + static const i915_user_extension_fn create_extensions[] = { [I915_CONTEXT_CREATE_EXT_SETPARAM] = create_setparam, + [I915_CONTEXT_CREATE_EXT_CLONE] = create_clone, }; static bool client_is_banned(struct drm_i915_file_private *file_priv) @@ -1610,8 +2222,9 @@ static int get_sseu(struct i915_gem_context *ctx, struct drm_i915_gem_context_param *args) { struct drm_i915_gem_context_param_sseu user_sseu; - struct intel_engine_cs *engine; struct intel_context *ce; + unsigned long lookup; + int err; if (args->size == 0) goto out; @@ -1622,25 +2235,33 @@ static int get_sseu(struct i915_gem_context *ctx, sizeof(user_sseu))) return -EFAULT; - if (user_sseu.flags || user_sseu.rsvd) + if (user_sseu.rsvd) return -EINVAL; - engine = intel_engine_lookup_user(ctx->i915, - user_sseu.engine.engine_class, - user_sseu.engine.engine_instance); - if (!engine) + if (user_sseu.flags & ~(I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX)) return -EINVAL; - ce = intel_context_pin_lock(ctx, engine); /* serialises with set_sseu */ + lookup = 0; + if (user_sseu.flags & I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX) + lookup |= LOOKUP_USER_INDEX; + + ce = lookup_user_engine(ctx, lookup, &user_sseu.engine); if (IS_ERR(ce)) return PTR_ERR(ce); + err = intel_context_lock_pinned(ce); /* serialises with set_sseu */ + if (err) { + intel_context_put(ce); + return err; + } + user_sseu.slice_mask = ce->sseu.slice_mask; user_sseu.subslice_mask = ce->sseu.subslice_mask; user_sseu.min_eus_per_subslice = ce->sseu.min_eus_per_subslice; user_sseu.max_eus_per_subslice = ce->sseu.max_eus_per_subslice; - intel_context_pin_unlock(ce); + intel_context_unlock_pinned(ce); + intel_context_put(ce); if (copy_to_user(u64_to_user_ptr(args->value), &user_sseu, sizeof(user_sseu))) @@ -1708,6 +2329,10 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, ret = get_ppgtt(file_priv, ctx, args); break; + case I915_CONTEXT_PARAM_ENGINES: + ret = get_engines(ctx, args); + break; + case I915_CONTEXT_PARAM_BAN_PERIOD: default: ret = -EINVAL; @@ -1801,6 +2426,23 @@ int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx) return err; } +/* GEM context-engines iterator: for_each_gem_engine() */ +struct intel_context * +i915_gem_engines_iter_next(struct i915_gem_engines_iter *it) +{ + const struct i915_gem_engines *e = it->engines; + struct intel_context *ctx; + + do { + if (it->idx >= e->num_engines) + return NULL; + + ctx = e->engines[it->idx++]; + } while (!ctx); + + return ctx; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_context.c" #include "selftests/i915_gem_context.c" diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 23dcb01bfd82f50b3f02992b0cd6001e7ecca019..9ad4a636243811e44b807d3a67cbf24ad39af2df 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -27,9 +27,10 @@ #include "i915_gem_context_types.h" +#include "gt/intel_context.h" + #include "i915_gem.h" #include "i915_scheduler.h" -#include "intel_context.h" #include "intel_device_info.h" struct drm_device; @@ -111,6 +112,24 @@ static inline void i915_gem_context_set_force_single_submission(struct i915_gem_ __set_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ctx->flags); } +static inline bool +i915_gem_context_user_engines(const struct i915_gem_context *ctx) +{ + return test_bit(CONTEXT_USER_ENGINES, &ctx->flags); +} + +static inline void +i915_gem_context_set_user_engines(struct i915_gem_context *ctx) +{ + set_bit(CONTEXT_USER_ENGINES, &ctx->flags); +} + +static inline void +i915_gem_context_clear_user_engines(struct i915_gem_context *ctx) +{ + clear_bit(CONTEXT_USER_ENGINES, &ctx->flags); +} + int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx); static inline int i915_gem_context_pin_hw_id(struct i915_gem_context *ctx) { @@ -140,10 +159,6 @@ int i915_gem_context_open(struct drm_i915_private *i915, struct drm_file *file); void i915_gem_context_close(struct drm_file *file); -int i915_switch_context(struct i915_request *rq); -int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915, - intel_engine_mask_t engine_mask); - void i915_gem_context_release(struct kref *ctx_ref); struct i915_gem_context * i915_gem_context_create_gvt(struct drm_device *dev); @@ -179,6 +194,64 @@ static inline void i915_gem_context_put(struct i915_gem_context *ctx) kref_put(&ctx->ref, i915_gem_context_release); } +static inline struct i915_gem_engines * +i915_gem_context_engines(struct i915_gem_context *ctx) +{ + return rcu_dereference_protected(ctx->engines, + lockdep_is_held(&ctx->engines_mutex)); +} + +static inline struct i915_gem_engines * +i915_gem_context_lock_engines(struct i915_gem_context *ctx) + __acquires(&ctx->engines_mutex) +{ + mutex_lock(&ctx->engines_mutex); + return i915_gem_context_engines(ctx); +} + +static inline void +i915_gem_context_unlock_engines(struct i915_gem_context *ctx) + __releases(&ctx->engines_mutex) +{ + mutex_unlock(&ctx->engines_mutex); +} + +static inline struct intel_context * +i915_gem_context_lookup_engine(struct i915_gem_context *ctx, unsigned int idx) +{ + return i915_gem_context_engines(ctx)->engines[idx]; +} + +static inline struct intel_context * +i915_gem_context_get_engine(struct i915_gem_context *ctx, unsigned int idx) +{ + struct intel_context *ce = ERR_PTR(-EINVAL); + + rcu_read_lock(); { + struct i915_gem_engines *e = rcu_dereference(ctx->engines); + if (likely(idx < e->num_engines && e->engines[idx])) + ce = intel_context_get(e->engines[idx]); + } rcu_read_unlock(); + + return ce; +} + +static inline void +i915_gem_engines_iter_init(struct i915_gem_engines_iter *it, + struct i915_gem_engines *engines) +{ + GEM_BUG_ON(!engines); + it->engines = engines; + it->idx = 0; +} + +struct intel_context * +i915_gem_engines_iter_next(struct i915_gem_engines_iter *it); + +#define for_each_gem_engine(ce, engines, it) \ + for (i915_gem_engines_iter_init(&(it), (engines)); \ + ((ce) = i915_gem_engines_iter_next(&(it)));) + struct i915_lut_handle *i915_lut_handle_alloc(void); void i915_lut_handle_free(struct i915_lut_handle *lut); diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h index e2ec58b10fb286f340ab6c7966223571970b8d5a..fb965ded2508fa7868f17a3cddb0d24da594fa81 100644 --- a/drivers/gpu/drm/i915/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/i915_gem_context_types.h @@ -17,8 +17,9 @@ #include #include +#include "gt/intel_context_types.h" + #include "i915_scheduler.h" -#include "intel_context_types.h" struct pid; @@ -28,6 +29,18 @@ struct i915_hw_ppgtt; struct i915_timeline; struct intel_ring; +struct i915_gem_engines { + struct rcu_work rcu; + struct drm_i915_private *i915; + unsigned int num_engines; + struct intel_context *engines[]; +}; + +struct i915_gem_engines_iter { + unsigned int idx; + const struct i915_gem_engines *engines; +}; + /** * struct i915_gem_context - client state * @@ -41,6 +54,30 @@ struct i915_gem_context { /** file_priv: owning file descriptor */ struct drm_i915_file_private *file_priv; + /** + * @engines: User defined engines for this context + * + * Various uAPI offer the ability to lookup up an + * index from this array to select an engine operate on. + * + * Multiple logically distinct instances of the same engine + * may be defined in the array, as well as composite virtual + * engines. + * + * Execbuf uses the I915_EXEC_RING_MASK as an index into this + * array to select which HW context + engine to execute on. For + * the default array, the user_ring_map[] is used to translate + * the legacy uABI onto the approprate index (e.g. both + * I915_EXEC_DEFAULT and I915_EXEC_RENDER select the same + * context, and I915_EXEC_BSD is weird). For a use defined + * array, execbuf uses I915_EXEC_RING_MASK as a plain index. + * + * User defined by I915_CONTEXT_PARAM_ENGINE (when the + * CONTEXT_USER_ENGINES flag is set). + */ + struct i915_gem_engines __rcu *engines; + struct mutex engines_mutex; /* guards writes to engines */ + struct i915_timeline *timeline; /** @@ -109,6 +146,7 @@ struct i915_gem_context { #define CONTEXT_BANNED 0 #define CONTEXT_CLOSED 1 #define CONTEXT_FORCE_SINGLE_SUBMISSION 2 +#define CONTEXT_USER_ENGINES 3 /** * @hw_id: - unique identifier for the context @@ -128,15 +166,10 @@ struct i915_gem_context { atomic_t hw_id_pin_count; struct list_head hw_id_link; - struct list_head active_engines; struct mutex mutex; struct i915_sched_attr sched; - /** hw_contexts: per-engine logical HW state */ - struct rb_root hw_contexts; - spinlock_t hw_contexts_lock; - /** ring_size: size for allocating the per-engine ring buffer */ u32 ring_size; /** desc_template: invariant fields for the HW context descriptor */ diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 060f5903544a042427f195e95c0bf057b129a029..0bdb3e072ba5fe4aec081ae61e7aeabf225ec70d 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -36,15 +36,8 @@ I915_SELFTEST_DECLARE(static struct igt_evict_ctl { bool fail_if_busy:1; } igt_evict_ctl;) -static bool ggtt_is_idle(struct drm_i915_private *i915) -{ - return !i915->gt.active_requests; -} - static int ggtt_flush(struct drm_i915_private *i915) { - int err; - /* * Not everything in the GGTT is tracked via vma (otherwise we * could evict as required with minimal stalling) so we are forced @@ -52,19 +45,10 @@ static int ggtt_flush(struct drm_i915_private *i915) * the hopes that we can then remove contexts and the like only * bound by their active reference. */ - err = i915_gem_switch_to_kernel_context(i915, i915->gt.active_engines); - if (err) - return err; - - err = i915_gem_wait_for_idle(i915, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); - if (err) - return err; - - GEM_BUG_ON(!ggtt_is_idle(i915)); - return 0; + return i915_gem_wait_for_idle(i915, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); } static bool @@ -222,24 +206,17 @@ i915_gem_evict_something(struct i915_address_space *vm, * us a termination condition, when the last retired context is * the kernel's there is no more we can evict. */ - if (!ggtt_is_idle(dev_priv)) { - if (I915_SELFTEST_ONLY(igt_evict_ctl.fail_if_busy)) - return -EBUSY; + if (I915_SELFTEST_ONLY(igt_evict_ctl.fail_if_busy)) + return -EBUSY; - ret = ggtt_flush(dev_priv); - if (ret) - return ret; + ret = ggtt_flush(dev_priv); + if (ret) + return ret; - cond_resched(); - goto search_again; - } + cond_resched(); - /* - * If we still have pending pageflip completions, drop - * back to userspace to give our workqueues time to - * acquire our locks and unpin the old scanouts. - */ - return intel_has_pending_fb_unpin(dev_priv) ? -EAGAIN : -ENOSPC; + flags |= PIN_NONBLOCK; + goto search_again; found: /* drm_mm doesn't allow any other other operations while diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index c83d2a195d150b68e27482fb821f2494033b65af..8b85c91c3ea4cf77c7ada1610ca16cb60831fcb0 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -34,6 +34,8 @@ #include #include +#include "gt/intel_gt_pm.h" + #include "i915_drv.h" #include "i915_gem_clflush.h" #include "i915_trace.h" @@ -236,7 +238,8 @@ struct i915_execbuffer { unsigned int *flags; struct intel_engine_cs *engine; /** engine to queue the request to */ - struct i915_gem_context *ctx; /** context for building the request */ + struct intel_context *context; /* logical state for the request */ + struct i915_gem_context *gem_context; /** caller's context */ struct i915_address_space *vm; /** GTT and vma for the request */ struct i915_request *request; /** our request to build */ @@ -738,7 +741,7 @@ static int eb_select_context(struct i915_execbuffer *eb) if (unlikely(!ctx)) return -ENOENT; - eb->ctx = ctx; + eb->gem_context = ctx; if (ctx->ppgtt) { eb->vm = &ctx->ppgtt->vm; eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT; @@ -784,7 +787,6 @@ static struct i915_request *__eb_wait_for_ring(struct intel_ring *ring) static int eb_wait_for_ring(const struct i915_execbuffer *eb) { - const struct intel_context *ce; struct i915_request *rq; int ret = 0; @@ -794,11 +796,7 @@ static int eb_wait_for_ring(const struct i915_execbuffer *eb) * keeping all of their resources pinned. */ - ce = intel_context_lookup(eb->ctx, eb->engine); - if (!ce || !ce->ring) /* first use, assume empty! */ - return 0; - - rq = __eb_wait_for_ring(ce->ring); + rq = __eb_wait_for_ring(eb->context->ring); if (rq) { mutex_unlock(&eb->i915->drm.struct_mutex); @@ -817,15 +815,15 @@ static int eb_wait_for_ring(const struct i915_execbuffer *eb) static int eb_lookup_vmas(struct i915_execbuffer *eb) { - struct radix_tree_root *handles_vma = &eb->ctx->handles_vma; + struct radix_tree_root *handles_vma = &eb->gem_context->handles_vma; struct drm_i915_gem_object *obj; unsigned int i, batch; int err; - if (unlikely(i915_gem_context_is_closed(eb->ctx))) + if (unlikely(i915_gem_context_is_closed(eb->gem_context))) return -ENOENT; - if (unlikely(i915_gem_context_is_banned(eb->ctx))) + if (unlikely(i915_gem_context_is_banned(eb->gem_context))) return -EIO; INIT_LIST_HEAD(&eb->relocs); @@ -870,8 +868,8 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb) if (!vma->open_count++) i915_vma_reopen(vma); list_add(&lut->obj_link, &obj->lut_list); - list_add(&lut->ctx_link, &eb->ctx->handles_list); - lut->ctx = eb->ctx; + list_add(&lut->ctx_link, &eb->gem_context->handles_list); + lut->ctx = eb->gem_context; lut->handle = handle; add_vma: @@ -1227,7 +1225,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, if (err) goto err_unmap; - rq = i915_request_alloc(eb->engine, eb->ctx); + rq = i915_request_create(eb->context); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_unpin; @@ -2079,9 +2077,7 @@ gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv, return file_priv->bsd_engine; } -#define I915_USER_RINGS (4) - -static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = { +static const enum intel_engine_id user_ring_map[] = { [I915_EXEC_DEFAULT] = RCS0, [I915_EXEC_RENDER] = RCS0, [I915_EXEC_BLT] = BCS0, @@ -2089,31 +2085,57 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = { [I915_EXEC_VEBOX] = VECS0 }; -static struct intel_engine_cs * -eb_select_engine(struct drm_i915_private *dev_priv, - struct drm_file *file, - struct drm_i915_gem_execbuffer2 *args) +static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce) { - unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK; - struct intel_engine_cs *engine; + int err; - if (user_ring_id > I915_USER_RINGS) { - DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id); - return NULL; - } + /* + * ABI: Before userspace accesses the GPU (e.g. execbuffer), report + * EIO if the GPU is already wedged. + */ + err = i915_terminally_wedged(eb->i915); + if (err) + return err; + + /* + * Pinning the contexts may generate requests in order to acquire + * GGTT space, so do this first before we reserve a seqno for + * ourselves. + */ + err = intel_context_pin(ce); + if (err) + return err; + + eb->engine = ce->engine; + eb->context = ce; + return 0; +} + +static void eb_unpin_context(struct i915_execbuffer *eb) +{ + intel_context_unpin(eb->context); +} - if ((user_ring_id != I915_EXEC_BSD) && - ((args->flags & I915_EXEC_BSD_MASK) != 0)) { +static unsigned int +eb_select_legacy_ring(struct i915_execbuffer *eb, + struct drm_file *file, + struct drm_i915_gem_execbuffer2 *args) +{ + struct drm_i915_private *i915 = eb->i915; + unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK; + + if (user_ring_id != I915_EXEC_BSD && + (args->flags & I915_EXEC_BSD_MASK)) { DRM_DEBUG("execbuf with non bsd ring but with invalid " "bsd dispatch flags: %d\n", (int)(args->flags)); - return NULL; + return -1; } - if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(dev_priv, VCS1)) { + if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(i915, VCS1)) { unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK; if (bsd_idx == I915_EXEC_BSD_DEFAULT) { - bsd_idx = gen8_dispatch_bsd_engine(dev_priv, file); + bsd_idx = gen8_dispatch_bsd_engine(i915, file); } else if (bsd_idx >= I915_EXEC_BSD_RING1 && bsd_idx <= I915_EXEC_BSD_RING2) { bsd_idx >>= I915_EXEC_BSD_SHIFT; @@ -2121,20 +2143,42 @@ eb_select_engine(struct drm_i915_private *dev_priv, } else { DRM_DEBUG("execbuf with unknown bsd ring: %u\n", bsd_idx); - return NULL; + return -1; } - engine = dev_priv->engine[_VCS(bsd_idx)]; - } else { - engine = dev_priv->engine[user_ring_map[user_ring_id]]; + return _VCS(bsd_idx); } - if (!engine) { - DRM_DEBUG("execbuf with invalid ring: %u\n", user_ring_id); - return NULL; + if (user_ring_id >= ARRAY_SIZE(user_ring_map)) { + DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id); + return -1; } - return engine; + return user_ring_map[user_ring_id]; +} + +static int +eb_select_engine(struct i915_execbuffer *eb, + struct drm_file *file, + struct drm_i915_gem_execbuffer2 *args) +{ + struct intel_context *ce; + unsigned int idx; + int err; + + if (i915_gem_context_user_engines(eb->gem_context)) + idx = args->flags & I915_EXEC_RING_MASK; + else + idx = eb_select_legacy_ring(eb, file, args); + + ce = i915_gem_context_get_engine(eb->gem_context, idx); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + err = eb_pin_context(eb, ce); + intel_context_put(ce); + + return err; } static void @@ -2275,8 +2319,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, { struct i915_execbuffer eb; struct dma_fence *in_fence = NULL; + struct dma_fence *exec_fence = NULL; struct sync_file *out_fence = NULL; - intel_wakeref_t wakeref; int out_fence_fd = -1; int err; @@ -2318,11 +2362,24 @@ i915_gem_do_execbuffer(struct drm_device *dev, return -EINVAL; } + if (args->flags & I915_EXEC_FENCE_SUBMIT) { + if (in_fence) { + err = -EINVAL; + goto err_in_fence; + } + + exec_fence = sync_file_get_fence(lower_32_bits(args->rsvd2)); + if (!exec_fence) { + err = -EINVAL; + goto err_in_fence; + } + } + if (args->flags & I915_EXEC_FENCE_OUT) { out_fence_fd = get_unused_fd_flags(O_CLOEXEC); if (out_fence_fd < 0) { err = out_fence_fd; - goto err_in_fence; + goto err_exec_fence; } } @@ -2336,12 +2393,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, if (unlikely(err)) goto err_destroy; - eb.engine = eb_select_engine(eb.i915, file, args); - if (!eb.engine) { - err = -EINVAL; - goto err_engine; - } - /* * Take a local wakeref for preparing to dispatch the execbuf as * we expect to access the hardware fairly frequently in the @@ -2349,16 +2400,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, * wakeref that we hold until the GPU has been idle for at least * 100ms. */ - wakeref = intel_runtime_pm_get(eb.i915); + intel_gt_pm_get(eb.i915); err = i915_mutex_lock_interruptible(dev); if (err) goto err_rpm; - err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */ + err = eb_select_engine(&eb, file, args); if (unlikely(err)) goto err_unlock; + err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */ + if (unlikely(err)) + goto err_engine; + err = eb_relocate(&eb); if (err) { /* @@ -2442,7 +2497,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, GEM_BUG_ON(eb.reloc_cache.rq); /* Allocate a request for this batch buffer nice and early. */ - eb.request = i915_request_alloc(eb.engine, eb.ctx); + eb.request = i915_request_create(eb.context); if (IS_ERR(eb.request)) { err = PTR_ERR(eb.request); goto err_batch_unpin; @@ -2454,6 +2509,13 @@ i915_gem_do_execbuffer(struct drm_device *dev, goto err_request; } + if (exec_fence) { + err = i915_request_await_execution(eb.request, exec_fence, + eb.engine->bond_execute); + if (err < 0) + goto err_request; + } + if (fences) { err = await_fence_array(&eb, fences); if (err) @@ -2480,8 +2542,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, trace_i915_request_queue(eb.request, eb.batch_flags); err = eb_submit(&eb); err_request: - i915_request_add(eb.request); add_to_client(eb.request, file); + i915_request_add(eb.request); if (fences) signal_fence_array(&eb, fences); @@ -2503,17 +2565,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, err_vma: if (eb.exec) eb_release_vmas(&eb); +err_engine: + eb_unpin_context(&eb); err_unlock: mutex_unlock(&dev->struct_mutex); err_rpm: - intel_runtime_pm_put(eb.i915, wakeref); -err_engine: - i915_gem_context_put(eb.ctx); + intel_gt_pm_put(eb.i915); + i915_gem_context_put(eb.gem_context); err_destroy: eb_destroy(&eb); err_out_fence: if (out_fence_fd != -1) put_unused_fd(out_fence_fd); +err_exec_fence: + dma_fence_put(exec_fence); err_in_fence: dma_fence_put(in_fence); return err; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 8f460cc4cc1f6170e49dfa0fc06fd5f89e4318bc..266baa11df64b6e4379160f1936eee2b417f6a73 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -37,7 +37,6 @@ #include "i915_drv.h" #include "i915_vgpu.h" -#include "i915_reset.h" #include "i915_trace.h" #include "intel_drv.h" #include "intel_frontbuffer.h" @@ -1829,11 +1828,62 @@ static void gen6_ppgtt_free_pd(struct gen6_hw_ppgtt *ppgtt) free_pt(&ppgtt->base.vm, pt); } +struct gen6_ppgtt_cleanup_work { + struct work_struct base; + struct i915_vma *vma; +}; + +static void gen6_ppgtt_cleanup_work(struct work_struct *wrk) +{ + struct gen6_ppgtt_cleanup_work *work = + container_of(wrk, typeof(*work), base); + /* Side note, vma->vm is the GGTT not the ppgtt we just destroyed! */ + struct drm_i915_private *i915 = work->vma->vm->i915; + + mutex_lock(&i915->drm.struct_mutex); + i915_vma_destroy(work->vma); + mutex_unlock(&i915->drm.struct_mutex); + + kfree(work); +} + +static int nop_set_pages(struct i915_vma *vma) +{ + return -ENODEV; +} + +static void nop_clear_pages(struct i915_vma *vma) +{ +} + +static int nop_bind(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 unused) +{ + return -ENODEV; +} + +static void nop_unbind(struct i915_vma *vma) +{ +} + +static const struct i915_vma_ops nop_vma_ops = { + .set_pages = nop_set_pages, + .clear_pages = nop_clear_pages, + .bind_vma = nop_bind, + .unbind_vma = nop_unbind, +}; + static void gen6_ppgtt_cleanup(struct i915_address_space *vm) { struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); + struct gen6_ppgtt_cleanup_work *work = ppgtt->work; - i915_vma_destroy(ppgtt->vma); + /* FIXME remove the struct_mutex to bring the locking under control */ + INIT_WORK(&work->base, gen6_ppgtt_cleanup_work); + work->vma = ppgtt->vma; + work->vma->ops = &nop_vma_ops; + schedule_work(&work->base); gen6_ppgtt_free_pd(ppgtt); gen6_ppgtt_free_scratch(vm); @@ -2012,9 +2062,13 @@ static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915) ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode; + ppgtt->work = kmalloc(sizeof(*ppgtt->work), GFP_KERNEL); + if (!ppgtt->work) + goto err_free; + err = gen6_ppgtt_init_scratch(ppgtt); if (err) - goto err_free; + goto err_work; ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE); if (IS_ERR(ppgtt->vma)) { @@ -2026,6 +2080,8 @@ static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915) err_scratch: gen6_ppgtt_free_scratch(&ppgtt->base.vm); +err_work: + kfree(ppgtt->work); err_free: kfree(ppgtt); return ERR_PTR(err); @@ -2752,6 +2808,12 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) if (ret) return ret; + if (USES_GUC(dev_priv)) { + ret = intel_guc_reserve_ggtt_top(&dev_priv->guc); + if (ret) + goto err_reserve; + } + /* Clear any non-preallocated blocks */ drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) { DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", @@ -2766,12 +2828,14 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) if (INTEL_PPGTT(dev_priv) == INTEL_PPGTT_ALIASING) { ret = i915_gem_init_aliasing_ppgtt(dev_priv); if (ret) - goto err; + goto err_appgtt; } return 0; -err: +err_appgtt: + intel_guc_release_ggtt_top(&dev_priv->guc); +err_reserve: drm_mm_remove_node(&ggtt->error_capture); return ret; } @@ -2797,6 +2861,8 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) if (drm_mm_node_allocated(&ggtt->error_capture)) drm_mm_remove_node(&ggtt->error_capture); + intel_guc_release_ggtt_top(&dev_priv->guc); + if (drm_mm_initialized(&ggtt->vm.mm)) { intel_vgt_deballoon(dev_priv); i915_address_space_fini(&ggtt->vm); @@ -3280,7 +3346,9 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) size = gen6_get_total_gtt_size(snb_gmch_ctl); ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE; - ggtt->vm.clear_range = gen6_ggtt_clear_range; + ggtt->vm.clear_range = nop_clear_range; + if (!HAS_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv)) + ggtt->vm.clear_range = gen6_ggtt_clear_range; ggtt->vm.insert_page = gen6_ggtt_insert_page; ggtt->vm.insert_entries = gen6_ggtt_insert_entries; ggtt->vm.cleanup = gen6_gmch_remove; @@ -3369,17 +3437,6 @@ int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv) if (ret) return ret; - /* Trim the GGTT to fit the GuC mappable upper range (when enabled). - * This is easier than doing range restriction on the fly, as we - * currently don't have any bits spare to pass in this upper - * restriction! - */ - if (USES_GUC(dev_priv)) { - ggtt->vm.total = min_t(u64, ggtt->vm.total, GUC_GGTT_TOP); - ggtt->mappable_end = - min_t(u64, ggtt->mappable_end, ggtt->vm.total); - } - if ((ggtt->vm.total - 1) >> 32) { DRM_ERROR("We never expected a Global GTT with more than 32bits" " of address space! Found %lldM!\n", @@ -3608,6 +3665,89 @@ intel_rotate_pages(struct intel_rotation_info *rot_info, return ERR_PTR(ret); } +static struct scatterlist * +remap_pages(struct drm_i915_gem_object *obj, unsigned int offset, + unsigned int width, unsigned int height, + unsigned int stride, + struct sg_table *st, struct scatterlist *sg) +{ + unsigned int row; + + for (row = 0; row < height; row++) { + unsigned int left = width * I915_GTT_PAGE_SIZE; + + while (left) { + dma_addr_t addr; + unsigned int length; + + /* We don't need the pages, but need to initialize + * the entries so the sg list can be happily traversed. + * The only thing we need are DMA addresses. + */ + + addr = i915_gem_object_get_dma_address_len(obj, offset, &length); + + length = min(left, length); + + st->nents++; + + sg_set_page(sg, NULL, length, 0); + sg_dma_address(sg) = addr; + sg_dma_len(sg) = length; + sg = sg_next(sg); + + offset += length / I915_GTT_PAGE_SIZE; + left -= length; + } + + offset += stride - width; + } + + return sg; +} + +static noinline struct sg_table * +intel_remap_pages(struct intel_remapped_info *rem_info, + struct drm_i915_gem_object *obj) +{ + unsigned int size = intel_remapped_info_size(rem_info); + struct sg_table *st; + struct scatterlist *sg; + int ret = -ENOMEM; + int i; + + /* Allocate target SG list. */ + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (!st) + goto err_st_alloc; + + ret = sg_alloc_table(st, size, GFP_KERNEL); + if (ret) + goto err_sg_alloc; + + st->nents = 0; + sg = st->sgl; + + for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) { + sg = remap_pages(obj, rem_info->plane[i].offset, + rem_info->plane[i].width, rem_info->plane[i].height, + rem_info->plane[i].stride, st, sg); + } + + i915_sg_trim(st); + + return st; + +err_sg_alloc: + kfree(st); +err_st_alloc: + + DRM_DEBUG_DRIVER("Failed to create remapped mapping for object size %zu! (%ux%u tiles, %u pages)\n", + obj->base.size, rem_info->plane[0].width, rem_info->plane[0].height, size); + + return ERR_PTR(ret); +} + static noinline struct sg_table * intel_partial_pages(const struct i915_ggtt_view *view, struct drm_i915_gem_object *obj) @@ -3686,6 +3826,11 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma) intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj); break; + case I915_GGTT_VIEW_REMAPPED: + vma->pages = + intel_remap_pages(&vma->ggtt_view.remapped, vma->obj); + break; + case I915_GGTT_VIEW_PARTIAL: vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj); break; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index f597f35b109be26f15dee6cae9c1ae97f9db725b..38496039456b7f3430e70836c24ac3ebd074c7cf 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -38,8 +38,8 @@ #include #include +#include "gt/intel_reset.h" #include "i915_request.h" -#include "i915_reset.h" #include "i915_selftest.h" #include "i915_timeline.h" @@ -163,11 +163,18 @@ typedef u64 gen8_ppgtt_pml4e_t; struct sg_table; +struct intel_remapped_plane_info { + /* in gtt pages */ + unsigned int width, height, stride, offset; +} __packed; + +struct intel_remapped_info { + struct intel_remapped_plane_info plane[2]; + unsigned int unused_mbz; +} __packed; + struct intel_rotation_info { - struct intel_rotation_plane_info { - /* tiles */ - unsigned int width, height, stride, offset; - } plane[2]; + struct intel_remapped_plane_info plane[2]; } __packed; struct intel_partial_info { @@ -179,12 +186,20 @@ enum i915_ggtt_view_type { I915_GGTT_VIEW_NORMAL = 0, I915_GGTT_VIEW_ROTATED = sizeof(struct intel_rotation_info), I915_GGTT_VIEW_PARTIAL = sizeof(struct intel_partial_info), + I915_GGTT_VIEW_REMAPPED = sizeof(struct intel_remapped_info), }; static inline void assert_i915_gem_gtt_types(void) { BUILD_BUG_ON(sizeof(struct intel_rotation_info) != 8*sizeof(unsigned int)); BUILD_BUG_ON(sizeof(struct intel_partial_info) != sizeof(u64) + sizeof(unsigned int)); + BUILD_BUG_ON(sizeof(struct intel_remapped_info) != 9*sizeof(unsigned int)); + + /* Check that rotation/remapped shares offsets for simplicity */ + BUILD_BUG_ON(offsetof(struct intel_remapped_info, plane[0]) != + offsetof(struct intel_rotation_info, plane[0])); + BUILD_BUG_ON(offsetofend(struct intel_remapped_info, plane[1]) != + offsetofend(struct intel_rotation_info, plane[1])); /* As we encode the size of each branch inside the union into its type, * we have to be careful that each branch has a unique size. @@ -193,6 +208,7 @@ static inline void assert_i915_gem_gtt_types(void) case I915_GGTT_VIEW_NORMAL: case I915_GGTT_VIEW_PARTIAL: case I915_GGTT_VIEW_ROTATED: + case I915_GGTT_VIEW_REMAPPED: /* gcc complains if these are identical cases */ break; } @@ -204,6 +220,7 @@ struct i915_ggtt_view { /* Members need to contain no holes/padding */ struct intel_partial_info partial; struct intel_rotation_info rotated; + struct intel_remapped_info remapped; }; }; @@ -384,6 +401,7 @@ struct i915_ggtt { u32 pin_bias; struct drm_mm_node error_capture; + struct drm_mm_node uc_fw; }; struct i915_hw_ppgtt { @@ -396,8 +414,6 @@ struct i915_hw_ppgtt { struct i915_page_directory_pointer pdp; /* GEN8+ */ struct i915_page_directory pd; /* GEN6-7 */ }; - - u32 user_handle; }; struct gen6_hw_ppgtt { @@ -408,6 +424,8 @@ struct gen6_hw_ppgtt { unsigned int pin_count; bool scan_for_unused_pt; + + struct gen6_ppgtt_cleanup_work *work; }; #define __to_gen6_ppgtt(base) container_of(base, struct gen6_hw_ppgtt, base) diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c index ab627ed1269c701a80670c848825321ee9137bb0..21662176819f209d2ea47ac9597e846c2428bc64 100644 --- a/drivers/gpu/drm/i915/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/i915_gem_internal.c @@ -28,9 +28,6 @@ #define QUIET (__GFP_NORETRY | __GFP_NOWARN) #define MAYFAIL (__GFP_RETRY_MAYFAIL | __GFP_NOWARN) -/* convert swiotlb segment size into sensible units (pages)! */ -#define IO_TLB_SEGPAGES (IO_TLB_SEGSIZE << IO_TLB_SHIFT >> PAGE_SHIFT) - static void internal_free_pages(struct sg_table *st) { struct scatterlist *sg; diff --git a/drivers/gpu/drm/i915/i915_gem_pm.c b/drivers/gpu/drm/i915/i915_gem_pm.c new file mode 100644 index 0000000000000000000000000000000000000000..fa9c2ebd966ab84415161a322125b29121d70a10 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_pm.c @@ -0,0 +1,250 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#include "gt/intel_gt_pm.h" + +#include "i915_drv.h" +#include "i915_gem_pm.h" +#include "i915_globals.h" + +static void i915_gem_park(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + lockdep_assert_held(&i915->drm.struct_mutex); + + for_each_engine(engine, i915, id) + i915_gem_batch_pool_fini(&engine->batch_pool); + + i915_timelines_park(i915); + i915_vma_parked(i915); + + i915_globals_park(); +} + +static void idle_work_handler(struct work_struct *work) +{ + struct drm_i915_private *i915 = + container_of(work, typeof(*i915), gem.idle_work); + bool restart = true; + + cancel_delayed_work(&i915->gem.retire_work); + mutex_lock(&i915->drm.struct_mutex); + + intel_wakeref_lock(&i915->gt.wakeref); + if (!intel_wakeref_active(&i915->gt.wakeref) && !work_pending(work)) { + i915_gem_park(i915); + restart = false; + } + intel_wakeref_unlock(&i915->gt.wakeref); + + mutex_unlock(&i915->drm.struct_mutex); + if (restart) + queue_delayed_work(i915->wq, + &i915->gem.retire_work, + round_jiffies_up_relative(HZ)); +} + +static void retire_work_handler(struct work_struct *work) +{ + struct drm_i915_private *i915 = + container_of(work, typeof(*i915), gem.retire_work.work); + + /* Come back later if the device is busy... */ + if (mutex_trylock(&i915->drm.struct_mutex)) { + i915_retire_requests(i915); + mutex_unlock(&i915->drm.struct_mutex); + } + + queue_delayed_work(i915->wq, + &i915->gem.retire_work, + round_jiffies_up_relative(HZ)); +} + +static int pm_notifier(struct notifier_block *nb, + unsigned long action, + void *data) +{ + struct drm_i915_private *i915 = + container_of(nb, typeof(*i915), gem.pm_notifier); + + switch (action) { + case INTEL_GT_UNPARK: + i915_globals_unpark(); + queue_delayed_work(i915->wq, + &i915->gem.retire_work, + round_jiffies_up_relative(HZ)); + break; + + case INTEL_GT_PARK: + queue_work(i915->wq, &i915->gem.idle_work); + break; + } + + return NOTIFY_OK; +} + +static bool switch_to_kernel_context_sync(struct drm_i915_private *i915) +{ + bool result = true; + + do { + if (i915_gem_wait_for_idle(i915, + I915_WAIT_LOCKED | + I915_WAIT_FOR_IDLE_BOOST, + I915_GEM_IDLE_TIMEOUT) == -ETIME) { + /* XXX hide warning from gem_eio */ + if (i915_modparams.reset) { + dev_err(i915->drm.dev, + "Failed to idle engines, declaring wedged!\n"); + GEM_TRACE_DUMP(); + } + + /* + * Forcibly cancel outstanding work and leave + * the gpu quiet. + */ + i915_gem_set_wedged(i915); + result = false; + } + } while (i915_retire_requests(i915) && result); + + GEM_BUG_ON(i915->gt.awake); + return result; +} + +bool i915_gem_load_power_context(struct drm_i915_private *i915) +{ + return switch_to_kernel_context_sync(i915); +} + +void i915_gem_suspend(struct drm_i915_private *i915) +{ + GEM_TRACE("\n"); + + flush_workqueue(i915->wq); + + mutex_lock(&i915->drm.struct_mutex); + + /* + * We have to flush all the executing contexts to main memory so + * that they can saved in the hibernation image. To ensure the last + * context image is coherent, we have to switch away from it. That + * leaves the i915->kernel_context still active when + * we actually suspend, and its image in memory may not match the GPU + * state. Fortunately, the kernel_context is disposable and we do + * not rely on its state. + */ + switch_to_kernel_context_sync(i915); + + mutex_unlock(&i915->drm.struct_mutex); + + /* + * Assert that we successfully flushed all the work and + * reset the GPU back to its idle, low power state. + */ + GEM_BUG_ON(i915->gt.awake); + flush_work(&i915->gem.idle_work); + + cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); + + i915_gem_drain_freed_objects(i915); + + intel_uc_suspend(i915); +} + +void i915_gem_suspend_late(struct drm_i915_private *i915) +{ + struct drm_i915_gem_object *obj; + struct list_head *phases[] = { + &i915->mm.unbound_list, + &i915->mm.bound_list, + NULL + }, **phase; + + /* + * Neither the BIOS, ourselves or any other kernel + * expects the system to be in execlists mode on startup, + * so we need to reset the GPU back to legacy mode. And the only + * known way to disable logical contexts is through a GPU reset. + * + * So in order to leave the system in a known default configuration, + * always reset the GPU upon unload and suspend. Afterwards we then + * clean up the GEM state tracking, flushing off the requests and + * leaving the system in a known idle state. + * + * Note that is of the upmost importance that the GPU is idle and + * all stray writes are flushed *before* we dismantle the backing + * storage for the pinned objects. + * + * However, since we are uncertain that resetting the GPU on older + * machines is a good idea, we don't - just in case it leaves the + * machine in an unusable condition. + */ + + mutex_lock(&i915->drm.struct_mutex); + for (phase = phases; *phase; phase++) { + list_for_each_entry(obj, *phase, mm.link) + WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false)); + } + mutex_unlock(&i915->drm.struct_mutex); + + intel_uc_sanitize(i915); + i915_gem_sanitize(i915); +} + +void i915_gem_resume(struct drm_i915_private *i915) +{ + GEM_TRACE("\n"); + + WARN_ON(i915->gt.awake); + + mutex_lock(&i915->drm.struct_mutex); + intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); + + i915_gem_restore_gtt_mappings(i915); + i915_gem_restore_fences(i915); + + /* + * As we didn't flush the kernel context before suspend, we cannot + * guarantee that the context image is complete. So let's just reset + * it and start again. + */ + intel_gt_resume(i915); + + if (i915_gem_init_hw(i915)) + goto err_wedged; + + intel_uc_resume(i915); + + /* Always reload a context for powersaving. */ + if (!i915_gem_load_power_context(i915)) + goto err_wedged; + +out_unlock: + intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); + mutex_unlock(&i915->drm.struct_mutex); + return; + +err_wedged: + if (!i915_reset_failed(i915)) { + dev_err(i915->drm.dev, + "Failed to re-initialize GPU, declaring it wedged!\n"); + i915_gem_set_wedged(i915); + } + goto out_unlock; +} + +void i915_gem_init__pm(struct drm_i915_private *i915) +{ + INIT_WORK(&i915->gem.idle_work, idle_work_handler); + INIT_DELAYED_WORK(&i915->gem.retire_work, retire_work_handler); + + i915->gem.pm_notifier.notifier_call = pm_notifier; + blocking_notifier_chain_register(&i915->gt.pm_notifications, + &i915->gem.pm_notifier); +} diff --git a/drivers/gpu/drm/i915/i915_gem_pm.h b/drivers/gpu/drm/i915/i915_gem_pm.h new file mode 100644 index 0000000000000000000000000000000000000000..6f7d5d11ac3b3695d76d30aa9844c960bd3e0c72 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_pm.h @@ -0,0 +1,25 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2019 Intel Corporation + */ + +#ifndef __I915_GEM_PM_H__ +#define __I915_GEM_PM_H__ + +#include + +struct drm_i915_private; +struct work_struct; + +void i915_gem_init__pm(struct drm_i915_private *i915); + +bool i915_gem_load_power_context(struct drm_i915_private *i915); +void i915_gem_resume(struct drm_i915_private *i915); + +void i915_gem_idle_work_handler(struct work_struct *work); + +void i915_gem_suspend(struct drm_i915_private *i915); +void i915_gem_suspend_late(struct drm_i915_private *i915); + +#endif /* __I915_GEM_PM_H__ */ diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 6da795c7e62e47a12207cca861efafb1ea0f45f3..588e3898b120f8c776d9effb18954ec0420abaa2 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -114,6 +114,67 @@ static bool unsafe_drop_pages(struct drm_i915_gem_object *obj) return !i915_gem_object_has_pages(obj); } +static void __start_writeback(struct drm_i915_gem_object *obj, + unsigned int flags) +{ + struct address_space *mapping; + struct writeback_control wbc = { + .sync_mode = WB_SYNC_NONE, + .nr_to_write = SWAP_CLUSTER_MAX, + .range_start = 0, + .range_end = LLONG_MAX, + .for_reclaim = 1, + }; + unsigned long i; + + lockdep_assert_held(&obj->mm.lock); + GEM_BUG_ON(i915_gem_object_has_pages(obj)); + + switch (obj->mm.madv) { + case I915_MADV_DONTNEED: + __i915_gem_object_truncate(obj); + case __I915_MADV_PURGED: + return; + } + + if (!obj->base.filp) + return; + + if (!(flags & I915_SHRINK_WRITEBACK)) + return; + + /* + * Leave mmapings intact (GTT will have been revoked on unbinding, + * leaving only CPU mmapings around) and add those pages to the LRU + * instead of invoking writeback so they are aged and paged out + * as normal. + */ + mapping = obj->base.filp->f_mapping; + + /* Begin writeback on each dirty page */ + for (i = 0; i < obj->base.size >> PAGE_SHIFT; i++) { + struct page *page; + + page = find_lock_entry(mapping, i); + if (!page || xa_is_value(page)) + continue; + + if (!page_mapped(page) && clear_page_dirty_for_io(page)) { + int ret; + + SetPageReclaim(page); + ret = mapping->a_ops->writepage(page, &wbc); + if (!PageWriteback(page)) + ClearPageReclaim(page); + if (!ret) + goto put; + } + unlock_page(page); +put: + put_page(page); + } +} + /** * i915_gem_shrink - Shrink buffer object caches * @i915: i915 device @@ -254,7 +315,7 @@ i915_gem_shrink(struct drm_i915_private *i915, mutex_lock_nested(&obj->mm.lock, I915_MM_SHRINKER); if (!i915_gem_object_has_pages(obj)) { - __i915_gem_object_invalidate(obj); + __start_writeback(obj, flags); count += obj->base.size >> PAGE_SHIFT; } mutex_unlock(&obj->mm.lock); @@ -366,13 +427,15 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) &sc->nr_scanned, I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | - I915_SHRINK_PURGEABLE); + I915_SHRINK_PURGEABLE | + I915_SHRINK_WRITEBACK); if (sc->nr_scanned < sc->nr_to_scan) freed += i915_gem_shrink(i915, sc->nr_to_scan - sc->nr_scanned, &sc->nr_scanned, I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND); + I915_SHRINK_UNBOUND | + I915_SHRINK_WRITEBACK); if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) { intel_wakeref_t wakeref; @@ -382,7 +445,8 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) &sc->nr_scanned, I915_SHRINK_ACTIVE | I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND); + I915_SHRINK_UNBOUND | + I915_SHRINK_WRITEBACK); } } @@ -404,7 +468,8 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) with_intel_runtime_pm(i915, wakeref) freed_pages += i915_gem_shrink(i915, -1UL, NULL, I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND); + I915_SHRINK_UNBOUND | + I915_SHRINK_WRITEBACK); /* Because we may be allocating inside our own driver, we cannot * assert that there are no objects with pinned pages that are not diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index f51ff683dd2ec8d369a9ea611ee681d873789765..4f85cbdddb0d7cdc11bc2c9b24e8c437636f8c37 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -36,8 +36,11 @@ #include -#include "i915_gpu_error.h" #include "i915_drv.h" +#include "i915_gpu_error.h" +#include "intel_atomic.h" +#include "intel_csr.h" +#include "intel_overlay.h" static inline const struct intel_engine_cs * engine_lookup(const struct drm_i915_private *i915, unsigned int id) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 5dc761e85d9dfce9117df1feedcd96c93f1deb57..2ecd0c6a1c9492329e664a8b170d31a7c70cf580 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -13,8 +13,9 @@ #include +#include "gt/intel_engine.h" + #include "intel_device_info.h" -#include "intel_ringbuffer.h" #include "intel_uc_fw.h" #include "i915_gem.h" @@ -178,8 +179,6 @@ struct i915_gpu_state { struct scatterlist *sgl, *fit; }; -struct i915_gpu_restart; - struct i915_gpu_error { /* For hangcheck timer */ #define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */ @@ -240,8 +239,6 @@ struct i915_gpu_error { wait_queue_head_t reset_queue; struct srcu_struct reset_backoff_srcu; - - struct i915_gpu_restart *restart; }; struct drm_i915_error_state_buf { diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index b92cfd69134bb717e61031f5b7e9f6349896e478..233211fde0eabb17e8c4e28d714f5b6c66ff38da 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -38,8 +38,12 @@ #include #include "i915_drv.h" +#include "i915_irq.h" #include "i915_trace.h" #include "intel_drv.h" +#include "intel_fifo_underrun.h" +#include "intel_hotplug.h" +#include "intel_lpe_audio.h" #include "intel_psr.h" /** @@ -1301,7 +1305,7 @@ static void gen6_pm_rps_work(struct work_struct *work) if ((pm_iir & dev_priv->pm_rps_events) == 0 && !client_boost) goto out; - mutex_lock(&dev_priv->pcu_lock); + mutex_lock(&rps->lock); pm_iir |= vlv_wa_c0_ei(dev_priv, pm_iir); @@ -1367,7 +1371,7 @@ static void gen6_pm_rps_work(struct work_struct *work) rps->last_adj = 0; } - mutex_unlock(&dev_priv->pcu_lock); + mutex_unlock(&rps->lock); out: /* Make sure not to corrupt PMIMR state used by ringbuffer on GEN6 */ diff --git a/drivers/gpu/drm/i915/i915_irq.h b/drivers/gpu/drm/i915/i915_irq.h new file mode 100644 index 0000000000000000000000000000000000000000..0ccd0d90919d66d91fb53d28a3881dac2903db50 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_irq.h @@ -0,0 +1,114 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __I915_IRQ_H__ +#define __I915_IRQ_H__ + +#include + +#include "i915_drv.h" + +struct drm_i915_private; +struct intel_crtc; + +extern void intel_irq_init(struct drm_i915_private *dev_priv); +extern void intel_irq_fini(struct drm_i915_private *dev_priv); +int intel_irq_install(struct drm_i915_private *dev_priv); +void intel_irq_uninstall(struct drm_i915_private *dev_priv); + +u32 i915_pipestat_enable_mask(struct drm_i915_private *dev_priv, + enum pipe pipe); +void +i915_enable_pipestat(struct drm_i915_private *dev_priv, enum pipe pipe, + u32 status_mask); + +void +i915_disable_pipestat(struct drm_i915_private *dev_priv, enum pipe pipe, + u32 status_mask); + +void valleyview_enable_display_irqs(struct drm_i915_private *dev_priv); +void valleyview_disable_display_irqs(struct drm_i915_private *dev_priv); + +void i915_hotplug_interrupt_update(struct drm_i915_private *dev_priv, + u32 mask, + u32 bits); +void ilk_update_display_irq(struct drm_i915_private *dev_priv, + u32 interrupt_mask, + u32 enabled_irq_mask); +static inline void +ilk_enable_display_irq(struct drm_i915_private *dev_priv, u32 bits) +{ + ilk_update_display_irq(dev_priv, bits, bits); +} +static inline void +ilk_disable_display_irq(struct drm_i915_private *dev_priv, u32 bits) +{ + ilk_update_display_irq(dev_priv, bits, 0); +} +void bdw_update_pipe_irq(struct drm_i915_private *dev_priv, + enum pipe pipe, + u32 interrupt_mask, + u32 enabled_irq_mask); +static inline void bdw_enable_pipe_irq(struct drm_i915_private *dev_priv, + enum pipe pipe, u32 bits) +{ + bdw_update_pipe_irq(dev_priv, pipe, bits, bits); +} +static inline void bdw_disable_pipe_irq(struct drm_i915_private *dev_priv, + enum pipe pipe, u32 bits) +{ + bdw_update_pipe_irq(dev_priv, pipe, bits, 0); +} +void ibx_display_interrupt_update(struct drm_i915_private *dev_priv, + u32 interrupt_mask, + u32 enabled_irq_mask); +static inline void +ibx_enable_display_interrupt(struct drm_i915_private *dev_priv, u32 bits) +{ + ibx_display_interrupt_update(dev_priv, bits, bits); +} +static inline void +ibx_disable_display_interrupt(struct drm_i915_private *dev_priv, u32 bits) +{ + ibx_display_interrupt_update(dev_priv, bits, 0); +} + +void gen5_enable_gt_irq(struct drm_i915_private *dev_priv, u32 mask); +void gen5_disable_gt_irq(struct drm_i915_private *dev_priv, u32 mask); +void gen6_mask_pm_irq(struct drm_i915_private *dev_priv, u32 mask); +void gen6_unmask_pm_irq(struct drm_i915_private *dev_priv, u32 mask); +void gen11_reset_rps_interrupts(struct drm_i915_private *dev_priv); +void gen6_reset_rps_interrupts(struct drm_i915_private *dev_priv); +void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv); +void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv); +void gen6_rps_reset_ei(struct drm_i915_private *dev_priv); + +static inline u32 gen6_sanitize_rps_pm_mask(const struct drm_i915_private *i915, + u32 mask) +{ + return mask & ~i915->gt_pm.rps.pm_intrmsk_mbz; +} + +void intel_runtime_pm_disable_interrupts(struct drm_i915_private *dev_priv); +void intel_runtime_pm_enable_interrupts(struct drm_i915_private *dev_priv); +static inline bool intel_irqs_enabled(struct drm_i915_private *dev_priv) +{ + /* + * We only use drm_irq_uninstall() at unload and VT switch, so + * this is the only thing we need to check. + */ + return dev_priv->runtime_pm.irqs_enabled; +} + +int intel_get_crtc_scanline(struct intel_crtc *crtc); +void gen8_irq_power_well_post_enable(struct drm_i915_private *dev_priv, + u8 pipe_mask); +void gen8_irq_power_well_pre_disable(struct drm_i915_private *dev_priv, + u8 pipe_mask); +void gen9_reset_guc_interrupts(struct drm_i915_private *dev_priv); +void gen9_enable_guc_interrupts(struct drm_i915_private *dev_priv); +void gen9_disable_guc_interrupts(struct drm_i915_private *dev_priv); + +#endif /* __I915_IRQ_H__ */ diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index f893c2cbce15fd29a226f9476135be4b9a0aede3..d7c07a9474979dcd7e5ebb4ebe0c9d26197b0ec0 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -370,6 +370,7 @@ static const struct intel_device_info intel_ironlake_m_info = { .has_llc = 1, \ .has_rc6 = 1, \ .has_rc6p = 1, \ + .has_rps = true, \ .ppgtt_type = INTEL_PPGTT_ALIASING, \ .ppgtt_size = 31, \ I9XX_PIPE_OFFSETS, \ @@ -417,6 +418,7 @@ static const struct intel_device_info intel_sandybridge_m_gt2_info = { .has_llc = 1, \ .has_rc6 = 1, \ .has_rc6p = 1, \ + .has_rps = true, \ .ppgtt_type = INTEL_PPGTT_FULL, \ .ppgtt_size = 31, \ IVB_PIPE_OFFSETS, \ @@ -470,6 +472,7 @@ static const struct intel_device_info intel_valleyview_info = { .num_pipes = 2, .has_runtime_pm = 1, .has_rc6 = 1, + .has_rps = true, .display.has_gmch = 1, .display.has_hotplug = 1, .ppgtt_type = INTEL_PPGTT_FULL, @@ -565,6 +568,7 @@ static const struct intel_device_info intel_cherryview_info = { .has_64bit_reloc = 1, .has_runtime_pm = 1, .has_rc6 = 1, + .has_rps = true, .has_logical_ring_contexts = 1, .display.has_gmch = 1, .ppgtt_type = INTEL_PPGTT_FULL, @@ -596,8 +600,6 @@ static const struct intel_device_info intel_cherryview_info = { #define SKL_PLATFORM \ GEN9_FEATURES, \ - /* Display WA #0477 WaDisableIPC: skl */ \ - .display.has_ipc = 0, \ PLATFORM(INTEL_SKYLAKE) static const struct intel_device_info intel_skylake_gt1_info = { @@ -640,6 +642,7 @@ static const struct intel_device_info intel_skylake_gt4_info = { .has_runtime_pm = 1, \ .display.has_csr = 1, \ .has_rc6 = 1, \ + .has_rps = true, \ .display.has_dp_mst = 1, \ .has_logical_ring_contexts = 1, \ .has_logical_ring_preemption = 1, \ diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 39a4804091d70d61a5fd63e7dfcb60d3edde4060..c4995d5a16d24f9b0f261671316a1cac50db299d 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -195,6 +195,8 @@ #include #include +#include "gt/intel_lrc_reg.h" + #include "i915_drv.h" #include "i915_oa_hsw.h" #include "i915_oa_bdw.h" @@ -210,7 +212,6 @@ #include "i915_oa_cflgt3.h" #include "i915_oa_cnl.h" #include "i915_oa_icl.h" -#include "intel_lrc_reg.h" /* HW requires this to be a power of two, between 128k and 16M, though driver * is currently generally designed assuming the largest 16M size is used such @@ -1202,28 +1203,35 @@ static int i915_oa_read(struct i915_perf_stream *stream, static struct intel_context *oa_pin_context(struct drm_i915_private *i915, struct i915_gem_context *ctx) { - struct intel_engine_cs *engine = i915->engine[RCS0]; + struct i915_gem_engines_iter it; struct intel_context *ce; - int ret; + int err; - ret = i915_mutex_lock_interruptible(&i915->drm); - if (ret) - return ERR_PTR(ret); + err = i915_mutex_lock_interruptible(&i915->drm); + if (err) + return ERR_PTR(err); - /* - * As the ID is the gtt offset of the context's vma we - * pin the vma to ensure the ID remains fixed. - * - * NB: implied RCS engine... - */ - ce = intel_context_pin(ctx, engine); - mutex_unlock(&i915->drm.struct_mutex); - if (IS_ERR(ce)) - return ce; + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + if (ce->engine->class != RENDER_CLASS) + continue; + + /* + * As the ID is the gtt offset of the context's vma we + * pin the vma to ensure the ID remains fixed. + */ + err = intel_context_pin(ce); + if (err == 0) { + i915->perf.oa.pinned_ctx = ce; + break; + } + } + i915_gem_context_unlock_engines(ctx); - i915->perf.oa.pinned_ctx = ce; + mutex_unlock(&i915->drm.struct_mutex); + if (err) + return ERR_PTR(err); - return ce; + return i915->perf.oa.pinned_ctx; } /** @@ -1679,7 +1687,7 @@ gen8_update_reg_state_unlocked(struct intel_context *ce, CTX_REG(reg_state, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, - gen8_make_rpcs(i915, &ce->sseu)); + intel_sseu_make_rpcs(i915, &ce->sseu)); } /* @@ -1709,7 +1717,6 @@ gen8_update_reg_state_unlocked(struct intel_context *ce, static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, const struct i915_oa_config *oa_config) { - struct intel_engine_cs *engine = dev_priv->engine[RCS0]; unsigned int map_type = i915_coherent_map_type(dev_priv); struct i915_gem_context *ctx; struct i915_request *rq; @@ -1738,30 +1745,43 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, /* Update all contexts now that we've stalled the submission. */ list_for_each_entry(ctx, &dev_priv->contexts.list, link) { - struct intel_context *ce = intel_context_lookup(ctx, engine); - u32 *regs; - - /* OA settings will be set upon first use */ - if (!ce || !ce->state) - continue; - - regs = i915_gem_object_pin_map(ce->state->obj, map_type); - if (IS_ERR(regs)) - return PTR_ERR(regs); + struct i915_gem_engines_iter it; + struct intel_context *ce; + + for_each_gem_engine(ce, + i915_gem_context_lock_engines(ctx), + it) { + u32 *regs; + + if (ce->engine->class != RENDER_CLASS) + continue; + + /* OA settings will be set upon first use */ + if (!ce->state) + continue; + + regs = i915_gem_object_pin_map(ce->state->obj, + map_type); + if (IS_ERR(regs)) { + i915_gem_context_unlock_engines(ctx); + return PTR_ERR(regs); + } - ce->state->obj->mm.dirty = true; - regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs); + ce->state->obj->mm.dirty = true; + regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs); - gen8_update_reg_state_unlocked(ce, regs, oa_config); + gen8_update_reg_state_unlocked(ce, regs, oa_config); - i915_gem_object_unpin_map(ce->state->obj); + i915_gem_object_unpin_map(ce->state->obj); + } + i915_gem_context_unlock_engines(ctx); } /* * Apply the configuration by doing one context restore of the edited * context image. */ - rq = i915_request_alloc(engine, dev_priv->kernel_context); + rq = i915_request_create(dev_priv->engine[RCS0]->kernel_context); if (IS_ERR(rq)) return PTR_ERR(rq); diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 46a52da3db297bec061d874858b8fbb440dc4654..1ccda0ee4ff5dd8b74adbef91185739dcc09e40c 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -6,9 +6,12 @@ #include #include -#include "i915_pmu.h" -#include "intel_ringbuffer.h" + +#include "gt/intel_engine.h" + #include "i915_drv.h" +#include "i915_pmu.h" +#include "intel_pm.h" /* Frequency for the sampling timer for events which need it. */ #define FREQUENCY 200 diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c index 782183b78f49c3ae727ae4dbc8ad75392d229583..414d0a6d1f70d63e0745e82d4ae5c692653d68a7 100644 --- a/drivers/gpu/drm/i915/i915_query.c +++ b/drivers/gpu/drm/i915/i915_query.c @@ -96,9 +96,58 @@ static int query_topology_info(struct drm_i915_private *dev_priv, return total_length; } +static int +query_engine_info(struct drm_i915_private *i915, + struct drm_i915_query_item *query_item) +{ + struct drm_i915_query_engine_info __user *query_ptr = + u64_to_user_ptr(query_item->data_ptr); + struct drm_i915_engine_info __user *info_ptr; + struct drm_i915_query_engine_info query; + struct drm_i915_engine_info info = { }; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int len, ret; + + if (query_item->flags) + return -EINVAL; + + len = sizeof(struct drm_i915_query_engine_info) + + RUNTIME_INFO(i915)->num_engines * + sizeof(struct drm_i915_engine_info); + + ret = copy_query_item(&query, sizeof(query), len, query_item); + if (ret != 0) + return ret; + + if (query.num_engines || query.rsvd[0] || query.rsvd[1] || + query.rsvd[2]) + return -EINVAL; + + info_ptr = &query_ptr->engines[0]; + + for_each_engine(engine, i915, id) { + info.engine.engine_class = engine->uabi_class; + info.engine.engine_instance = engine->instance; + info.capabilities = engine->uabi_capabilities; + + if (__copy_to_user(info_ptr, &info, sizeof(info))) + return -EFAULT; + + query.num_engines++; + info_ptr++; + } + + if (__copy_to_user(query_ptr, &query, sizeof(query))) + return -EFAULT; + + return len; +} + static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv, struct drm_i915_query_item *query_item) = { query_topology_info, + query_engine_info, }; int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index b74824f0b5b1fcbc17262bc17e85238e88b2ceb3..49dce04dd688e6fe87a6096f8ee4fcc25da5f8e1 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1813,7 +1813,6 @@ enum i915_power_well_id { #define PWR_DOWN_LN_3 (0x8 << 4) #define PWR_DOWN_LN_2_1_0 (0x7 << 4) #define PWR_DOWN_LN_1_0 (0x3 << 4) -#define PWR_DOWN_LN_1 (0x2 << 4) #define PWR_DOWN_LN_3_1 (0xa << 4) #define PWR_DOWN_LN_3_1_0 (0xb << 4) #define PWR_DOWN_LN_MASK (0xf << 4) @@ -2870,6 +2869,7 @@ enum i915_power_well_id { #define GFX_FLSH_CNTL_GEN6 _MMIO(0x101008) #define GFX_FLSH_CNTL_EN (1 << 0) #define ECOSKPD _MMIO(0x21d0) +#define ECO_CONSTANT_BUFFER_SR_DISABLE REG_BIT(4) #define ECO_GATING_CX_ONLY (1 << 3) #define ECO_FLIP_DONE (1 << 0) @@ -5769,6 +5769,7 @@ enum { #define _PIPE_MISC_B 0x71030 #define PIPEMISC_YUV420_ENABLE (1 << 27) #define PIPEMISC_YUV420_MODE_FULL_BLEND (1 << 26) +#define PIPEMISC_HDR_MODE_PRECISION (1 << 23) /* icl+ */ #define PIPEMISC_OUTPUT_COLORSPACE_YUV (1 << 11) #define PIPEMISC_DITHER_BPC_MASK (7 << 5) #define PIPEMISC_DITHER_8_BPC (0 << 5) @@ -7620,6 +7621,9 @@ enum { #define GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION (1 << 8) #define GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE (1 << 0) +#define GEN8_L3CNTLREG _MMIO(0x7034) + #define GEN8_ERRDETBCTRL (1 << 9) + #define GEN11_COMMON_SLICE_CHICKEN3 _MMIO(0x7304) #define GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC (1 << 11) @@ -8862,6 +8866,7 @@ enum { #define GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC (1 << 7) #define GEN10_SAMPLER_MODE _MMIO(0xE18C) +#define GEN11_SAMPLER_ENABLE_HEADLESS_MSG REG_BIT(5) /* IVYBRIDGE DPF */ #define GEN7_L3CDERRST1(slice) _MMIO(0xB008 + (slice) * 0x200) /* L3CD Error Status 1 */ @@ -9009,32 +9014,32 @@ enum { /* HSW Audio */ #define _HSW_AUD_CONFIG_A 0x65000 #define _HSW_AUD_CONFIG_B 0x65100 -#define HSW_AUD_CFG(pipe) _MMIO_PIPE(pipe, _HSW_AUD_CONFIG_A, _HSW_AUD_CONFIG_B) +#define HSW_AUD_CFG(trans) _MMIO_TRANS(trans, _HSW_AUD_CONFIG_A, _HSW_AUD_CONFIG_B) #define _HSW_AUD_MISC_CTRL_A 0x65010 #define _HSW_AUD_MISC_CTRL_B 0x65110 -#define HSW_AUD_MISC_CTRL(pipe) _MMIO_PIPE(pipe, _HSW_AUD_MISC_CTRL_A, _HSW_AUD_MISC_CTRL_B) +#define HSW_AUD_MISC_CTRL(trans) _MMIO_TRANS(trans, _HSW_AUD_MISC_CTRL_A, _HSW_AUD_MISC_CTRL_B) #define _HSW_AUD_M_CTS_ENABLE_A 0x65028 #define _HSW_AUD_M_CTS_ENABLE_B 0x65128 -#define HSW_AUD_M_CTS_ENABLE(pipe) _MMIO_PIPE(pipe, _HSW_AUD_M_CTS_ENABLE_A, _HSW_AUD_M_CTS_ENABLE_B) +#define HSW_AUD_M_CTS_ENABLE(trans) _MMIO_TRANS(trans, _HSW_AUD_M_CTS_ENABLE_A, _HSW_AUD_M_CTS_ENABLE_B) #define AUD_M_CTS_M_VALUE_INDEX (1 << 21) #define AUD_M_CTS_M_PROG_ENABLE (1 << 20) #define AUD_CONFIG_M_MASK 0xfffff #define _HSW_AUD_DIP_ELD_CTRL_ST_A 0x650b4 #define _HSW_AUD_DIP_ELD_CTRL_ST_B 0x651b4 -#define HSW_AUD_DIP_ELD_CTRL(pipe) _MMIO_PIPE(pipe, _HSW_AUD_DIP_ELD_CTRL_ST_A, _HSW_AUD_DIP_ELD_CTRL_ST_B) +#define HSW_AUD_DIP_ELD_CTRL(trans) _MMIO_TRANS(trans, _HSW_AUD_DIP_ELD_CTRL_ST_A, _HSW_AUD_DIP_ELD_CTRL_ST_B) /* Audio Digital Converter */ #define _HSW_AUD_DIG_CNVT_1 0x65080 #define _HSW_AUD_DIG_CNVT_2 0x65180 -#define AUD_DIG_CNVT(pipe) _MMIO_PIPE(pipe, _HSW_AUD_DIG_CNVT_1, _HSW_AUD_DIG_CNVT_2) +#define AUD_DIG_CNVT(trans) _MMIO_TRANS(trans, _HSW_AUD_DIG_CNVT_1, _HSW_AUD_DIG_CNVT_2) #define DIP_PORT_SEL_MASK 0x3 #define _HSW_AUD_EDID_DATA_A 0x65050 #define _HSW_AUD_EDID_DATA_B 0x65150 -#define HSW_AUD_EDID_DATA(pipe) _MMIO_PIPE(pipe, _HSW_AUD_EDID_DATA_A, _HSW_AUD_EDID_DATA_B) +#define HSW_AUD_EDID_DATA(trans) _MMIO_TRANS(trans, _HSW_AUD_EDID_DATA_A, _HSW_AUD_EDID_DATA_B) #define HSW_AUD_PIPE_CONV_CFG _MMIO(0x6507c) #define HSW_AUD_PIN_ELD_CP_VLD _MMIO(0x650c0) @@ -9523,6 +9528,7 @@ enum skl_power_gate { #define TRANS_MSA_12_BPC (3 << 5) #define TRANS_MSA_16_BPC (4 << 5) #define TRANS_MSA_CEA_RANGE (1 << 3) +#define TRANS_MSA_USE_VSC_SDP (1 << 14) /* LCPLL Control */ #define LCPLL_CTL _MMIO(0x130040) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index c88e538b2ef41cba35348fb7623bf5d0bd9761ad..18b34b0bf87255f9b1ec9fcc5c6f2f7b4668606c 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -32,13 +32,14 @@ #include "i915_active.h" #include "i915_drv.h" #include "i915_globals.h" -#include "i915_reset.h" #include "intel_pm.h" struct execute_cb { struct list_head link; struct irq_work work; struct i915_sw_fence *fence; + void (*hook)(struct i915_request *rq, struct dma_fence *signal); + struct i915_request *signal; }; static struct i915_global_request { @@ -132,19 +133,6 @@ i915_request_remove_from_client(struct i915_request *request) spin_unlock(&file_priv->mm.lock); } -static void reserve_gt(struct drm_i915_private *i915) -{ - if (!i915->gt.active_requests++) - i915_gem_unpark(i915); -} - -static void unreserve_gt(struct drm_i915_private *i915) -{ - GEM_BUG_ON(!i915->gt.active_requests); - if (!--i915->gt.active_requests) - i915_gem_park(i915); -} - static void advance_ring(struct i915_request *request) { struct intel_ring *ring = request->ring; @@ -302,11 +290,10 @@ static void i915_request_retire(struct i915_request *request) i915_request_remove_from_client(request); - intel_context_unpin(request->hw_context); - __retire_engine_upto(request->engine, request); - unreserve_gt(request->i915); + intel_context_exit(request->hw_context); + intel_context_unpin(request->hw_context); i915_sched_node_fini(&request->sched); i915_request_put(request); @@ -344,6 +331,17 @@ static void irq_execute_cb(struct irq_work *wrk) kmem_cache_free(global.slab_execute_cbs, cb); } +static void irq_execute_cb_hook(struct irq_work *wrk) +{ + struct execute_cb *cb = container_of(wrk, typeof(*cb), work); + + cb->hook(container_of(cb->fence, struct i915_request, submit), + &cb->signal->fence); + i915_request_put(cb->signal); + + irq_execute_cb(wrk); +} + static void __notify_execute_cb(struct i915_request *rq) { struct execute_cb *cb; @@ -370,14 +368,19 @@ static void __notify_execute_cb(struct i915_request *rq) } static int -i915_request_await_execution(struct i915_request *rq, - struct i915_request *signal, - gfp_t gfp) +__i915_request_await_execution(struct i915_request *rq, + struct i915_request *signal, + void (*hook)(struct i915_request *rq, + struct dma_fence *signal), + gfp_t gfp) { struct execute_cb *cb; - if (i915_request_is_active(signal)) + if (i915_request_is_active(signal)) { + if (hook) + hook(rq, &signal->fence); return 0; + } cb = kmem_cache_alloc(global.slab_execute_cbs, gfp); if (!cb) @@ -387,8 +390,18 @@ i915_request_await_execution(struct i915_request *rq, i915_sw_fence_await(cb->fence); init_irq_work(&cb->work, irq_execute_cb); + if (hook) { + cb->hook = hook; + cb->signal = i915_request_get(signal); + cb->work.func = irq_execute_cb_hook; + } + spin_lock_irq(&signal->lock); if (i915_request_is_active(signal)) { + if (hook) { + hook(rq, &signal->fence); + i915_request_put(signal); + } i915_sw_fence_complete(cb->fence); kmem_cache_free(global.slab_execute_cbs, cb); } else { @@ -466,6 +479,8 @@ void __i915_request_submit(struct i915_request *request) /* Transfer from per-context onto the global per-engine timeline */ move_to_timeline(request, &engine->timeline); + engine->serial++; + trace_i915_request_execute(request); } @@ -513,6 +528,12 @@ void __i915_request_unsubmit(struct i915_request *request) /* Transfer back from the global per-engine timeline to per-context */ move_to_timeline(request, request->timeline); + /* We've already spun, don't charge on resubmitting. */ + if (request->sched.semaphores && i915_request_started(request)) { + request->sched.attr.priority |= I915_PRIORITY_NOSEMAPHORE; + request->sched.semaphores = 0; + } + /* * We don't need to wake_up any waiters on request->execute, they * will get woken by any other event or us re-adding this request @@ -597,7 +618,7 @@ static void ring_retire_requests(struct intel_ring *ring) } static noinline struct i915_request * -i915_request_alloc_slow(struct intel_context *ce) +request_alloc_slow(struct intel_context *ce, gfp_t gfp) { struct intel_ring *ring = ce->ring; struct i915_request *rq; @@ -605,6 +626,9 @@ i915_request_alloc_slow(struct intel_context *ce) if (list_empty(&ring->request_list)) goto out; + if (!gfpflags_allow_blocking(gfp)) + goto out; + /* Ratelimit ourselves to prevent oom from malicious clients */ rq = list_last_entry(&ring->request_list, typeof(*rq), ring_link); cond_synchronize_rcu(rq->rcustate); @@ -613,62 +637,21 @@ i915_request_alloc_slow(struct intel_context *ce) ring_retire_requests(ring); out: - return kmem_cache_alloc(global.slab_requests, GFP_KERNEL); + return kmem_cache_alloc(global.slab_requests, gfp); } -/** - * i915_request_alloc - allocate a request structure - * - * @engine: engine that we wish to issue the request on. - * @ctx: context that the request will be associated with. - * - * Returns a pointer to the allocated request if successful, - * or an error code if not. - */ struct i915_request * -i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) +__i915_request_create(struct intel_context *ce, gfp_t gfp) { - struct drm_i915_private *i915 = engine->i915; - struct intel_context *ce; - struct i915_timeline *tl; + struct i915_timeline *tl = ce->ring->timeline; struct i915_request *rq; u32 seqno; int ret; - lockdep_assert_held(&i915->drm.struct_mutex); + might_sleep_if(gfpflags_allow_blocking(gfp)); - /* - * Preempt contexts are reserved for exclusive use to inject a - * preemption context switch. They are never to be used for any trivial - * request! - */ - GEM_BUG_ON(ctx == i915->preempt_context); - - /* - * ABI: Before userspace accesses the GPU (e.g. execbuffer), report - * EIO if the GPU is already wedged. - */ - ret = i915_terminally_wedged(i915); - if (ret) - return ERR_PTR(ret); - - /* - * Pinning the contexts may generate requests in order to acquire - * GGTT space, so do this first before we reserve a seqno for - * ourselves. - */ - ce = intel_context_pin(ctx, engine); - if (IS_ERR(ce)) - return ERR_CAST(ce); - - reserve_gt(i915); - mutex_lock(&ce->ring->timeline->mutex); - - /* Move our oldest request to the slab-cache (if not in use!) */ - rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link); - if (!list_is_last(&rq->ring_link, &ce->ring->request_list) && - i915_request_completed(rq)) - i915_request_retire(rq); + /* Check that the caller provided an already pinned context */ + __intel_context_pin(ce); /* * Beware: Dragons be flying overhead. @@ -700,30 +683,26 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) * Do not use kmem_cache_zalloc() here! */ rq = kmem_cache_alloc(global.slab_requests, - GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); + gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); if (unlikely(!rq)) { - rq = i915_request_alloc_slow(ce); + rq = request_alloc_slow(ce, gfp); if (!rq) { ret = -ENOMEM; goto err_unreserve; } } - INIT_LIST_HEAD(&rq->active_list); - INIT_LIST_HEAD(&rq->execute_cb); - - tl = ce->ring->timeline; ret = i915_timeline_get_seqno(tl, rq, &seqno); if (ret) goto err_free; - rq->i915 = i915; - rq->engine = engine; - rq->gem_context = ctx; + rq->i915 = ce->engine->i915; rq->hw_context = ce; + rq->gem_context = ce->gem_context; + rq->engine = ce->engine; rq->ring = ce->ring; rq->timeline = tl; - GEM_BUG_ON(rq->timeline == &engine->timeline); + GEM_BUG_ON(rq->timeline == &ce->engine->timeline); rq->hwsp_seqno = tl->hwsp_seqno; rq->hwsp_cacheline = tl->hwsp_cacheline; rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */ @@ -743,6 +722,10 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) rq->batch = NULL; rq->capture_list = NULL; rq->waitboost = false; + rq->execution_mask = ALL_ENGINES; + + INIT_LIST_HEAD(&rq->active_list); + INIT_LIST_HEAD(&rq->execute_cb); /* * Reserve space in the ring buffer for all the commands required to @@ -756,7 +739,8 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) * around inside i915_request_add() there is sufficient space at * the beginning of the ring as well. */ - rq->reserved_space = 2 * engine->emit_fini_breadcrumb_dw * sizeof(u32); + rq->reserved_space = + 2 * rq->engine->emit_fini_breadcrumb_dw * sizeof(u32); /* * Record the position of the start of the request so that @@ -766,20 +750,16 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) */ rq->head = rq->ring->emit; - ret = engine->request_alloc(rq); + ret = rq->engine->request_alloc(rq); if (ret) goto err_unwind; - /* Keep a second pin for the dual retirement along engine and ring */ - __intel_context_pin(ce); - rq->infix = rq->ring->emit; /* end of header; start of user payload */ - /* Check that we didn't interrupt ourselves with a new request */ - lockdep_assert_held(&rq->timeline->mutex); - GEM_BUG_ON(rq->timeline->seqno != rq->fence.seqno); - rq->cookie = lockdep_pin_lock(&rq->timeline->mutex); + /* Keep a second pin for the dual retirement along engine and ring */ + __intel_context_pin(ce); + intel_context_mark_active(ce); return rq; err_unwind: @@ -793,12 +773,39 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) err_free: kmem_cache_free(global.slab_requests, rq); err_unreserve: - mutex_unlock(&ce->ring->timeline->mutex); - unreserve_gt(i915); intel_context_unpin(ce); return ERR_PTR(ret); } +struct i915_request * +i915_request_create(struct intel_context *ce) +{ + struct i915_request *rq; + + intel_context_timeline_lock(ce); + + /* Move our oldest request to the slab-cache (if not in use!) */ + rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link); + if (!list_is_last(&rq->ring_link, &ce->ring->request_list) && + i915_request_completed(rq)) + i915_request_retire(rq); + + intel_context_enter(ce); + rq = __i915_request_create(ce, GFP_KERNEL); + intel_context_exit(ce); /* active reference transferred to request */ + if (IS_ERR(rq)) + goto err_unlock; + + /* Check that we do not interrupt ourselves with a new request */ + rq->cookie = lockdep_pin_lock(&ce->ring->timeline->mutex); + + return rq; + +err_unlock: + intel_context_timeline_unlock(ce); + return rq; +} + static int i915_request_await_start(struct i915_request *rq, struct i915_request *signal) { @@ -854,13 +861,13 @@ emit_semaphore_wait(struct i915_request *to, if (err < 0) return err; - /* We need to pin the signaler's HWSP until we are finished reading. */ - err = i915_timeline_read_hwsp(from, to, &hwsp_offset); + /* Only submit our spinner after the signaler is running! */ + err = __i915_request_await_execution(to, from, NULL, gfp); if (err) return err; - /* Only submit our spinner after the signaler is running! */ - err = i915_request_await_execution(to, from, gfp); + /* We need to pin the signaler's HWSP until we are finished reading. */ + err = i915_timeline_read_hwsp(from, to, &hwsp_offset); if (err) return err; @@ -991,6 +998,52 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) return 0; } +int +i915_request_await_execution(struct i915_request *rq, + struct dma_fence *fence, + void (*hook)(struct i915_request *rq, + struct dma_fence *signal)) +{ + struct dma_fence **child = &fence; + unsigned int nchild = 1; + int ret; + + if (dma_fence_is_array(fence)) { + struct dma_fence_array *array = to_dma_fence_array(fence); + + /* XXX Error for signal-on-any fence arrays */ + + child = array->fences; + nchild = array->num_fences; + GEM_BUG_ON(!nchild); + } + + do { + fence = *child++; + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + continue; + + /* + * We don't squash repeated fence dependencies here as we + * want to run our callback in all cases. + */ + + if (dma_fence_is_i915(fence)) + ret = __i915_request_await_execution(rq, + to_request(fence), + hook, + I915_FENCE_GFP); + else + ret = i915_sw_fence_await_dma_fence(&rq->submit, fence, + I915_FENCE_TIMEOUT, + GFP_KERNEL); + if (ret < 0) + return ret; + } while (--nchild); + + return 0; +} + /** * i915_request_await_object - set this request to (async) wait upon a bo * @to: request we are wishing to use @@ -1100,8 +1153,7 @@ __i915_request_add_to_timeline(struct i915_request *rq) * precludes optimising to use semaphores serialisation of a single * timeline across engines. */ - prev = i915_active_request_raw(&timeline->last_request, - &rq->i915->drm.struct_mutex); + prev = rcu_dereference_protected(timeline->last_request.request, 1); if (prev && !i915_request_completed(prev)) { if (is_power_of_2(prev->engine->mask | rq->engine->mask)) i915_sw_fence_await_sw_fence(&rq->submit, @@ -1122,6 +1174,11 @@ __i915_request_add_to_timeline(struct i915_request *rq) list_add_tail(&rq->link, &timeline->requests); spin_unlock_irq(&timeline->lock); + /* + * Make sure that no request gazumped us - if it was allocated after + * our i915_request_alloc() and called __i915_request_add() before + * us, the timeline will hold its seqno which is later than ours. + */ GEM_BUG_ON(timeline->seqno != rq->fence.seqno); __i915_active_request_set(&timeline->last_request, rq); @@ -1133,36 +1190,23 @@ __i915_request_add_to_timeline(struct i915_request *rq) * request is not being tracked for completion but the work itself is * going to happen on the hardware. This would be a Bad Thing(tm). */ -void i915_request_add(struct i915_request *request) +struct i915_request *__i915_request_commit(struct i915_request *rq) { - struct intel_engine_cs *engine = request->engine; - struct i915_timeline *timeline = request->timeline; - struct intel_ring *ring = request->ring; + struct intel_engine_cs *engine = rq->engine; + struct intel_ring *ring = rq->ring; struct i915_request *prev; u32 *cs; GEM_TRACE("%s fence %llx:%lld\n", - engine->name, request->fence.context, request->fence.seqno); - - lockdep_assert_held(&request->timeline->mutex); - lockdep_unpin_lock(&request->timeline->mutex, request->cookie); - - trace_i915_request_add(request); - - /* - * Make sure that no request gazumped us - if it was allocated after - * our i915_request_alloc() and called __i915_request_add() before - * us, the timeline will hold its seqno which is later than ours. - */ - GEM_BUG_ON(timeline->seqno != request->fence.seqno); + engine->name, rq->fence.context, rq->fence.seqno); /* * To ensure that this call will not fail, space for its emissions * should already have been reserved in the ring buffer. Let the ring * know that it is time to use that space up. */ - GEM_BUG_ON(request->reserved_space > request->ring->space); - request->reserved_space = 0; + GEM_BUG_ON(rq->reserved_space > ring->space); + rq->reserved_space = 0; /* * Record the position of the start of the breadcrumb so that @@ -1170,17 +1214,16 @@ void i915_request_add(struct i915_request *request) * GPU processing the request, we never over-estimate the * position of the ring's HEAD. */ - cs = intel_ring_begin(request, engine->emit_fini_breadcrumb_dw); + cs = intel_ring_begin(rq, engine->emit_fini_breadcrumb_dw); GEM_BUG_ON(IS_ERR(cs)); - request->postfix = intel_ring_offset(request, cs); + rq->postfix = intel_ring_offset(rq, cs); - prev = __i915_request_add_to_timeline(request); + prev = __i915_request_add_to_timeline(rq); - list_add_tail(&request->ring_link, &ring->request_list); - if (list_is_first(&request->ring_link, &ring->request_list)) - list_add(&ring->active_link, &request->i915->gt.active_rings); - request->i915->gt.active_engines |= request->engine->mask; - request->emitted_jiffies = jiffies; + list_add_tail(&rq->ring_link, &ring->request_list); + if (list_is_first(&rq->ring_link, &ring->request_list)) + list_add(&ring->active_link, &rq->i915->gt.active_rings); + rq->emitted_jiffies = jiffies; /* * Let the backend know a new request has arrived that may need @@ -1194,10 +1237,10 @@ void i915_request_add(struct i915_request *request) * run at the earliest possible convenience. */ local_bh_disable(); - i915_sw_fence_commit(&request->semaphore); + i915_sw_fence_commit(&rq->semaphore); rcu_read_lock(); /* RCU serialisation for set-wedged protection */ if (engine->schedule) { - struct i915_sched_attr attr = request->gem_context->sched; + struct i915_sched_attr attr = rq->gem_context->sched; /* * Boost actual workloads past semaphores! @@ -1211,7 +1254,7 @@ void i915_request_add(struct i915_request *request) * far in the distance past over useful work, we keep a history * of any semaphore use along our dependency chain. */ - if (!(request->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN)) + if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN)) attr.priority |= I915_PRIORITY_NOSEMAPHORE; /* @@ -1220,15 +1263,29 @@ void i915_request_add(struct i915_request *request) * Allow interactive/synchronous clients to jump ahead of * the bulk clients. (FQ_CODEL) */ - if (list_empty(&request->sched.signalers_list)) + if (list_empty(&rq->sched.signalers_list)) attr.priority |= I915_PRIORITY_WAIT; - engine->schedule(request, &attr); + engine->schedule(rq, &attr); } rcu_read_unlock(); - i915_sw_fence_commit(&request->submit); + i915_sw_fence_commit(&rq->submit); local_bh_enable(); /* Kick the execlists tasklet if just scheduled */ + return prev; +} + +void i915_request_add(struct i915_request *rq) +{ + struct i915_request *prev; + + lockdep_assert_held(&rq->timeline->mutex); + lockdep_unpin_lock(&rq->timeline->mutex, rq->cookie); + + trace_i915_request_add(rq); + + prev = __i915_request_commit(rq); + /* * In typical scenarios, we do not expect the previous request on * the timeline to be still tracked by timeline->last_request if it @@ -1249,7 +1306,7 @@ void i915_request_add(struct i915_request *request) if (prev && i915_request_completed(prev)) i915_request_retire_upto(prev); - mutex_unlock(&request->timeline->mutex); + mutex_unlock(&rq->timeline->mutex); } static unsigned long local_clock_us(unsigned int *cpu) @@ -1382,8 +1439,31 @@ long i915_request_wait(struct i915_request *rq, trace_i915_request_wait_begin(rq, flags); - /* Optimistic short spin before touching IRQs */ - if (__i915_spin_request(rq, state, 5)) + /* + * Optimistic spin before touching IRQs. + * + * We may use a rather large value here to offset the penalty of + * switching away from the active task. Frequently, the client will + * wait upon an old swapbuffer to throttle itself to remain within a + * frame of the gpu. If the client is running in lockstep with the gpu, + * then it should not be waiting long at all, and a sleep now will incur + * extra scheduler latency in producing the next frame. To try to + * avoid adding the cost of enabling/disabling the interrupt to the + * short wait, we first spin to see if the request would have completed + * in the time taken to setup the interrupt. + * + * We need upto 5us to enable the irq, and upto 20us to hide the + * scheduler latency of a context switch, ignoring the secondary + * impacts from a context switch such as cache eviction. + * + * The scheme used for low-latency IO is called "hybrid interrupt + * polling". The suggestion there is to sleep until just before you + * expect to be woken by the device interrupt and then poll for its + * completion. That requires having a good predictor for the request + * duration, which we currently lack. + */ + if (CONFIG_DRM_I915_SPIN_REQUEST && + __i915_spin_request(rq, state, CONFIG_DRM_I915_SPIN_REQUEST)) goto out; /* @@ -1401,9 +1481,7 @@ long i915_request_wait(struct i915_request *rq, if (flags & I915_WAIT_PRIORITY) { if (!i915_request_started(rq) && INTEL_GEN(rq->i915) >= 6) gen6_rps_boost(rq); - local_bh_disable(); /* suspend tasklets for reprioritisation */ i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT); - local_bh_enable(); /* kick tasklets en masse */ } wait.tsk = current; @@ -1437,21 +1515,20 @@ long i915_request_wait(struct i915_request *rq, return timeout; } -void i915_retire_requests(struct drm_i915_private *i915) +bool i915_retire_requests(struct drm_i915_private *i915) { struct intel_ring *ring, *tmp; lockdep_assert_held(&i915->drm.struct_mutex); - if (!i915->gt.active_requests) - return; - list_for_each_entry_safe(ring, tmp, &i915->gt.active_rings, active_link) { intel_ring_get(ring); /* last rq holds reference! */ ring_retire_requests(ring); intel_ring_put(ring); } + + return !list_empty(&i915->gt.active_rings); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index a982664618c2c977d89875dcc1dc4dd575e31106..c9f7d07991c831fa1797bb68aa191ac67045e1d5 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -28,6 +28,8 @@ #include #include +#include "gt/intel_engine_types.h" + #include "i915_gem.h" #include "i915_scheduler.h" #include "i915_selftest.h" @@ -156,6 +158,7 @@ struct i915_request { */ struct i915_sched_node sched; struct i915_dependency dep; + intel_engine_mask_t execution_mask; /* * A convenience pointer to the current breadcrumb value stored in @@ -240,8 +243,12 @@ static inline bool dma_fence_is_i915(const struct dma_fence *fence) } struct i915_request * __must_check -i915_request_alloc(struct intel_engine_cs *engine, - struct i915_gem_context *ctx); +__i915_request_create(struct intel_context *ce, gfp_t gfp); +struct i915_request * __must_check +i915_request_create(struct intel_context *ce); + +struct i915_request *__i915_request_commit(struct i915_request *request); + void i915_request_retire_upto(struct i915_request *rq); static inline struct i915_request * @@ -276,6 +283,10 @@ int i915_request_await_object(struct i915_request *to, bool write); int i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence); +int i915_request_await_execution(struct i915_request *rq, + struct dma_fence *fence, + void (*hook)(struct i915_request *rq, + struct dma_fence *signal)); void i915_request_add(struct i915_request *rq); @@ -418,6 +429,6 @@ static inline void i915_request_mark_complete(struct i915_request *rq) rq->hwsp_seqno = (u32 *)&rq->fence.seqno; /* decouple from HWSP */ } -void i915_retire_requests(struct drm_i915_private *i915); +bool i915_retire_requests(struct drm_i915_private *i915); #endif /* I915_REQUEST_H */ diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 108f52e1bf359dd72248b616dae39c475a5aba52..78ceb56d78017fac77b6484608ff21a7e9bc46bf 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -150,29 +150,49 @@ sched_lock_engine(const struct i915_sched_node *node, struct intel_engine_cs *locked, struct sched_cache *cache) { - struct intel_engine_cs *engine = node_to_request(node)->engine; + const struct i915_request *rq = node_to_request(node); + struct intel_engine_cs *engine; GEM_BUG_ON(!locked); - if (engine != locked) { + /* + * Virtual engines complicate acquiring the engine timeline lock, + * as their rq->engine pointer is not stable until under that + * engine lock. The simple ploy we use is to take the lock then + * check that the rq still belongs to the newly locked engine. + */ + while (locked != (engine = READ_ONCE(rq->engine))) { spin_unlock(&locked->timeline.lock); memset(cache, 0, sizeof(*cache)); spin_lock(&engine->timeline.lock); + locked = engine; } - return engine; + GEM_BUG_ON(locked != engine); + return locked; } -static bool inflight(const struct i915_request *rq, - const struct intel_engine_cs *engine) +static inline int rq_prio(const struct i915_request *rq) { - const struct i915_request *active; + return rq->sched.attr.priority | __NO_PREEMPTION; +} - if (!i915_request_is_active(rq)) - return false; +static void kick_submission(struct intel_engine_cs *engine, int prio) +{ + const struct i915_request *inflight = + port_request(engine->execlists.port); + + /* + * If we are already the currently executing context, don't + * bother evaluating if we should preempt ourselves, or if + * we expect nothing to change as a result of running the + * tasklet, i.e. we have not change the priority queue + * sufficiently to oust the running context. + */ + if (inflight && !i915_scheduler_need_preempt(prio, rq_prio(inflight))) + return; - active = port_request(engine->execlists.port); - return active->hw_context == rq->hw_context; + tasklet_hi_schedule(&engine->execlists.tasklet); } static void __i915_schedule(struct i915_sched_node *node, @@ -189,10 +209,10 @@ static void __i915_schedule(struct i915_sched_node *node, lockdep_assert_held(&schedule_lock); GEM_BUG_ON(prio == I915_PRIORITY_INVALID); - if (node_signaled(node)) + if (prio <= READ_ONCE(node->attr.priority)) return; - if (prio <= READ_ONCE(node->attr.priority)) + if (node_signaled(node)) return; stack.signaler = node; @@ -261,6 +281,7 @@ static void __i915_schedule(struct i915_sched_node *node, spin_lock(&engine->timeline.lock); /* Fifo and depth-first replacement ensure our deps execute before us */ + engine = sched_lock_engine(node, engine, &cache); list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) { INIT_LIST_HEAD(&dep->dfs_link); @@ -272,8 +293,11 @@ static void __i915_schedule(struct i915_sched_node *node, if (prio <= node->attr.priority || node_signaled(node)) continue; + GEM_BUG_ON(node_to_request(node)->engine != engine); + node->attr.priority = prio; if (!list_empty(&node->link)) { + GEM_BUG_ON(intel_engine_is_virtual(engine)); if (!cache.priolist) cache.priolist = i915_sched_lookup_priolist(engine, @@ -297,15 +321,8 @@ static void __i915_schedule(struct i915_sched_node *node, engine->execlists.queue_priority_hint = prio; - /* - * If we are already the currently executing context, don't - * bother evaluating if we should preempt ourselves. - */ - if (inflight(node_to_request(node), engine)) - continue; - /* Defer (tasklet) submission until after all of our updates. */ - tasklet_hi_schedule(&engine->execlists.tasklet); + kick_submission(engine, prio); } spin_unlock(&engine->timeline.lock); diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index 07d243acf553b2bc9f018be31d8072e5929c9a2b..7eefccff39bf64551d717c35c86af51053bd9c50 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -52,4 +52,22 @@ static inline void i915_priolist_free(struct i915_priolist *p) __i915_priolist_free(p); } +static inline bool i915_scheduler_need_preempt(int prio, int active) +{ + /* + * Allow preemption of low -> normal -> high, but we do + * not allow low priority tasks to preempt other low priority + * tasks under the impression that latency for low priority + * tasks does not matter (as much as background throughput), + * so kiss. + * + * More naturally we would write + * prio >= max(0, last); + * except that we wish to prevent triggering preemption at the same + * priority level: the task that is running should remain running + * to preserve FIFO ordering of dependencies. + */ + return prio > max(I915_PRIORITY_NORMAL - 1, active); +} + #endif /* _I915_SCHEDULER_H_ */ diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h index 4f2b2eb7c3e513d9320845bdad6af31da65355b6..3e309631bd0bc2fe910b0beecfecfb0ae421753f 100644 --- a/drivers/gpu/drm/i915/i915_scheduler_types.h +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h @@ -9,8 +9,8 @@ #include +#include "gt/intel_engine_types.h" #include "i915_priolist_types.h" -#include "intel_engine_types.h" struct drm_i915_private; struct i915_request; diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c index 95f3dab1b2292fca7fad52a8b670e43dfb1e0845..581201bcb81ad0cb0375d826f6b969f063998c6f 100644 --- a/drivers/gpu/drm/i915/i915_suspend.c +++ b/drivers/gpu/drm/i915/i915_suspend.c @@ -29,6 +29,7 @@ #include "i915_reg.h" #include "intel_drv.h" #include "intel_fbc.h" +#include "intel_gmbus.h" static void i915_save_display(struct drm_i915_private *dev_priv) { @@ -144,7 +145,7 @@ int i915_restore_state(struct drm_i915_private *dev_priv) mutex_unlock(&dev_priv->drm.struct_mutex); - intel_i2c_reset(dev_priv); + intel_gmbus_reset(dev_priv); return 0; } diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 41313005af42517873fa29d94016c17cf05cd317..3ef07b987d407d4cbd5b8296b00da2babd236a65 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -29,8 +29,11 @@ #include #include #include -#include "intel_drv.h" + #include "i915_drv.h" +#include "intel_drv.h" +#include "intel_pm.h" +#include "intel_sideband.h" static inline struct drm_i915_private *kdev_minor_to_i915(struct device *kdev) { @@ -259,25 +262,23 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev, { struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); intel_wakeref_t wakeref; - int ret; + u32 freq; wakeref = intel_runtime_pm_get(dev_priv); - mutex_lock(&dev_priv->pcu_lock); if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { - u32 freq; + vlv_punit_get(dev_priv); freq = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); - ret = intel_gpu_freq(dev_priv, (freq >> 8) & 0xff); + vlv_punit_put(dev_priv); + + freq = (freq >> 8) & 0xff; } else { - ret = intel_gpu_freq(dev_priv, - intel_get_cagf(dev_priv, - I915_READ(GEN6_RPSTAT1))); + freq = intel_get_cagf(dev_priv, I915_READ(GEN6_RPSTAT1)); } - mutex_unlock(&dev_priv->pcu_lock); intel_runtime_pm_put(dev_priv, wakeref); - return snprintf(buf, PAGE_SIZE, "%d\n", ret); + return snprintf(buf, PAGE_SIZE, "%d\n", intel_gpu_freq(dev_priv, freq)); } static ssize_t gt_cur_freq_mhz_show(struct device *kdev, @@ -318,12 +319,12 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev, if (val < rps->min_freq || val > rps->max_freq) return -EINVAL; - mutex_lock(&dev_priv->pcu_lock); + mutex_lock(&rps->lock); if (val != rps->boost_freq) { rps->boost_freq = val; boost = atomic_read(&rps->num_waiters); } - mutex_unlock(&dev_priv->pcu_lock); + mutex_unlock(&rps->lock); if (boost) schedule_work(&rps->work); @@ -364,17 +365,14 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, return ret; wakeref = intel_runtime_pm_get(dev_priv); - - mutex_lock(&dev_priv->pcu_lock); + mutex_lock(&rps->lock); val = intel_freq_opcode(dev_priv, val); - if (val < rps->min_freq || val > rps->max_freq || val < rps->min_freq_softlimit) { - mutex_unlock(&dev_priv->pcu_lock); - intel_runtime_pm_put(dev_priv, wakeref); - return -EINVAL; + ret = -EINVAL; + goto unlock; } if (val > rps->rp0_freq) @@ -392,8 +390,8 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, * frequency request may be unchanged. */ ret = intel_set_rps(dev_priv, val); - mutex_unlock(&dev_priv->pcu_lock); - +unlock: + mutex_unlock(&rps->lock); intel_runtime_pm_put(dev_priv, wakeref); return ret ?: count; @@ -423,17 +421,14 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, return ret; wakeref = intel_runtime_pm_get(dev_priv); - - mutex_lock(&dev_priv->pcu_lock); + mutex_lock(&rps->lock); val = intel_freq_opcode(dev_priv, val); - if (val < rps->min_freq || val > rps->max_freq || val > rps->max_freq_softlimit) { - mutex_unlock(&dev_priv->pcu_lock); - intel_runtime_pm_put(dev_priv, wakeref); - return -EINVAL; + ret = -EINVAL; + goto unlock; } rps->min_freq_softlimit = val; @@ -447,8 +442,8 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, * frequency request may be unchanged. */ ret = intel_set_rps(dev_priv, val); - mutex_unlock(&dev_priv->pcu_lock); - +unlock: + mutex_unlock(&rps->lock); intel_runtime_pm_put(dev_priv, wakeref); return ret ?: count; diff --git a/drivers/gpu/drm/i915/i915_timeline_types.h b/drivers/gpu/drm/i915/i915_timeline_types.h index 5256a0b5c5f77b005199f9d5a2b3e09100ab1070..1688705f4a2b7633db520f40f71c82cd10347ba6 100644 --- a/drivers/gpu/drm/i915/i915_timeline_types.h +++ b/drivers/gpu/drm/i915/i915_timeline_types.h @@ -26,6 +26,7 @@ struct i915_timeline { spinlock_t lock; #define TIMELINE_CLIENT 0 /* default subclass */ #define TIMELINE_ENGINE 1 +#define TIMELINE_VIRTUAL 2 struct mutex mutex; /* protects the flow of requests */ unsigned int pin_count; diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 12893304c8f895e96c44e0349dc2149a63c53721..83b389e34b503eaf35cc07c823e13dde81b5a148 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -8,9 +8,11 @@ #include +#include "gt/intel_engine.h" + #include "i915_drv.h" +#include "i915_irq.h" #include "intel_drv.h" -#include "intel_ringbuffer.h" #undef TRACE_SYSTEM #define TRACE_SYSTEM i915 diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 2dbe8933b50a1796b43446d81b4361f846316d69..e52866084891bfe2acf6280bdb94457611d83223 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -25,6 +25,12 @@ #ifndef __I915_UTILS_H #define __I915_UTILS_H +#include +#include +#include +#include +#include + #undef WARN_ON /* Many gcc seem to no see through this and fall over :( */ #if 0 @@ -73,6 +79,39 @@ #define overflows_type(x, T) \ (sizeof(x) > sizeof(T) && (x) >> BITS_PER_TYPE(T)) +static inline bool +__check_struct_size(size_t base, size_t arr, size_t count, size_t *size) +{ + size_t sz; + + if (check_mul_overflow(count, arr, &sz)) + return false; + + if (check_add_overflow(sz, base, &sz)) + return false; + + *size = sz; + return true; +} + +/** + * check_struct_size() - Calculate size of structure with trailing array. + * @p: Pointer to the structure. + * @member: Name of the array member. + * @n: Number of elements in the array. + * @sz: Total size of structure and array + * + * Calculates size of memory needed for structure @p followed by an + * array of @n @member elements, like struct_size() but reports + * whether it overflowed, and the resultant size in @sz + * + * Return: false if the calculation overflowed. + */ +#define check_struct_size(p, member, n, sz) \ + likely(__check_struct_size(sizeof(*(p)), \ + sizeof(*(p)->member) + __must_be_array((p)->member), \ + n, sz)) + #define ptr_mask_bits(ptr, n) ({ \ unsigned long __v = (unsigned long)(ptr); \ (typeof(ptr))(__v & -BIT(n)); \ @@ -97,6 +136,8 @@ #define page_pack_bits(ptr, bits) ptr_pack_bits(ptr, bits, PAGE_SHIFT) #define page_unpack_bits(ptr, bits) ptr_unpack_bits(ptr, bits, PAGE_SHIFT) +#define struct_member(T, member) (((T *)0)->member) + #define ptr_offset(ptr, member) offsetof(typeof(*(ptr)), member) #define fetch_and_zero(ptr) ({ \ @@ -113,7 +154,7 @@ */ #define container_of_user(ptr, type, member) ({ \ void __user *__mptr = (void __user *)(ptr); \ - BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) && \ + BUILD_BUG_ON_MSG(!__same_type(*(ptr), struct_member(type, member)) && \ !__same_type(*(ptr), void), \ "pointer type mismatch in container_of()"); \ ((type __user *)(__mptr - offsetof(type, member))); }) @@ -152,8 +193,6 @@ static inline u64 ptr_to_u64(const void *ptr) __idx; \ }) -#include - static inline void __list_del_many(struct list_head *head, struct list_head *first) { @@ -174,6 +213,158 @@ static inline void drain_delayed_work(struct delayed_work *dw) } while (delayed_work_pending(dw)); } +static inline unsigned long msecs_to_jiffies_timeout(const unsigned int m) +{ + unsigned long j = msecs_to_jiffies(m); + + return min_t(unsigned long, MAX_JIFFY_OFFSET, j + 1); +} + +static inline unsigned long nsecs_to_jiffies_timeout(const u64 n) +{ + /* nsecs_to_jiffies64() does not guard against overflow */ + if (NSEC_PER_SEC % HZ && + div_u64(n, NSEC_PER_SEC) >= MAX_JIFFY_OFFSET / HZ) + return MAX_JIFFY_OFFSET; + + return min_t(u64, MAX_JIFFY_OFFSET, nsecs_to_jiffies64(n) + 1); +} + +/* + * If you need to wait X milliseconds between events A and B, but event B + * doesn't happen exactly after event A, you record the timestamp (jiffies) of + * when event A happened, then just before event B you call this function and + * pass the timestamp as the first argument, and X as the second argument. + */ +static inline void +wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms) +{ + unsigned long target_jiffies, tmp_jiffies, remaining_jiffies; + + /* + * Don't re-read the value of "jiffies" every time since it may change + * behind our back and break the math. + */ + tmp_jiffies = jiffies; + target_jiffies = timestamp_jiffies + + msecs_to_jiffies_timeout(to_wait_ms); + + if (time_after(target_jiffies, tmp_jiffies)) { + remaining_jiffies = target_jiffies - tmp_jiffies; + while (remaining_jiffies) + remaining_jiffies = + schedule_timeout_uninterruptible(remaining_jiffies); + } +} + +/** + * __wait_for - magic wait macro + * + * Macro to help avoid open coding check/wait/timeout patterns. Note that it's + * important that we check the condition again after having timed out, since the + * timeout could be due to preemption or similar and we've never had a chance to + * check the condition before the timeout. + */ +#define __wait_for(OP, COND, US, Wmin, Wmax) ({ \ + const ktime_t end__ = ktime_add_ns(ktime_get_raw(), 1000ll * (US)); \ + long wait__ = (Wmin); /* recommended min for usleep is 10 us */ \ + int ret__; \ + might_sleep(); \ + for (;;) { \ + const bool expired__ = ktime_after(ktime_get_raw(), end__); \ + OP; \ + /* Guarantee COND check prior to timeout */ \ + barrier(); \ + if (COND) { \ + ret__ = 0; \ + break; \ + } \ + if (expired__) { \ + ret__ = -ETIMEDOUT; \ + break; \ + } \ + usleep_range(wait__, wait__ * 2); \ + if (wait__ < (Wmax)) \ + wait__ <<= 1; \ + } \ + ret__; \ +}) + +#define _wait_for(COND, US, Wmin, Wmax) __wait_for(, (COND), (US), (Wmin), \ + (Wmax)) +#define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000) + +/* If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. */ +#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT) +# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) WARN_ON_ONCE((ATOMIC) && !in_atomic()) +#else +# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) do { } while (0) +#endif + +#define _wait_for_atomic(COND, US, ATOMIC) \ +({ \ + int cpu, ret, timeout = (US) * 1000; \ + u64 base; \ + _WAIT_FOR_ATOMIC_CHECK(ATOMIC); \ + if (!(ATOMIC)) { \ + preempt_disable(); \ + cpu = smp_processor_id(); \ + } \ + base = local_clock(); \ + for (;;) { \ + u64 now = local_clock(); \ + if (!(ATOMIC)) \ + preempt_enable(); \ + /* Guarantee COND check prior to timeout */ \ + barrier(); \ + if (COND) { \ + ret = 0; \ + break; \ + } \ + if (now - base >= timeout) { \ + ret = -ETIMEDOUT; \ + break; \ + } \ + cpu_relax(); \ + if (!(ATOMIC)) { \ + preempt_disable(); \ + if (unlikely(cpu != smp_processor_id())) { \ + timeout -= now - base; \ + cpu = smp_processor_id(); \ + base = local_clock(); \ + } \ + } \ + } \ + ret; \ +}) + +#define wait_for_us(COND, US) \ +({ \ + int ret__; \ + BUILD_BUG_ON(!__builtin_constant_p(US)); \ + if ((US) > 10) \ + ret__ = _wait_for((COND), (US), 10, 10); \ + else \ + ret__ = _wait_for_atomic((COND), (US), 0); \ + ret__; \ +}) + +#define wait_for_atomic_us(COND, US) \ +({ \ + BUILD_BUG_ON(!__builtin_constant_p(US)); \ + BUILD_BUG_ON((US) > 50000); \ + _wait_for_atomic((COND), (US), 1); \ +}) + +#define wait_for_atomic(COND, MS) wait_for_atomic_us((COND), (MS) * 1000) + +#define KHz(x) (1000 * (x)) +#define MHz(x) KHz(1000 * (x)) + +#define KBps(x) (1000 * (x)) +#define MBps(x) KBps(1000 * (x)) +#define GBps(x) ((u64)1000 * MBps((x))) + static inline const char *yesno(bool v) { return v ? "yes" : "no"; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 961268f66c63c0333ba2643aa12fed67894759a3..cf405ffda045202c2a60406777ac7a1838189ba3 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -22,11 +22,12 @@ * */ +#include "gt/intel_engine.h" + #include "i915_vma.h" #include "i915_drv.h" #include "i915_globals.h" -#include "intel_ringbuffer.h" #include "intel_frontbuffer.h" #include @@ -155,6 +156,9 @@ vma_create(struct drm_i915_gem_object *obj, } else if (view->type == I915_GGTT_VIEW_ROTATED) { vma->size = intel_rotation_info_size(&view->rotated); vma->size <<= PAGE_SHIFT; + } else if (view->type == I915_GGTT_VIEW_REMAPPED) { + vma->size = intel_remapped_info_size(&view->remapped); + vma->size <<= PAGE_SHIFT; } } @@ -476,13 +480,6 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) GEM_BUG_ON(!i915_vma_is_ggtt(vma)); GEM_BUG_ON(!vma->fence_size); - /* - * Explicitly disable for rotated VMA since the display does not - * need the fence and the VMA is not accessible to other users. - */ - if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED) - return; - fenceable = (vma->node.size >= vma->fence_size && IS_ALIGNED(vma->node.start, vma->fence_alignment)); diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 6eab70953a57f7c254da4f386da1a15de055a81f..8543d2953cd1962b3daab91dc15e112d6c93ed36 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -277,8 +277,11 @@ i915_vma_compare(struct i915_vma *vma, */ BUILD_BUG_ON(I915_GGTT_VIEW_NORMAL >= I915_GGTT_VIEW_PARTIAL); BUILD_BUG_ON(I915_GGTT_VIEW_PARTIAL >= I915_GGTT_VIEW_ROTATED); + BUILD_BUG_ON(I915_GGTT_VIEW_ROTATED >= I915_GGTT_VIEW_REMAPPED); BUILD_BUG_ON(offsetof(typeof(*view), rotated) != offsetof(typeof(*view), partial)); + BUILD_BUG_ON(offsetof(typeof(*view), rotated) != + offsetof(typeof(*view), remapped)); return memcmp(&vma->ggtt_view.partial, &view->partial, view->type); } diff --git a/drivers/gpu/drm/i915/icl_dsi.c b/drivers/gpu/drm/i915/icl_dsi.c index 9d962ea1e635e1ff5d8da5a3fe24450075201acf..1e240ad665b57574a0181093019af0a1211e2b17 100644 --- a/drivers/gpu/drm/i915/icl_dsi.c +++ b/drivers/gpu/drm/i915/icl_dsi.c @@ -28,6 +28,8 @@ #include #include +#include "intel_atomic.h" +#include "intel_combo_phy.h" #include "intel_connector.h" #include "intel_ddi.h" #include "intel_dsi.h" @@ -363,30 +365,10 @@ static void gen11_dsi_power_up_lanes(struct intel_encoder *encoder) struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); enum port port; - u32 tmp; - u32 lane_mask; - - switch (intel_dsi->lane_count) { - case 1: - lane_mask = PWR_DOWN_LN_3_1_0; - break; - case 2: - lane_mask = PWR_DOWN_LN_3_1; - break; - case 3: - lane_mask = PWR_DOWN_LN_3; - break; - case 4: - default: - lane_mask = PWR_UP_ALL_LANES; - break; - } - for_each_dsi_port(port, intel_dsi->ports) { - tmp = I915_READ(ICL_PORT_CL_DW10(port)); - tmp &= ~PWR_DOWN_LN_MASK; - I915_WRITE(ICL_PORT_CL_DW10(port), tmp | lane_mask); - } + for_each_dsi_port(port, intel_dsi->ports) + intel_combo_phy_power_up_lanes(dev_priv, port, true, + intel_dsi->lane_count, false); } static void gen11_dsi_config_phy_lanes_sequence(struct intel_encoder *encoder) @@ -1193,17 +1175,51 @@ static void gen11_dsi_disable(struct intel_encoder *encoder, gen11_dsi_disable_io_power(encoder); } +static void gen11_dsi_get_timings(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config) +{ + struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + struct drm_display_mode *adjusted_mode = + &pipe_config->base.adjusted_mode; + + if (intel_dsi->dual_link) { + adjusted_mode->crtc_hdisplay *= 2; + if (intel_dsi->dual_link == DSI_DUAL_LINK_FRONT_BACK) + adjusted_mode->crtc_hdisplay -= + intel_dsi->pixel_overlap; + adjusted_mode->crtc_htotal *= 2; + } + adjusted_mode->crtc_hblank_start = adjusted_mode->crtc_hdisplay; + adjusted_mode->crtc_hblank_end = adjusted_mode->crtc_htotal; + + if (intel_dsi->operation_mode == INTEL_DSI_VIDEO_MODE) { + if (intel_dsi->dual_link) { + adjusted_mode->crtc_hsync_start *= 2; + adjusted_mode->crtc_hsync_end *= 2; + } + } + adjusted_mode->crtc_vblank_start = adjusted_mode->crtc_vdisplay; + adjusted_mode->crtc_vblank_end = adjusted_mode->crtc_vtotal; +} + static void gen11_dsi_get_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); /* FIXME: adapt icl_ddi_clock_get() for DSI and use that? */ pipe_config->port_clock = cnl_calc_wrpll_link(dev_priv, &pipe_config->dpll_hw_state); + pipe_config->base.adjusted_mode.crtc_clock = intel_dsi->pclk; + if (intel_dsi->dual_link) + pipe_config->base.adjusted_mode.crtc_clock *= 2; + + gen11_dsi_get_timings(encoder, pipe_config); pipe_config->output_types |= BIT(INTEL_OUTPUT_DSI); + pipe_config->pipe_bpp = bdw_get_pipemisc_bpp(crtc); } static int gen11_dsi_compute_config(struct intel_encoder *encoder, @@ -1219,6 +1235,7 @@ static int gen11_dsi_compute_config(struct intel_encoder *encoder, struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; + pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; intel_fixed_panel_mode(fixed_mode, adjusted_mode); intel_pch_panel_fitting(crtc, pipe_config, conn_state->scaling_mode); diff --git a/drivers/gpu/drm/i915/intel_acpi.c b/drivers/gpu/drm/i915/intel_acpi.c index 9d142d038a7d3631ac927280e549528e86756db5..3456d33feb46aa52dd5e3095034886766ea5192c 100644 --- a/drivers/gpu/drm/i915/intel_acpi.c +++ b/drivers/gpu/drm/i915/intel_acpi.c @@ -4,9 +4,12 @@ * * _DSM related code stolen from nouveau_acpi.c. */ + #include #include + #include "i915_drv.h" +#include "intel_acpi.h" #define INTEL_DSM_REVISION_ID 1 /* For Calpella anyway... */ #define INTEL_DSM_FN_PLATFORM_MUX_INFO 1 /* No args */ diff --git a/drivers/gpu/drm/i915/intel_acpi.h b/drivers/gpu/drm/i915/intel_acpi.h new file mode 100644 index 0000000000000000000000000000000000000000..1c576b3fb712fcf4cddc187516e31b9d4f632abd --- /dev/null +++ b/drivers/gpu/drm/i915/intel_acpi.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_ACPI_H__ +#define __INTEL_ACPI_H__ + +#ifdef CONFIG_ACPI +void intel_register_dsm_handler(void); +void intel_unregister_dsm_handler(void); +#else +static inline void intel_register_dsm_handler(void) { return; } +static inline void intel_unregister_dsm_handler(void) { return; } +#endif /* CONFIG_ACPI */ + +#endif /* __INTEL_ACPI_H__ */ diff --git a/drivers/gpu/drm/i915/intel_atomic.c b/drivers/gpu/drm/i915/intel_atomic.c index 8c8fae32ec50985e86c619dc1c5aa296f8f5e49e..58b8049649a0f929b9e5d90637d3b243bcd74a10 100644 --- a/drivers/gpu/drm/i915/intel_atomic.c +++ b/drivers/gpu/drm/i915/intel_atomic.c @@ -34,6 +34,7 @@ #include #include +#include "intel_atomic.h" #include "intel_drv.h" #include "intel_hdcp.h" #include "intel_sprite.h" @@ -411,3 +412,15 @@ void intel_atomic_state_clear(struct drm_atomic_state *s) drm_atomic_state_default_clear(&state->base); state->dpll_set = state->modeset = false; } + +struct intel_crtc_state * +intel_atomic_get_crtc_state(struct drm_atomic_state *state, + struct intel_crtc *crtc) +{ + struct drm_crtc_state *crtc_state; + crtc_state = drm_atomic_get_crtc_state(state, &crtc->base); + if (IS_ERR(crtc_state)) + return ERR_CAST(crtc_state); + + return to_intel_crtc_state(crtc_state); +} diff --git a/drivers/gpu/drm/i915/intel_atomic.h b/drivers/gpu/drm/i915/intel_atomic.h new file mode 100644 index 0000000000000000000000000000000000000000..1c8507da1a690ba7255622a88a4cdf7c9e53d621 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_atomic.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_ATOMIC_H__ +#define __INTEL_ATOMIC_H__ + +#include + +struct drm_atomic_state; +struct drm_connector; +struct drm_connector_state; +struct drm_crtc; +struct drm_crtc_state; +struct drm_device; +struct drm_i915_private; +struct drm_property; +struct intel_crtc; +struct intel_crtc_state; + +int intel_digital_connector_atomic_get_property(struct drm_connector *connector, + const struct drm_connector_state *state, + struct drm_property *property, + u64 *val); +int intel_digital_connector_atomic_set_property(struct drm_connector *connector, + struct drm_connector_state *state, + struct drm_property *property, + u64 val); +int intel_digital_connector_atomic_check(struct drm_connector *conn, + struct drm_connector_state *new_state); +struct drm_connector_state * +intel_digital_connector_duplicate_state(struct drm_connector *connector); + +struct drm_crtc_state *intel_crtc_duplicate_state(struct drm_crtc *crtc); +void intel_crtc_destroy_state(struct drm_crtc *crtc, + struct drm_crtc_state *state); +struct drm_atomic_state *intel_atomic_state_alloc(struct drm_device *dev); +void intel_atomic_state_clear(struct drm_atomic_state *state); + +struct intel_crtc_state * +intel_atomic_get_crtc_state(struct drm_atomic_state *state, + struct intel_crtc *crtc); + +int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, + struct intel_crtc *intel_crtc, + struct intel_crtc_state *crtc_state); + +#endif /* __INTEL_ATOMIC_H__ */ diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index bca4cc025d3d76de4d95dca4f823b4ef56eecbae..840daff122464e3def177a81c99e62e043156e96 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -26,11 +26,11 @@ #include #include -#include #include "i915_drv.h" #include "intel_audio.h" #include "intel_drv.h" +#include "intel_lpe_audio.h" /** * DOC: High Definition Audio over HDMI and Display Port @@ -319,9 +319,8 @@ hsw_dp_audio_config_update(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct i915_audio_component *acomp = dev_priv->audio_component; - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; enum port port = encoder->port; - enum pipe pipe = crtc->pipe; const struct dp_aud_n_m *nm; int rate; u32 tmp; @@ -333,7 +332,7 @@ hsw_dp_audio_config_update(struct intel_encoder *encoder, else DRM_DEBUG_KMS("using automatic Maud, Naud\n"); - tmp = I915_READ(HSW_AUD_CFG(pipe)); + tmp = I915_READ(HSW_AUD_CFG(cpu_transcoder)); tmp &= ~AUD_CONFIG_N_VALUE_INDEX; tmp &= ~AUD_CONFIG_PIXEL_CLOCK_HDMI_MASK; tmp &= ~AUD_CONFIG_N_PROG_ENABLE; @@ -345,9 +344,9 @@ hsw_dp_audio_config_update(struct intel_encoder *encoder, tmp |= AUD_CONFIG_N_PROG_ENABLE; } - I915_WRITE(HSW_AUD_CFG(pipe), tmp); + I915_WRITE(HSW_AUD_CFG(cpu_transcoder), tmp); - tmp = I915_READ(HSW_AUD_M_CTS_ENABLE(pipe)); + tmp = I915_READ(HSW_AUD_M_CTS_ENABLE(cpu_transcoder)); tmp &= ~AUD_CONFIG_M_MASK; tmp &= ~AUD_M_CTS_M_VALUE_INDEX; tmp &= ~AUD_M_CTS_M_PROG_ENABLE; @@ -358,7 +357,7 @@ hsw_dp_audio_config_update(struct intel_encoder *encoder, tmp |= AUD_M_CTS_M_PROG_ENABLE; } - I915_WRITE(HSW_AUD_M_CTS_ENABLE(pipe), tmp); + I915_WRITE(HSW_AUD_M_CTS_ENABLE(cpu_transcoder), tmp); } static void @@ -367,15 +366,14 @@ hsw_hdmi_audio_config_update(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct i915_audio_component *acomp = dev_priv->audio_component; - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; enum port port = encoder->port; - enum pipe pipe = crtc->pipe; int n, rate; u32 tmp; rate = acomp ? acomp->aud_sample_rate[port] : 0; - tmp = I915_READ(HSW_AUD_CFG(pipe)); + tmp = I915_READ(HSW_AUD_CFG(cpu_transcoder)); tmp &= ~AUD_CONFIG_N_VALUE_INDEX; tmp &= ~AUD_CONFIG_PIXEL_CLOCK_HDMI_MASK; tmp &= ~AUD_CONFIG_N_PROG_ENABLE; @@ -392,16 +390,16 @@ hsw_hdmi_audio_config_update(struct intel_encoder *encoder, DRM_DEBUG_KMS("using automatic N\n"); } - I915_WRITE(HSW_AUD_CFG(pipe), tmp); + I915_WRITE(HSW_AUD_CFG(cpu_transcoder), tmp); /* * Let's disable "Enable CTS or M Prog bit" * and let HW calculate the value */ - tmp = I915_READ(HSW_AUD_M_CTS_ENABLE(pipe)); + tmp = I915_READ(HSW_AUD_M_CTS_ENABLE(cpu_transcoder)); tmp &= ~AUD_M_CTS_M_PROG_ENABLE; tmp &= ~AUD_M_CTS_M_VALUE_INDEX; - I915_WRITE(HSW_AUD_M_CTS_ENABLE(pipe), tmp); + I915_WRITE(HSW_AUD_M_CTS_ENABLE(cpu_transcoder), tmp); } static void @@ -419,28 +417,28 @@ static void hsw_audio_codec_disable(struct intel_encoder *encoder, const struct drm_connector_state *old_conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); - enum pipe pipe = crtc->pipe; + enum transcoder cpu_transcoder = old_crtc_state->cpu_transcoder; u32 tmp; - DRM_DEBUG_KMS("Disable audio codec on pipe %c\n", pipe_name(pipe)); + DRM_DEBUG_KMS("Disable audio codec on transcoder %s\n", + transcoder_name(cpu_transcoder)); mutex_lock(&dev_priv->av_mutex); /* Disable timestamps */ - tmp = I915_READ(HSW_AUD_CFG(pipe)); + tmp = I915_READ(HSW_AUD_CFG(cpu_transcoder)); tmp &= ~AUD_CONFIG_N_VALUE_INDEX; tmp |= AUD_CONFIG_N_PROG_ENABLE; tmp &= ~AUD_CONFIG_UPPER_N_MASK; tmp &= ~AUD_CONFIG_LOWER_N_MASK; if (intel_crtc_has_dp_encoder(old_crtc_state)) tmp |= AUD_CONFIG_N_VALUE_INDEX; - I915_WRITE(HSW_AUD_CFG(pipe), tmp); + I915_WRITE(HSW_AUD_CFG(cpu_transcoder), tmp); /* Invalidate ELD */ tmp = I915_READ(HSW_AUD_PIN_ELD_CP_VLD); - tmp &= ~AUDIO_ELD_VALID(pipe); - tmp &= ~AUDIO_OUTPUT_ENABLE(pipe); + tmp &= ~AUDIO_ELD_VALID(cpu_transcoder); + tmp &= ~AUDIO_OUTPUT_ENABLE(cpu_transcoder); I915_WRITE(HSW_AUD_PIN_ELD_CP_VLD, tmp); mutex_unlock(&dev_priv->av_mutex); @@ -451,22 +449,21 @@ static void hsw_audio_codec_enable(struct intel_encoder *encoder, const struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_connector *connector = conn_state->connector; - enum pipe pipe = crtc->pipe; + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; const u8 *eld = connector->eld; u32 tmp; int len, i; - DRM_DEBUG_KMS("Enable audio codec on pipe %c, %u bytes ELD\n", - pipe_name(pipe), drm_eld_size(eld)); + DRM_DEBUG_KMS("Enable audio codec on transcoder %s, %u bytes ELD\n", + transcoder_name(cpu_transcoder), drm_eld_size(eld)); mutex_lock(&dev_priv->av_mutex); /* Enable audio presence detect, invalidate ELD */ tmp = I915_READ(HSW_AUD_PIN_ELD_CP_VLD); - tmp |= AUDIO_OUTPUT_ENABLE(pipe); - tmp &= ~AUDIO_ELD_VALID(pipe); + tmp |= AUDIO_OUTPUT_ENABLE(cpu_transcoder); + tmp &= ~AUDIO_ELD_VALID(cpu_transcoder); I915_WRITE(HSW_AUD_PIN_ELD_CP_VLD, tmp); /* @@ -477,18 +474,18 @@ static void hsw_audio_codec_enable(struct intel_encoder *encoder, */ /* Reset ELD write address */ - tmp = I915_READ(HSW_AUD_DIP_ELD_CTRL(pipe)); + tmp = I915_READ(HSW_AUD_DIP_ELD_CTRL(cpu_transcoder)); tmp &= ~IBX_ELD_ADDRESS_MASK; - I915_WRITE(HSW_AUD_DIP_ELD_CTRL(pipe), tmp); + I915_WRITE(HSW_AUD_DIP_ELD_CTRL(cpu_transcoder), tmp); /* Up to 84 bytes of hw ELD buffer */ len = min(drm_eld_size(eld), 84); for (i = 0; i < len / 4; i++) - I915_WRITE(HSW_AUD_EDID_DATA(pipe), *((const u32 *)eld + i)); + I915_WRITE(HSW_AUD_EDID_DATA(cpu_transcoder), *((const u32 *)eld + i)); /* ELD valid */ tmp = I915_READ(HSW_AUD_PIN_ELD_CP_VLD); - tmp |= AUDIO_ELD_VALID(pipe); + tmp |= AUDIO_ELD_VALID(cpu_transcoder); I915_WRITE(HSW_AUD_PIN_ELD_CP_VLD, tmp); /* Enable timestamps */ @@ -644,8 +641,10 @@ void intel_audio_codec_enable(struct intel_encoder *encoder, enum port port = encoder->port; enum pipe pipe = crtc->pipe; + /* FIXME precompute the ELD in .compute_config() */ if (!connector->eld[0]) - return; + DRM_DEBUG_KMS("Bogus ELD on [CONNECTOR:%d:%s]\n", + connector->base.id, connector->name); DRM_DEBUG_DRIVER("ELD on [CONNECTOR:%d:%s], [ENCODER:%d:%s]\n", connector->base.id, diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index 1dc8d03ff1279691f9b0492225183235a13bbd97..a0b708f7f384530deb3204c35e487917fb80b3e0 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -27,7 +27,9 @@ #include #include + #include "i915_drv.h" +#include "intel_gmbus.h" #define _INTEL_BIOS_PRIVATE #include "intel_vbt_defs.h" diff --git a/drivers/gpu/drm/i915/intel_bios.h b/drivers/gpu/drm/i915/intel_bios.h index 7e3545f65257c415fa7fab7a65e27a6858e80a09..7bac53f219e116107391fa009f160d9d35344adf 100644 --- a/drivers/gpu/drm/i915/intel_bios.h +++ b/drivers/gpu/drm/i915/intel_bios.h @@ -30,6 +30,12 @@ #ifndef _INTEL_BIOS_H_ #define _INTEL_BIOS_H_ +#include + +#include + +struct drm_i915_private; + enum intel_backlight_type { INTEL_BACKLIGHT_PMIC, INTEL_BACKLIGHT_LPSS, @@ -220,4 +226,19 @@ struct mipi_pps_data { u16 panel_power_cycle_delay; } __packed; +void intel_bios_init(struct drm_i915_private *dev_priv); +void intel_bios_cleanup(struct drm_i915_private *dev_priv); +bool intel_bios_is_valid_vbt(const void *buf, size_t size); +bool intel_bios_is_tv_present(struct drm_i915_private *dev_priv); +bool intel_bios_is_lvds_present(struct drm_i915_private *dev_priv, u8 *i2c_pin); +bool intel_bios_is_port_present(struct drm_i915_private *dev_priv, enum port port); +bool intel_bios_is_port_edp(struct drm_i915_private *dev_priv, enum port port); +bool intel_bios_is_port_dp_dual_mode(struct drm_i915_private *dev_priv, enum port port); +bool intel_bios_is_dsi_present(struct drm_i915_private *dev_priv, enum port *port); +bool intel_bios_is_port_hpd_inverted(struct drm_i915_private *dev_priv, + enum port port); +bool intel_bios_is_lspcon_present(struct drm_i915_private *dev_priv, + enum port port); +enum aux_ch intel_bios_port_aux_ch(struct drm_i915_private *dev_priv, enum port port); + #endif /* _INTEL_BIOS_H_ */ diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index ae40a8679314ea46236f4cb98fcc4c7179250bb2..78d9f619956c769d4fc23ec5ee548ab9d89fb213 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -23,6 +23,7 @@ #include "intel_cdclk.h" #include "intel_drv.h" +#include "intel_sideband.h" /** * DOC: CDCLK / RAWCLK @@ -464,14 +465,18 @@ static void vlv_get_cdclk(struct drm_i915_private *dev_priv, { u32 val; + vlv_iosf_sb_get(dev_priv, + BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_PUNIT)); + cdclk_state->vco = vlv_get_hpll_vco(dev_priv); cdclk_state->cdclk = vlv_get_cck_clock(dev_priv, "cdclk", CCK_DISPLAY_CLOCK_CONTROL, cdclk_state->vco); - mutex_lock(&dev_priv->pcu_lock); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM); - mutex_unlock(&dev_priv->pcu_lock); + + vlv_iosf_sb_put(dev_priv, + BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_PUNIT)); if (IS_VALLEYVIEW(dev_priv)) cdclk_state->voltage_level = (val & DSPFREQGUAR_MASK) >> @@ -545,7 +550,11 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv, */ wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A); - mutex_lock(&dev_priv->pcu_lock); + vlv_iosf_sb_get(dev_priv, + BIT(VLV_IOSF_SB_CCK) | + BIT(VLV_IOSF_SB_BUNIT) | + BIT(VLV_IOSF_SB_PUNIT)); + val = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM); val &= ~DSPFREQGUAR_MASK; val |= (cmd << DSPFREQGUAR_SHIFT); @@ -555,9 +564,6 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv, 50)) { DRM_ERROR("timed out waiting for CDclk change\n"); } - mutex_unlock(&dev_priv->pcu_lock); - - mutex_lock(&dev_priv->sb_lock); if (cdclk == 400000) { u32 divider; @@ -591,7 +597,10 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv, val |= 3000 / 250; /* 3.0 usec */ vlv_bunit_write(dev_priv, BUNIT_REG_BISOC, val); - mutex_unlock(&dev_priv->sb_lock); + vlv_iosf_sb_put(dev_priv, + BIT(VLV_IOSF_SB_CCK) | + BIT(VLV_IOSF_SB_BUNIT) | + BIT(VLV_IOSF_SB_PUNIT)); intel_update_cdclk(dev_priv); @@ -627,7 +636,7 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv, */ wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A); - mutex_lock(&dev_priv->pcu_lock); + vlv_punit_get(dev_priv); val = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM); val &= ~DSPFREQGUAR_MASK_CHV; val |= (cmd << DSPFREQGUAR_SHIFT_CHV); @@ -637,7 +646,8 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv, 50)) { DRM_ERROR("timed out waiting for CDclk change\n"); } - mutex_unlock(&dev_priv->pcu_lock); + + vlv_punit_put(dev_priv); intel_update_cdclk(dev_priv); @@ -716,10 +726,8 @@ static void bdw_set_cdclk(struct drm_i915_private *dev_priv, "trying to change cdclk frequency with cdclk not enabled\n")) return; - mutex_lock(&dev_priv->pcu_lock); ret = sandybridge_pcode_write(dev_priv, BDW_PCODE_DISPLAY_FREQ_CHANGE_REQ, 0x0); - mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("failed to inform pcode about cdclk change\n"); return; @@ -768,10 +776,8 @@ static void bdw_set_cdclk(struct drm_i915_private *dev_priv, LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1)) DRM_ERROR("Switching back to LCPLL failed\n"); - mutex_lock(&dev_priv->pcu_lock); sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, cdclk_state->voltage_level); - mutex_unlock(&dev_priv->pcu_lock); I915_WRITE(CDCLK_FREQ, DIV_ROUND_CLOSEST(cdclk, 1000) - 1); @@ -1010,12 +1016,10 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, */ WARN_ON_ONCE(IS_SKYLAKE(dev_priv) && vco == 8640000); - mutex_lock(&dev_priv->pcu_lock); ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, SKL_CDCLK_PREPARE_FOR_CHANGE, SKL_CDCLK_READY_FOR_CHANGE, SKL_CDCLK_READY_FOR_CHANGE, 3); - mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n", ret); @@ -1079,10 +1083,8 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, POSTING_READ(CDCLK_CTL); /* inform PCU of the change */ - mutex_lock(&dev_priv->pcu_lock); sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, cdclk_state->voltage_level); - mutex_unlock(&dev_priv->pcu_lock); intel_update_cdclk(dev_priv); } @@ -1379,12 +1381,9 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, * requires us to wait up to 150usec, but that leads to timeouts; * the 2ms used here is based on experiment. */ - mutex_lock(&dev_priv->pcu_lock); ret = sandybridge_pcode_write_timeout(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, 0x80000000, 150, 2); - mutex_unlock(&dev_priv->pcu_lock); - if (ret) { DRM_ERROR("PCode CDCLK freq change notify failed (err %d, freq %d)\n", ret, cdclk); @@ -1414,7 +1413,6 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, if (pipe != INVALID_PIPE) intel_wait_for_vblank(dev_priv, pipe); - mutex_lock(&dev_priv->pcu_lock); /* * The timeout isn't specified, the 2ms used here is based on * experiment. @@ -1424,8 +1422,6 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, ret = sandybridge_pcode_write_timeout(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, cdclk_state->voltage_level, 150, 2); - mutex_unlock(&dev_priv->pcu_lock); - if (ret) { DRM_ERROR("PCode CDCLK freq set failed, (err %d, freq %d)\n", ret, cdclk); @@ -1648,12 +1644,10 @@ static void cnl_set_cdclk(struct drm_i915_private *dev_priv, u32 val, divider; int ret; - mutex_lock(&dev_priv->pcu_lock); ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, SKL_CDCLK_PREPARE_FOR_CHANGE, SKL_CDCLK_READY_FOR_CHANGE, SKL_CDCLK_READY_FOR_CHANGE, 3); - mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n", ret); @@ -1692,10 +1686,8 @@ static void cnl_set_cdclk(struct drm_i915_private *dev_priv, intel_wait_for_vblank(dev_priv, pipe); /* inform PCU of the change */ - mutex_lock(&dev_priv->pcu_lock); sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, cdclk_state->voltage_level); - mutex_unlock(&dev_priv->pcu_lock); intel_update_cdclk(dev_priv); @@ -1834,12 +1826,10 @@ static void icl_set_cdclk(struct drm_i915_private *dev_priv, unsigned int vco = cdclk_state->vco; int ret; - mutex_lock(&dev_priv->pcu_lock); ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, SKL_CDCLK_PREPARE_FOR_CHANGE, SKL_CDCLK_READY_FOR_CHANGE, SKL_CDCLK_READY_FOR_CHANGE, 3); - mutex_unlock(&dev_priv->pcu_lock); if (ret) { DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n", ret); @@ -1861,10 +1851,8 @@ static void icl_set_cdclk(struct drm_i915_private *dev_priv, I915_WRITE(CDCLK_CTL, ICL_CDCLK_CD2X_PIPE_NONE | skl_cdclk_decimal(cdclk)); - mutex_lock(&dev_priv->pcu_lock); sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, cdclk_state->voltage_level); - mutex_unlock(&dev_priv->pcu_lock); intel_update_cdclk(dev_priv); @@ -2277,6 +2265,15 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state) IS_VALLEYVIEW(dev_priv)) min_cdclk = max(320000, min_cdclk); + /* + * On Geminilake once the CDCLK gets as low as 79200 + * picture gets unstable, despite that values are + * correct for DSI PLL and DE PLL. + */ + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DSI) && + IS_GEMINILAKE(dev_priv)) + min_cdclk = max(158400, min_cdclk); + if (min_cdclk > dev_priv->max_cdclk_freq) { DRM_DEBUG_KMS("required cdclk (%d kHz) exceeds max (%d kHz)\n", min_cdclk, dev_priv->max_cdclk_freq); diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index 9093daabc290d25324823d8ffec5fa00dab2bdef..962db1236970317441364e45c25ae0f1ab35b967 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -784,56 +784,78 @@ static void icl_load_luts(const struct intel_crtc_state *crtc_state) } } -static void cherryview_load_luts(const struct intel_crtc_state *crtc_state) +static u32 chv_cgm_degamma_ldw(const struct drm_color_lut *color) +{ + return drm_color_lut_extract(color->green, 14) << 16 | + drm_color_lut_extract(color->blue, 14); +} + +static u32 chv_cgm_degamma_udw(const struct drm_color_lut *color) +{ + return drm_color_lut_extract(color->red, 14); +} + +static void chv_load_cgm_degamma(struct intel_crtc *crtc, + const struct drm_property_blob *blob) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut; - const struct drm_property_blob *degamma_lut = crtc_state->base.degamma_lut; + const struct drm_color_lut *lut = blob->data; + int i, lut_size = drm_color_lut_size(blob); enum pipe pipe = crtc->pipe; - cherryview_load_csc_matrix(crtc_state); - - if (crtc_state_is_legacy_gamma(crtc_state)) { - i9xx_load_luts(crtc_state); - return; + for (i = 0; i < lut_size; i++) { + I915_WRITE(CGM_PIPE_DEGAMMA(pipe, i, 0), + chv_cgm_degamma_ldw(&lut[i])); + I915_WRITE(CGM_PIPE_DEGAMMA(pipe, i, 1), + chv_cgm_degamma_udw(&lut[i])); } +} - if (degamma_lut) { - const struct drm_color_lut *lut = degamma_lut->data; - int i, lut_size = INTEL_INFO(dev_priv)->color.degamma_lut_size; +static u32 chv_cgm_gamma_ldw(const struct drm_color_lut *color) +{ + return drm_color_lut_extract(color->green, 10) << 16 | + drm_color_lut_extract(color->blue, 10); +} - for (i = 0; i < lut_size; i++) { - u32 word0, word1; +static u32 chv_cgm_gamma_udw(const struct drm_color_lut *color) +{ + return drm_color_lut_extract(color->red, 10); +} - /* Write LUT in U0.14 format. */ - word0 = - (drm_color_lut_extract(lut[i].green, 14) << 16) | - drm_color_lut_extract(lut[i].blue, 14); - word1 = drm_color_lut_extract(lut[i].red, 14); +static void chv_load_cgm_gamma(struct intel_crtc *crtc, + const struct drm_property_blob *blob) +{ + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + const struct drm_color_lut *lut = blob->data; + int i, lut_size = drm_color_lut_size(blob); + enum pipe pipe = crtc->pipe; - I915_WRITE(CGM_PIPE_DEGAMMA(pipe, i, 0), word0); - I915_WRITE(CGM_PIPE_DEGAMMA(pipe, i, 1), word1); - } + for (i = 0; i < lut_size; i++) { + I915_WRITE(CGM_PIPE_GAMMA(pipe, i, 0), + chv_cgm_gamma_ldw(&lut[i])); + I915_WRITE(CGM_PIPE_GAMMA(pipe, i, 1), + chv_cgm_gamma_udw(&lut[i])); } +} - if (gamma_lut) { - const struct drm_color_lut *lut = gamma_lut->data; - int i, lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; - - for (i = 0; i < lut_size; i++) { - u32 word0, word1; +static void chv_load_luts(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut; + const struct drm_property_blob *degamma_lut = crtc_state->base.degamma_lut; - /* Write LUT in U0.10 format. */ - word0 = - (drm_color_lut_extract(lut[i].green, 10) << 16) | - drm_color_lut_extract(lut[i].blue, 10); - word1 = drm_color_lut_extract(lut[i].red, 10); + cherryview_load_csc_matrix(crtc_state); - I915_WRITE(CGM_PIPE_GAMMA(pipe, i, 0), word0); - I915_WRITE(CGM_PIPE_GAMMA(pipe, i, 1), word1); - } + if (crtc_state_is_legacy_gamma(crtc_state)) { + i9xx_load_luts(crtc_state); + return; } + + if (degamma_lut) + chv_load_cgm_degamma(crtc, degamma_lut); + + if (gamma_lut) + chv_load_cgm_gamma(crtc, gamma_lut); } void intel_color_load_luts(const struct intel_crtc_state *crtc_state) @@ -1232,7 +1254,7 @@ void intel_color_init(struct intel_crtc *crtc) if (IS_CHERRYVIEW(dev_priv)) { dev_priv->display.color_check = chv_color_check; dev_priv->display.color_commit = i9xx_color_commit; - dev_priv->display.load_luts = cherryview_load_luts; + dev_priv->display.load_luts = chv_load_luts; } else if (INTEL_GEN(dev_priv) >= 4) { dev_priv->display.color_check = i9xx_color_check; dev_priv->display.color_commit = i9xx_color_commit; diff --git a/drivers/gpu/drm/i915/intel_combo_phy.c b/drivers/gpu/drm/i915/intel_combo_phy.c index 2bf4359d7e41d9cbc9cdc21edae2106ab267e2fb..19a9333b727a1a9fdfc07dafc8d70d04227c1174 100644 --- a/drivers/gpu/drm/i915/intel_combo_phy.c +++ b/drivers/gpu/drm/i915/intel_combo_phy.c @@ -3,6 +3,7 @@ * Copyright © 2018 Intel Corporation */ +#include "intel_combo_phy.h" #include "intel_drv.h" #define for_each_combo_port(__dev_priv, __port) \ @@ -147,7 +148,7 @@ static bool cnl_combo_phy_verify_state(struct drm_i915_private *dev_priv) return ret; } -void cnl_combo_phys_init(struct drm_i915_private *dev_priv) +static void cnl_combo_phys_init(struct drm_i915_private *dev_priv) { u32 val; @@ -167,7 +168,7 @@ void cnl_combo_phys_init(struct drm_i915_private *dev_priv) I915_WRITE(CNL_PORT_CL1CM_DW5, val); } -void cnl_combo_phys_uninit(struct drm_i915_private *dev_priv) +static void cnl_combo_phys_uninit(struct drm_i915_private *dev_priv) { u32 val; @@ -203,7 +204,59 @@ static bool icl_combo_phy_verify_state(struct drm_i915_private *dev_priv, return ret; } -void icl_combo_phys_init(struct drm_i915_private *dev_priv) +void intel_combo_phy_power_up_lanes(struct drm_i915_private *dev_priv, + enum port port, bool is_dsi, + int lane_count, bool lane_reversal) +{ + u8 lane_mask; + u32 val; + + if (is_dsi) { + WARN_ON(lane_reversal); + + switch (lane_count) { + case 1: + lane_mask = PWR_DOWN_LN_3_1_0; + break; + case 2: + lane_mask = PWR_DOWN_LN_3_1; + break; + case 3: + lane_mask = PWR_DOWN_LN_3; + break; + default: + MISSING_CASE(lane_count); + /* fall-through */ + case 4: + lane_mask = PWR_UP_ALL_LANES; + break; + } + } else { + switch (lane_count) { + case 1: + lane_mask = lane_reversal ? PWR_DOWN_LN_2_1_0 : + PWR_DOWN_LN_3_2_1; + break; + case 2: + lane_mask = lane_reversal ? PWR_DOWN_LN_1_0 : + PWR_DOWN_LN_3_2; + break; + default: + MISSING_CASE(lane_count); + /* fall-through */ + case 4: + lane_mask = PWR_UP_ALL_LANES; + break; + } + } + + val = I915_READ(ICL_PORT_CL_DW10(port)); + val &= ~PWR_DOWN_LN_MASK; + val |= lane_mask << PWR_DOWN_LN_SHIFT; + I915_WRITE(ICL_PORT_CL_DW10(port), val); +} + +static void icl_combo_phys_init(struct drm_i915_private *dev_priv) { enum port port; @@ -232,7 +285,7 @@ void icl_combo_phys_init(struct drm_i915_private *dev_priv) } } -void icl_combo_phys_uninit(struct drm_i915_private *dev_priv) +static void icl_combo_phys_uninit(struct drm_i915_private *dev_priv) { enum port port; @@ -253,3 +306,19 @@ void icl_combo_phys_uninit(struct drm_i915_private *dev_priv) I915_WRITE(ICL_PORT_COMP_DW0(port), val); } } + +void intel_combo_phy_init(struct drm_i915_private *i915) +{ + if (INTEL_GEN(i915) >= 11) + icl_combo_phys_init(i915); + else if (IS_CANNONLAKE(i915)) + cnl_combo_phys_init(i915); +} + +void intel_combo_phy_uninit(struct drm_i915_private *i915) +{ + if (INTEL_GEN(i915) >= 11) + icl_combo_phys_uninit(i915); + else if (IS_CANNONLAKE(i915)) + cnl_combo_phys_uninit(i915); +} diff --git a/drivers/gpu/drm/i915/intel_combo_phy.h b/drivers/gpu/drm/i915/intel_combo_phy.h new file mode 100644 index 0000000000000000000000000000000000000000..e6e195a83b199f67cc812e537301eff4574f86dd --- /dev/null +++ b/drivers/gpu/drm/i915/intel_combo_phy.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_COMBO_PHY_H__ +#define __INTEL_COMBO_PHY_H__ + +#include +#include + +struct drm_i915_private; + +void intel_combo_phy_init(struct drm_i915_private *dev_priv); +void intel_combo_phy_uninit(struct drm_i915_private *dev_priv); +void intel_combo_phy_power_up_lanes(struct drm_i915_private *dev_priv, + enum port port, bool is_dsi, + int lane_count, bool lane_reversal); + +#endif /* __INTEL_COMBO_PHY_H__ */ diff --git a/drivers/gpu/drm/i915/intel_context.h b/drivers/gpu/drm/i915/intel_context.h deleted file mode 100644 index ebc861b1a49e2bcf91a4c8b68ce7867b1cc3e42b..0000000000000000000000000000000000000000 --- a/drivers/gpu/drm/i915/intel_context.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2019 Intel Corporation - */ - -#ifndef __INTEL_CONTEXT_H__ -#define __INTEL_CONTEXT_H__ - -#include - -#include "intel_context_types.h" -#include "intel_engine_types.h" - -struct intel_context *intel_context_alloc(void); -void intel_context_free(struct intel_context *ce); - -void intel_context_init(struct intel_context *ce, - struct i915_gem_context *ctx, - struct intel_engine_cs *engine); - -/** - * intel_context_lookup - Find the matching HW context for this (ctx, engine) - * @ctx - the parent GEM context - * @engine - the target HW engine - * - * May return NULL if the HW context hasn't been instantiated (i.e. unused). - */ -struct intel_context * -intel_context_lookup(struct i915_gem_context *ctx, - struct intel_engine_cs *engine); - -/** - * intel_context_pin_lock - Stablises the 'pinned' status of the HW context - * @ctx - the parent GEM context - * @engine - the target HW engine - * - * Acquire a lock on the pinned status of the HW context, such that the context - * can neither be bound to the GPU or unbound whilst the lock is held, i.e. - * intel_context_is_pinned() remains stable. - */ -struct intel_context * -intel_context_pin_lock(struct i915_gem_context *ctx, - struct intel_engine_cs *engine); - -static inline bool -intel_context_is_pinned(struct intel_context *ce) -{ - return atomic_read(&ce->pin_count); -} - -static inline void intel_context_pin_unlock(struct intel_context *ce) -__releases(ce->pin_mutex) -{ - mutex_unlock(&ce->pin_mutex); -} - -struct intel_context * -__intel_context_insert(struct i915_gem_context *ctx, - struct intel_engine_cs *engine, - struct intel_context *ce); -void -__intel_context_remove(struct intel_context *ce); - -struct intel_context * -intel_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine); - -static inline void __intel_context_pin(struct intel_context *ce) -{ - GEM_BUG_ON(!intel_context_is_pinned(ce)); - atomic_inc(&ce->pin_count); -} - -void intel_context_unpin(struct intel_context *ce); - -static inline struct intel_context *intel_context_get(struct intel_context *ce) -{ - kref_get(&ce->ref); - return ce; -} - -static inline void intel_context_put(struct intel_context *ce) -{ - kref_put(&ce->ref, ce->ops->destroy); -} - -#endif /* __INTEL_CONTEXT_H__ */ diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index b665c370111b2801202d5efcaaf451d4912d7fc2..bb56518576a1c0bc7209e5053119500fcc8c4edc 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -39,6 +39,9 @@ #include "intel_crt.h" #include "intel_ddi.h" #include "intel_drv.h" +#include "intel_fifo_underrun.h" +#include "intel_gmbus.h" +#include "intel_hotplug.h" /* Here's the desired hotplug mode */ #define ADPA_HOTPLUG_BITS (ADPA_CRT_HOTPLUG_PERIOD_128 | \ diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index f43c2a2563a5a54b0abf7a8510798049526c0a06..4527b9662330a5a8ffc490d48bce8ee30b7481d9 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -529,8 +529,6 @@ void intel_csr_ucode_init(struct drm_i915_private *dev_priv) if (csr->fw_path == NULL) { DRM_DEBUG_KMS("No known CSR firmware for platform, disabling runtime PM\n"); - WARN_ON(!IS_ALPHA_SUPPORT(INTEL_INFO(dev_priv))); - return; } diff --git a/drivers/gpu/drm/i915/intel_csr.h b/drivers/gpu/drm/i915/intel_csr.h index 17a32c1e8a35c2913e2198292d1c61555c382e77..03c64f8af7ab333836ba04f8f2064ea54f0e6b35 100644 --- a/drivers/gpu/drm/i915/intel_csr.h +++ b/drivers/gpu/drm/i915/intel_csr.h @@ -8,6 +8,10 @@ struct drm_i915_private; +#define CSR_VERSION(major, minor) ((major) << 16 | (minor)) +#define CSR_VERSION_MAJOR(version) ((version) >> 16) +#define CSR_VERSION_MINOR(version) ((version) & 0xffff) + void intel_csr_ucode_init(struct drm_i915_private *i915); void intel_csr_load_program(struct drm_i915_private *i915); void intel_csr_ucode_fini(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index f181c26f62fd3bded129ded58cfa4da190323692..df06e5bb4764a74f6afc1b7fbe7ea7e4386426ce 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -29,16 +29,23 @@ #include "i915_drv.h" #include "intel_audio.h" +#include "intel_combo_phy.h" #include "intel_connector.h" #include "intel_ddi.h" #include "intel_dp.h" +#include "intel_dp_link_training.h" +#include "intel_dpio_phy.h" #include "intel_drv.h" #include "intel_dsi.h" +#include "intel_fifo_underrun.h" +#include "intel_gmbus.h" #include "intel_hdcp.h" #include "intel_hdmi.h" +#include "intel_hotplug.h" #include "intel_lspcon.h" #include "intel_panel.h" #include "intel_psr.h" +#include "intel_vdsc.h" struct ddi_buf_trans { u32 trans1; /* balance leg enable, de-emph level */ @@ -1450,7 +1457,8 @@ static void ddi_dotclock_get(struct intel_crtc_state *pipe_config) else dotclock = pipe_config->port_clock; - if (pipe_config->output_format == INTEL_OUTPUT_FORMAT_YCBCR420) + if (pipe_config->output_format == INTEL_OUTPUT_FORMAT_YCBCR420 && + !intel_crtc_has_dp_encoder(pipe_config)) dotclock *= 2; if (pipe_config->pixel_multiplier) @@ -1710,6 +1718,14 @@ void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state) */ if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR444) temp |= TRANS_MSA_SAMPLING_444 | TRANS_MSA_CLRSP_YCBCR; + /* + * As per DP 1.4a spec section 2.2.4.3 [MSA Field for Indication + * of Color Encoding Format and Content Color Gamut] while sending + * YCBCR 420 signals we should program MSA MISC1 fields which + * indicate VSC SDP for the Pixel Encoding/Colorimetry Format. + */ + if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420) + temp |= TRANS_MSA_USE_VSC_SDP; I915_WRITE(TRANS_MSA_MISC(cpu_transcoder), temp); } @@ -1772,9 +1788,7 @@ void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state) * eDP when not using the panel fitter, and when not * using motion blur mitigation (which we don't * support). */ - if (IS_HASWELL(dev_priv) && - (crtc_state->pch_pfit.enabled || - crtc_state->pch_pfit.force_thru)) + if (crtc_state->pch_pfit.force_thru) temp |= TRANS_DDI_EDP_INPUT_A_ONOFF; else temp |= TRANS_DDI_EDP_INPUT_A_ON; @@ -3111,6 +3125,15 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, else intel_prepare_dp_ddi_buffers(encoder, crtc_state); + if (intel_port_is_combophy(dev_priv, port)) { + bool lane_reversal = + dig_port->saved_port_bits & DDI_BUF_PORT_REVERSAL; + + intel_combo_phy_power_up_lanes(dev_priv, port, false, + crtc_state->lane_count, + lane_reversal); + } + intel_ddi_init_dp_buf_reg(encoder); if (!is_mst) intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); @@ -3375,6 +3398,7 @@ static void intel_enable_ddi_dp(struct intel_encoder *encoder, intel_edp_backlight_on(crtc_state, conn_state); intel_psr_enable(intel_dp, crtc_state); + intel_dp_ycbcr_420_enable(intel_dp, crtc_state); intel_edp_drrs_enable(intel_dp, crtc_state); if (crtc_state->has_audio) @@ -3844,6 +3868,7 @@ static int intel_ddi_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) { + struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum port port = encoder->port; int ret; @@ -3858,6 +3883,12 @@ static int intel_ddi_compute_config(struct intel_encoder *encoder, if (ret) return ret; + if (IS_HASWELL(dev_priv) && crtc->pipe == PIPE_A && + pipe_config->cpu_transcoder == TRANSCODER_EDP) + pipe_config->pch_pfit.force_thru = + pipe_config->pch_pfit.enabled || + pipe_config->crc_enabled; + if (IS_GEN9_LP(dev_priv)) pipe_config->lane_lat_optim_mask = bxt_ddi_phy_calc_lane_lat_optim_mask(pipe_config->lane_count); @@ -3865,7 +3896,6 @@ static int intel_ddi_compute_config(struct intel_encoder *encoder, intel_ddi_compute_min_voltage_level(dev_priv, pipe_config); return 0; - } static void intel_ddi_encoder_suspend(struct intel_encoder *encoder) diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 0e579f158016979c5a88e69eab4a92f6ae068925..5a2e17d6146b3469d11810241db049fc919b4be9 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -27,7 +27,10 @@ #include -#include "intel_engine_types.h" +#include "gt/intel_engine_types.h" +#include "gt/intel_context_types.h" +#include "gt/intel_sseu.h" + #include "intel_display.h" struct drm_printer; @@ -118,6 +121,7 @@ enum intel_ppgtt_type { func(has_pooled_eu); \ func(has_rc6); \ func(has_rc6p); \ + func(has_rps); \ func(has_runtime_pm); \ func(has_snoop); \ func(has_coherent_ggtt); \ @@ -139,33 +143,6 @@ enum intel_ppgtt_type { func(overlay_needs_physical); \ func(supports_tv); -#define GEN_MAX_SLICES (6) /* CNL upper bound */ -#define GEN_MAX_SUBSLICES (8) /* ICL upper bound */ - -struct sseu_dev_info { - u8 slice_mask; - u8 subslice_mask[GEN_MAX_SLICES]; - u16 eu_total; - u8 eu_per_subslice; - u8 min_eu_in_pool; - /* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */ - u8 subslice_7eu[3]; - u8 has_slice_pg:1; - u8 has_subslice_pg:1; - u8 has_eu_pg:1; - - /* Topology fields */ - u8 max_slices; - u8 max_subslices; - u8 max_eus_per_subslice; - - /* We don't have more than 8 eus per subslice at the moment and as we - * store eus enabled using bits, no need to multiply by eus per - * subslice. - */ - u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES]; -}; - struct intel_device_info { u16 gen_mask; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 2eb62ed9e7cbd52905cab3f0ee60e7f0a50451c0..012ad08f38c338a3239f92c0127bdf4e84f46fd1 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -46,8 +46,9 @@ #include "i915_drv.h" #include "i915_gem_clflush.h" -#include "i915_reset.h" #include "i915_trace.h" +#include "intel_acpi.h" +#include "intel_atomic.h" #include "intel_atomic_plane.h" #include "intel_color.h" #include "intel_cdclk.h" @@ -59,16 +60,23 @@ #include "intel_dvo.h" #include "intel_fbc.h" #include "intel_fbdev.h" +#include "intel_fifo_underrun.h" #include "intel_frontbuffer.h" +#include "intel_gmbus.h" #include "intel_hdcp.h" #include "intel_hdmi.h" +#include "intel_hotplug.h" #include "intel_lvds.h" +#include "intel_overlay.h" #include "intel_pipe_crc.h" #include "intel_pm.h" #include "intel_psr.h" +#include "intel_quirks.h" #include "intel_sdvo.h" +#include "intel_sideband.h" #include "intel_sprite.h" #include "intel_tv.h" +#include "intel_vdsc.h" /* Primary plane formats for gen <= 3 */ static const u32 i8xx_primary_formats[] = { @@ -120,7 +128,7 @@ static void intel_cpu_transcoder_set_m_n(const struct intel_crtc_state *crtc_sta static void i9xx_set_pipeconf(const struct intel_crtc_state *crtc_state); static void ironlake_set_pipeconf(const struct intel_crtc_state *crtc_state); static void haswell_set_pipeconf(const struct intel_crtc_state *crtc_state); -static void haswell_set_pipemisc(const struct intel_crtc_state *crtc_state); +static void bdw_set_pipemisc(const struct intel_crtc_state *crtc_state); static void vlv_prepare_pll(struct intel_crtc *crtc, const struct intel_crtc_state *pipe_config); static void chv_prepare_pll(struct intel_crtc *crtc, @@ -153,10 +161,8 @@ int vlv_get_hpll_vco(struct drm_i915_private *dev_priv) int hpll_freq, vco_freq[] = { 800, 1600, 2000, 2400 }; /* Obtain SKU information */ - mutex_lock(&dev_priv->sb_lock); hpll_freq = vlv_cck_read(dev_priv, CCK_FUSE_REG) & CCK_FUSE_HPLL_FREQ_MASK; - mutex_unlock(&dev_priv->sb_lock); return vco_freq[hpll_freq] * 1000; } @@ -167,10 +173,7 @@ int vlv_get_cck_clock(struct drm_i915_private *dev_priv, u32 val; int divider; - mutex_lock(&dev_priv->sb_lock); val = vlv_cck_read(dev_priv, reg); - mutex_unlock(&dev_priv->sb_lock); - divider = val & CCK_FREQUENCY_VALUES; WARN((val & CCK_FREQUENCY_STATUS) != @@ -183,11 +186,18 @@ int vlv_get_cck_clock(struct drm_i915_private *dev_priv, int vlv_get_cck_clock_hpll(struct drm_i915_private *dev_priv, const char *name, u32 reg) { + int hpll; + + vlv_cck_get(dev_priv); + if (dev_priv->hpll_freq == 0) dev_priv->hpll_freq = vlv_get_hpll_vco(dev_priv); - return vlv_get_cck_clock(dev_priv, name, reg, - dev_priv->hpll_freq); + hpll = vlv_get_cck_clock(dev_priv, name, reg, dev_priv->hpll_freq); + + vlv_cck_put(dev_priv); + + return hpll; } static void intel_update_czclk(struct drm_i915_private *dev_priv) @@ -476,6 +486,7 @@ static const struct intel_limit intel_limits_bxt = { .p2 = { .p2_slow = 1, .p2_fast = 20 }, }; +/* WA Display #0827: Gen9:all */ static void skl_wa_827(struct drm_i915_private *dev_priv, int pipe, bool enable) { @@ -489,6 +500,19 @@ skl_wa_827(struct drm_i915_private *dev_priv, int pipe, bool enable) ~(DUPS1_GATING_DIS | DUPS2_GATING_DIS)); } +/* Wa_2006604312:icl */ +static void +icl_wa_scalerclkgating(struct drm_i915_private *dev_priv, enum pipe pipe, + bool enable) +{ + if (enable) + I915_WRITE(CLKGATE_DIS_PSL(pipe), + I915_READ(CLKGATE_DIS_PSL(pipe)) | DPFR_GATING_DIS); + else + I915_WRITE(CLKGATE_DIS_PSL(pipe), + I915_READ(CLKGATE_DIS_PSL(pipe)) & ~DPFR_GATING_DIS); +} + static bool needs_modeset(const struct drm_crtc_state *state) { @@ -551,7 +575,7 @@ int chv_calc_dpll_params(int refclk, struct dpll *clock) clock->p = clock->p1 * clock->p2; if (WARN_ON(clock->n == 0 || clock->p == 0)) return 0; - clock->vco = DIV_ROUND_CLOSEST_ULL((u64)refclk * clock->m, + clock->vco = DIV_ROUND_CLOSEST_ULL(mul_u32_u32(refclk, clock->m), clock->n << 22); clock->dot = DIV_ROUND_CLOSEST(clock->vco, clock->p); @@ -936,8 +960,8 @@ chv_find_best_dpll(const struct intel_limit *limit, clock.p = clock.p1 * clock.p2; - m2 = DIV_ROUND_CLOSEST_ULL(((u64)target * clock.p * - clock.n) << 22, refclk * clock.m1); + m2 = DIV_ROUND_CLOSEST_ULL(mul_u32_u32(target, clock.p * clock.n) << 22, + refclk * clock.m1); if (m2 > INT_MAX/clock.m1) continue; @@ -1080,9 +1104,9 @@ void assert_dsi_pll(struct drm_i915_private *dev_priv, bool state) u32 val; bool cur_state; - mutex_lock(&dev_priv->sb_lock); + vlv_cck_get(dev_priv); val = vlv_cck_read(dev_priv, CCK_REG_DSI_PLL_CONTROL); - mutex_unlock(&dev_priv->sb_lock); + vlv_cck_put(dev_priv); cur_state = val & DSI_PLL_VCO_EN; I915_STATE_WARN(cur_state != state, @@ -1392,14 +1416,14 @@ static void _chv_enable_pll(struct intel_crtc *crtc, enum dpio_channel port = vlv_pipe_to_channel(pipe); u32 tmp; - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); /* Enable back the 10bit clock to display controller */ tmp = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW14(port)); tmp |= DPIO_DCLKP_EN; vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW14(port), tmp); - mutex_unlock(&dev_priv->sb_lock); + vlv_dpio_put(dev_priv); /* * Need to wait > 100ns between dclkp clock enable bit and PLL enable. @@ -1556,14 +1580,14 @@ static void chv_disable_pll(struct drm_i915_private *dev_priv, enum pipe pipe) I915_WRITE(DPLL(pipe), val); POSTING_READ(DPLL(pipe)); - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); /* Disable 10bit clock to display controller */ val = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW14(port)); val &= ~DPIO_DCLKP_EN; vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW14(port), val); - mutex_unlock(&dev_priv->sb_lock); + vlv_dpio_put(dev_priv); } void vlv_wait_port_ready(struct drm_i915_private *dev_priv, @@ -1891,7 +1915,7 @@ intel_tile_width_bytes(const struct drm_framebuffer *fb, int color_plane) switch (fb->modifier) { case DRM_FORMAT_MOD_LINEAR: - return cpp; + return intel_tile_size(dev_priv); case I915_FORMAT_MOD_X_TILED: if (IS_GEN(dev_priv, 2)) return 128; @@ -1934,11 +1958,8 @@ intel_tile_width_bytes(const struct drm_framebuffer *fb, int color_plane) static unsigned int intel_tile_height(const struct drm_framebuffer *fb, int color_plane) { - if (fb->modifier == DRM_FORMAT_MOD_LINEAR) - return 1; - else - return intel_tile_size(to_i915(fb->dev)) / - intel_tile_width_bytes(fb, color_plane); + return intel_tile_size(to_i915(fb->dev)) / + intel_tile_width_bytes(fb, color_plane); } /* Return the tile dimensions in pixel units */ @@ -1973,6 +1994,17 @@ unsigned int intel_rotation_info_size(const struct intel_rotation_info *rot_info return size; } +unsigned int intel_remapped_info_size(const struct intel_remapped_info *rem_info) +{ + unsigned int size = 0; + int i; + + for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) + size += rem_info->plane[i].width * rem_info->plane[i].height; + + return size; +} + static void intel_fill_fb_ggtt_view(struct i915_ggtt_view *view, const struct drm_framebuffer *fb, @@ -2042,7 +2074,9 @@ static bool intel_plane_uses_fence(const struct intel_plane_state *plane_state) struct intel_plane *plane = to_intel_plane(plane_state->base.plane); struct drm_i915_private *dev_priv = to_i915(plane->base.dev); - return INTEL_GEN(dev_priv) < 4 || plane->has_fbc; + return INTEL_GEN(dev_priv) < 4 || + (plane->has_fbc && + plane_state->view.type == I915_GGTT_VIEW_NORMAL); } struct i915_vma * @@ -2183,16 +2217,8 @@ void intel_add_fb_offsets(int *x, int *y, int color_plane) { - const struct intel_framebuffer *intel_fb = to_intel_framebuffer(state->base.fb); - unsigned int rotation = state->base.rotation; - - if (drm_rotation_90_or_270(rotation)) { - *x += intel_fb->rotated[color_plane].x; - *y += intel_fb->rotated[color_plane].y; - } else { - *x += intel_fb->normal[color_plane].x; - *y += intel_fb->normal[color_plane].y; - } + *x += state->color_plane[color_plane].x; + *y += state->color_plane[color_plane].y; } static u32 intel_adjust_tile_offset(int *x, int *y, @@ -2472,6 +2498,134 @@ bool is_ccs_modifier(u64 modifier) modifier == I915_FORMAT_MOD_Yf_TILED_CCS; } +u32 intel_plane_fb_max_stride(struct drm_i915_private *dev_priv, + u32 pixel_format, u64 modifier) +{ + struct intel_crtc *crtc; + struct intel_plane *plane; + + /* + * We assume the primary plane for pipe A has + * the highest stride limits of them all. + */ + crtc = intel_get_crtc_for_pipe(dev_priv, PIPE_A); + plane = to_intel_plane(crtc->base.primary); + + return plane->max_stride(plane, pixel_format, modifier, + DRM_MODE_ROTATE_0); +} + +static +u32 intel_fb_max_stride(struct drm_i915_private *dev_priv, + u32 pixel_format, u64 modifier) +{ + /* + * Arbitrary limit for gen4+ chosen to match the + * render engine max stride. + * + * The new CCS hash mode makes remapping impossible + */ + if (!is_ccs_modifier(modifier)) { + if (INTEL_GEN(dev_priv) >= 7) + return 256*1024; + else if (INTEL_GEN(dev_priv) >= 4) + return 128*1024; + } + + return intel_plane_fb_max_stride(dev_priv, pixel_format, modifier); +} + +static u32 +intel_fb_stride_alignment(const struct drm_framebuffer *fb, int color_plane) +{ + struct drm_i915_private *dev_priv = to_i915(fb->dev); + + if (fb->modifier == DRM_FORMAT_MOD_LINEAR) { + u32 max_stride = intel_plane_fb_max_stride(dev_priv, + fb->format->format, + fb->modifier); + + /* + * To make remapping with linear generally feasible + * we need the stride to be page aligned. + */ + if (fb->pitches[color_plane] > max_stride) + return intel_tile_size(dev_priv); + else + return 64; + } else { + return intel_tile_width_bytes(fb, color_plane); + } +} + +bool intel_plane_can_remap(const struct intel_plane_state *plane_state) +{ + struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); + const struct drm_framebuffer *fb = plane_state->base.fb; + int i; + + /* We don't want to deal with remapping with cursors */ + if (plane->id == PLANE_CURSOR) + return false; + + /* + * The display engine limits already match/exceed the + * render engine limits, so not much point in remapping. + * Would also need to deal with the fence POT alignment + * and gen2 2KiB GTT tile size. + */ + if (INTEL_GEN(dev_priv) < 4) + return false; + + /* + * The new CCS hash mode isn't compatible with remapping as + * the virtual address of the pages affects the compressed data. + */ + if (is_ccs_modifier(fb->modifier)) + return false; + + /* Linear needs a page aligned stride for remapping */ + if (fb->modifier == DRM_FORMAT_MOD_LINEAR) { + unsigned int alignment = intel_tile_size(dev_priv) - 1; + + for (i = 0; i < fb->format->num_planes; i++) { + if (fb->pitches[i] & alignment) + return false; + } + } + + return true; +} + +static bool intel_plane_needs_remap(const struct intel_plane_state *plane_state) +{ + struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + const struct drm_framebuffer *fb = plane_state->base.fb; + unsigned int rotation = plane_state->base.rotation; + u32 stride, max_stride; + + /* + * No remapping for invisible planes since we don't have + * an actual source viewport to remap. + */ + if (!plane_state->base.visible) + return false; + + if (!intel_plane_can_remap(plane_state)) + return false; + + /* + * FIXME: aux plane limits on gen9+ are + * unclear in Bspec, for now no checking. + */ + stride = intel_fb_pitch(fb, 0, rotation); + max_stride = plane->max_stride(plane, fb->format->format, + fb->modifier, rotation); + + return stride > max_stride; +} + static int intel_fill_fb_info(struct drm_i915_private *dev_priv, struct drm_framebuffer *fb) @@ -2637,6 +2791,168 @@ intel_fill_fb_info(struct drm_i915_private *dev_priv, return 0; } +static void +intel_plane_remap_gtt(struct intel_plane_state *plane_state) +{ + struct drm_i915_private *dev_priv = + to_i915(plane_state->base.plane->dev); + struct drm_framebuffer *fb = plane_state->base.fb; + struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); + struct intel_rotation_info *info = &plane_state->view.rotated; + unsigned int rotation = plane_state->base.rotation; + int i, num_planes = fb->format->num_planes; + unsigned int tile_size = intel_tile_size(dev_priv); + unsigned int src_x, src_y; + unsigned int src_w, src_h; + u32 gtt_offset = 0; + + memset(&plane_state->view, 0, sizeof(plane_state->view)); + plane_state->view.type = drm_rotation_90_or_270(rotation) ? + I915_GGTT_VIEW_ROTATED : I915_GGTT_VIEW_REMAPPED; + + src_x = plane_state->base.src.x1 >> 16; + src_y = plane_state->base.src.y1 >> 16; + src_w = drm_rect_width(&plane_state->base.src) >> 16; + src_h = drm_rect_height(&plane_state->base.src) >> 16; + + WARN_ON(is_ccs_modifier(fb->modifier)); + + /* Make src coordinates relative to the viewport */ + drm_rect_translate(&plane_state->base.src, + -(src_x << 16), -(src_y << 16)); + + /* Rotate src coordinates to match rotated GTT view */ + if (drm_rotation_90_or_270(rotation)) + drm_rect_rotate(&plane_state->base.src, + src_w << 16, src_h << 16, + DRM_MODE_ROTATE_270); + + for (i = 0; i < num_planes; i++) { + unsigned int hsub = i ? fb->format->hsub : 1; + unsigned int vsub = i ? fb->format->vsub : 1; + unsigned int cpp = fb->format->cpp[i]; + unsigned int tile_width, tile_height; + unsigned int width, height; + unsigned int pitch_tiles; + unsigned int x, y; + u32 offset; + + intel_tile_dims(fb, i, &tile_width, &tile_height); + + x = src_x / hsub; + y = src_y / vsub; + width = src_w / hsub; + height = src_h / vsub; + + /* + * First pixel of the src viewport from the + * start of the normal gtt mapping. + */ + x += intel_fb->normal[i].x; + y += intel_fb->normal[i].y; + + offset = intel_compute_aligned_offset(dev_priv, &x, &y, + fb, i, fb->pitches[i], + DRM_MODE_ROTATE_0, tile_size); + offset /= tile_size; + + info->plane[i].offset = offset; + info->plane[i].stride = DIV_ROUND_UP(fb->pitches[i], + tile_width * cpp); + info->plane[i].width = DIV_ROUND_UP(x + width, tile_width); + info->plane[i].height = DIV_ROUND_UP(y + height, tile_height); + + if (drm_rotation_90_or_270(rotation)) { + struct drm_rect r; + + /* rotate the x/y offsets to match the GTT view */ + r.x1 = x; + r.y1 = y; + r.x2 = x + width; + r.y2 = y + height; + drm_rect_rotate(&r, + info->plane[i].width * tile_width, + info->plane[i].height * tile_height, + DRM_MODE_ROTATE_270); + x = r.x1; + y = r.y1; + + pitch_tiles = info->plane[i].height; + plane_state->color_plane[i].stride = pitch_tiles * tile_height; + + /* rotate the tile dimensions to match the GTT view */ + swap(tile_width, tile_height); + } else { + pitch_tiles = info->plane[i].width; + plane_state->color_plane[i].stride = pitch_tiles * tile_width * cpp; + } + + /* + * We only keep the x/y offsets, so push all of the + * gtt offset into the x/y offsets. + */ + intel_adjust_tile_offset(&x, &y, + tile_width, tile_height, + tile_size, pitch_tiles, + gtt_offset * tile_size, 0); + + gtt_offset += info->plane[i].width * info->plane[i].height; + + plane_state->color_plane[i].offset = 0; + plane_state->color_plane[i].x = x; + plane_state->color_plane[i].y = y; + } +} + +static int +intel_plane_compute_gtt(struct intel_plane_state *plane_state) +{ + const struct intel_framebuffer *fb = + to_intel_framebuffer(plane_state->base.fb); + unsigned int rotation = plane_state->base.rotation; + int i, num_planes; + + if (!fb) + return 0; + + num_planes = fb->base.format->num_planes; + + if (intel_plane_needs_remap(plane_state)) { + intel_plane_remap_gtt(plane_state); + + /* + * Sometimes even remapping can't overcome + * the stride limitations :( Can happen with + * big plane sizes and suitably misaligned + * offsets. + */ + return intel_plane_check_stride(plane_state); + } + + intel_fill_fb_ggtt_view(&plane_state->view, &fb->base, rotation); + + for (i = 0; i < num_planes; i++) { + plane_state->color_plane[i].stride = intel_fb_pitch(&fb->base, i, rotation); + plane_state->color_plane[i].offset = 0; + + if (drm_rotation_90_or_270(rotation)) { + plane_state->color_plane[i].x = fb->rotated[i].x; + plane_state->color_plane[i].y = fb->rotated[i].y; + } else { + plane_state->color_plane[i].x = fb->normal[i].x; + plane_state->color_plane[i].y = fb->normal[i].y; + } + } + + /* Rotate src coordinates to match rotated GTT view */ + if (drm_rotation_90_or_270(rotation)) + drm_rect_rotate(&plane_state->base.src, + fb->base.width << 16, fb->base.height << 16, + DRM_MODE_ROTATE_270); + + return intel_plane_check_stride(plane_state); +} + static int i9xx_format_to_fourcc(int format) { switch (format) { @@ -2964,41 +3280,55 @@ static int skl_max_plane_width(const struct drm_framebuffer *fb, switch (fb->modifier) { case DRM_FORMAT_MOD_LINEAR: case I915_FORMAT_MOD_X_TILED: - switch (cpp) { - case 8: - return 4096; - case 4: - case 2: - case 1: - return 8192; - default: - MISSING_CASE(cpp); - break; - } - break; + return 4096; case I915_FORMAT_MOD_Y_TILED_CCS: case I915_FORMAT_MOD_Yf_TILED_CCS: /* FIXME AUX plane? */ case I915_FORMAT_MOD_Y_TILED: case I915_FORMAT_MOD_Yf_TILED: - switch (cpp) { - case 8: + if (cpp == 8) return 2048; - case 4: + else return 4096; - case 2: - case 1: - return 8192; - default: - MISSING_CASE(cpp); - break; - } - break; default: MISSING_CASE(fb->modifier); + return 2048; } +} - return 2048; +static int glk_max_plane_width(const struct drm_framebuffer *fb, + int color_plane, + unsigned int rotation) +{ + int cpp = fb->format->cpp[color_plane]; + + switch (fb->modifier) { + case DRM_FORMAT_MOD_LINEAR: + case I915_FORMAT_MOD_X_TILED: + if (cpp == 8) + return 4096; + else + return 5120; + case I915_FORMAT_MOD_Y_TILED_CCS: + case I915_FORMAT_MOD_Yf_TILED_CCS: + /* FIXME AUX plane? */ + case I915_FORMAT_MOD_Y_TILED: + case I915_FORMAT_MOD_Yf_TILED: + if (cpp == 8) + return 2048; + else + return 5120; + default: + MISSING_CASE(fb->modifier); + return 2048; + } +} + +static int icl_max_plane_width(const struct drm_framebuffer *fb, + int color_plane, + unsigned int rotation) +{ + return 5120; } static bool skl_check_main_ccs_coordinates(struct intel_plane_state *plane_state, @@ -3041,16 +3371,24 @@ static bool skl_check_main_ccs_coordinates(struct intel_plane_state *plane_state static int skl_check_main_surface(struct intel_plane_state *plane_state) { + struct drm_i915_private *dev_priv = to_i915(plane_state->base.plane->dev); const struct drm_framebuffer *fb = plane_state->base.fb; unsigned int rotation = plane_state->base.rotation; int x = plane_state->base.src.x1 >> 16; int y = plane_state->base.src.y1 >> 16; int w = drm_rect_width(&plane_state->base.src) >> 16; int h = drm_rect_height(&plane_state->base.src) >> 16; - int max_width = skl_max_plane_width(fb, 0, rotation); + int max_width; int max_height = 4096; u32 alignment, offset, aux_offset = plane_state->color_plane[1].offset; + if (INTEL_GEN(dev_priv) >= 11) + max_width = icl_max_plane_width(fb, 0, rotation); + else if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) + max_width = glk_max_plane_width(fb, 0, rotation); + else + max_width = skl_max_plane_width(fb, 0, rotation); + if (w > max_width || h > max_height) { DRM_DEBUG_KMS("requested Y/RGB source size %dx%d too big (limit %dx%d)\n", w, h, max_width, max_height); @@ -3113,6 +3451,14 @@ static int skl_check_main_surface(struct intel_plane_state *plane_state) plane_state->color_plane[0].x = x; plane_state->color_plane[0].y = y; + /* + * Put the final coordinates back so that the src + * coordinate checks will see the right values. + */ + drm_rect_translate(&plane_state->base.src, + (x << 16) - plane_state->base.src.x1, + (y << 16) - plane_state->base.src.y1); + return 0; } @@ -3169,26 +3515,15 @@ static int skl_check_ccs_aux_surface(struct intel_plane_state *plane_state) int skl_check_plane_surface(struct intel_plane_state *plane_state) { const struct drm_framebuffer *fb = plane_state->base.fb; - unsigned int rotation = plane_state->base.rotation; int ret; - intel_fill_fb_ggtt_view(&plane_state->view, fb, rotation); - plane_state->color_plane[0].stride = intel_fb_pitch(fb, 0, rotation); - plane_state->color_plane[1].stride = intel_fb_pitch(fb, 1, rotation); - - ret = intel_plane_check_stride(plane_state); + ret = intel_plane_compute_gtt(plane_state); if (ret) return ret; if (!plane_state->base.visible) return 0; - /* Rotate src coordinates to match rotated GTT view */ - if (drm_rotation_90_or_270(rotation)) - drm_rect_rotate(&plane_state->base.src, - fb->width << 16, fb->height << 16, - DRM_MODE_ROTATE_270); - /* * Handle the AUX surface first since * the main surface setup depends on it. @@ -3318,20 +3653,20 @@ int i9xx_check_plane_surface(struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = to_i915(plane_state->base.plane->dev); - const struct drm_framebuffer *fb = plane_state->base.fb; - unsigned int rotation = plane_state->base.rotation; - int src_x = plane_state->base.src.x1 >> 16; - int src_y = plane_state->base.src.y1 >> 16; + int src_x, src_y; u32 offset; int ret; - intel_fill_fb_ggtt_view(&plane_state->view, fb, rotation); - plane_state->color_plane[0].stride = intel_fb_pitch(fb, 0, rotation); - - ret = intel_plane_check_stride(plane_state); + ret = intel_plane_compute_gtt(plane_state); if (ret) return ret; + if (!plane_state->base.visible) + return 0; + + src_x = plane_state->base.src.x1 >> 16; + src_y = plane_state->base.src.y1 >> 16; + intel_add_fb_offsets(&src_x, &src_y, plane_state, 0); if (INTEL_GEN(dev_priv) >= 4) @@ -3340,8 +3675,17 @@ int i9xx_check_plane_surface(struct intel_plane_state *plane_state) else offset = 0; + /* + * Put the final coordinates back so that the src + * coordinate checks will see the right values. + */ + drm_rect_translate(&plane_state->base.src, + (src_x << 16) - plane_state->base.src.x1, + (src_y << 16) - plane_state->base.src.y1); + /* HSW/BDW do this automagically in hardware */ if (!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)) { + unsigned int rotation = plane_state->base.rotation; int src_w = drm_rect_width(&plane_state->base.src) >> 16; int src_h = drm_rect_height(&plane_state->base.src) >> 16; @@ -3378,6 +3722,10 @@ i9xx_plane_check(struct intel_crtc_state *crtc_state, if (ret) return ret; + ret = i9xx_check_plane_surface(plane_state); + if (ret) + return ret; + if (!plane_state->base.visible) return 0; @@ -3385,10 +3733,6 @@ i9xx_plane_check(struct intel_crtc_state *crtc_state, if (ret) return ret; - ret = i9xx_check_plane_surface(plane_state); - if (ret) - return ret; - plane_state->ctl = i9xx_plane_ctl(crtc_state, plane_state); return 0; @@ -3527,15 +3871,6 @@ static bool i9xx_plane_get_hw_state(struct intel_plane *plane, return ret; } -static u32 -intel_fb_stride_alignment(const struct drm_framebuffer *fb, int color_plane) -{ - if (fb->modifier == DRM_FORMAT_MOD_LINEAR) - return 64; - else - return intel_tile_width_bytes(fb, color_plane); -} - static void skl_detach_scaler(struct intel_crtc *intel_crtc, int id) { struct drm_device *dev = intel_crtc->base.dev; @@ -5015,6 +5350,21 @@ u16 skl_scaler_calc_phase(int sub, int scale, bool chroma_cosited) return ((phase >> 2) & PS_PHASE_MASK) | trip; } +#define SKL_MIN_SRC_W 8 +#define SKL_MAX_SRC_W 4096 +#define SKL_MIN_SRC_H 8 +#define SKL_MAX_SRC_H 4096 +#define SKL_MIN_DST_W 8 +#define SKL_MAX_DST_W 4096 +#define SKL_MIN_DST_H 8 +#define SKL_MAX_DST_H 4096 +#define ICL_MAX_SRC_W 5120 +#define ICL_MAX_SRC_H 4096 +#define ICL_MAX_DST_W 5120 +#define ICL_MAX_DST_H 4096 +#define SKL_MIN_YUV_420_SRC_W 16 +#define SKL_MIN_YUV_420_SRC_H 16 + static int skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach, unsigned int scaler_user, int *scaler_id, @@ -5294,10 +5644,8 @@ void hsw_enable_ips(const struct intel_crtc_state *crtc_state) WARN_ON(!(crtc_state->active_planes & ~BIT(PLANE_CURSOR))); if (IS_BROADWELL(dev_priv)) { - mutex_lock(&dev_priv->pcu_lock); WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL, IPS_ENABLE | IPS_PCODE_CONTROL)); - mutex_unlock(&dev_priv->pcu_lock); /* Quoting Art Runyan: "its not safe to expect any particular * value in IPS_CTL bit 31 after enabling IPS through the * mailbox." Moreover, the mailbox may return a bogus state, @@ -5327,9 +5675,7 @@ void hsw_disable_ips(const struct intel_crtc_state *crtc_state) return; if (IS_BROADWELL(dev_priv)) { - mutex_lock(&dev_priv->pcu_lock); WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL, 0)); - mutex_unlock(&dev_priv->pcu_lock); /* * Wait for PCODE to finish disabling IPS. The BSpec specified * 42ms timeout value leads to occasional timeouts so use 100ms @@ -5505,6 +5851,16 @@ static bool needs_nv12_wa(struct drm_i915_private *dev_priv, return false; } +static bool needs_scalerclk_wa(struct drm_i915_private *dev_priv, + const struct intel_crtc_state *crtc_state) +{ + /* Wa_2006604312:icl */ + if (crtc_state->scaler_state.scaler_users > 0 && IS_ICELAKE(dev_priv)) + return true; + + return false; +} + static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state) { struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); @@ -5538,11 +5894,13 @@ static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state) intel_post_enable_primary(&crtc->base, pipe_config); } - /* Display WA 827 */ if (needs_nv12_wa(dev_priv, old_crtc_state) && - !needs_nv12_wa(dev_priv, pipe_config)) { + !needs_nv12_wa(dev_priv, pipe_config)) skl_wa_827(dev_priv, crtc->pipe, false); - } + + if (needs_scalerclk_wa(dev_priv, old_crtc_state) && + !needs_scalerclk_wa(dev_priv, pipe_config)) + icl_wa_scalerclkgating(dev_priv, crtc->pipe, false); } static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state, @@ -5579,9 +5937,13 @@ static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state, /* Display WA 827 */ if (!needs_nv12_wa(dev_priv, old_crtc_state) && - needs_nv12_wa(dev_priv, pipe_config)) { + needs_nv12_wa(dev_priv, pipe_config)) skl_wa_827(dev_priv, crtc->pipe, true); - } + + /* Wa_2006604312:icl */ + if (!needs_scalerclk_wa(dev_priv, old_crtc_state) && + needs_scalerclk_wa(dev_priv, pipe_config)) + icl_wa_scalerclkgating(dev_priv, crtc->pipe, true); /* * Vblank time updates from the shadow to live plane control register @@ -5987,7 +6349,8 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, if (!transcoder_is_dsi(cpu_transcoder)) haswell_set_pipeconf(pipe_config); - haswell_set_pipemisc(pipe_config); + if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv)) + bdw_set_pipemisc(pipe_config); intel_crtc->active = true; @@ -6289,7 +6652,7 @@ static u64 get_crtc_power_domains(struct drm_crtc *crtc, mask |= BIT_ULL(POWER_DOMAIN_AUDIO); if (crtc_state->shared_dpll) - mask |= BIT_ULL(POWER_DOMAIN_PLLS); + mask |= BIT_ULL(POWER_DOMAIN_DISPLAY_CORE); return mask; } @@ -6872,7 +7235,7 @@ static u32 ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config) if (WARN_ON(!pfit_w || !pfit_h)) return pixel_rate; - pixel_rate = div_u64((u64)pixel_rate * pipe_w * pipe_h, + pixel_rate = div_u64(mul_u32_u32(pixel_rate, pipe_w * pipe_h), pfit_w * pfit_h); } @@ -6992,7 +7355,7 @@ static void compute_m_n(unsigned int m, unsigned int n, else *ret_n = min_t(unsigned int, roundup_pow_of_two(n), DATA_LINK_N_MAX); - *ret_m = div_u64((u64)m * *ret_n, n); + *ret_m = div_u64(mul_u32_u32(m, *ret_n), n); intel_reduce_m_n_ratio(ret_m, ret_n); } @@ -7225,7 +7588,7 @@ static void vlv_prepare_pll(struct intel_crtc *crtc, if ((pipe_config->dpll_hw_state.dpll & DPLL_VCO_ENABLE) == 0) return; - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); bestn = pipe_config->dpll.n; bestm1 = pipe_config->dpll.m1; @@ -7302,7 +7665,8 @@ static void vlv_prepare_pll(struct intel_crtc *crtc, vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW7(pipe), coreclk); vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW11(pipe), 0x87871000); - mutex_unlock(&dev_priv->sb_lock); + + vlv_dpio_put(dev_priv); } static void chv_prepare_pll(struct intel_crtc *crtc, @@ -7335,7 +7699,7 @@ static void chv_prepare_pll(struct intel_crtc *crtc, dpio_val = 0; loopfilter = 0; - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); /* p1 and p2 divider */ vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW13(port), @@ -7407,7 +7771,7 @@ static void chv_prepare_pll(struct intel_crtc *crtc, vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW14(port)) | DPIO_AFC_RECAL); - mutex_unlock(&dev_priv->sb_lock); + vlv_dpio_put(dev_priv); } /** @@ -7675,9 +8039,14 @@ static void intel_get_pipe_timings(struct intel_crtc *crtc, tmp = I915_READ(HTOTAL(cpu_transcoder)); pipe_config->base.adjusted_mode.crtc_hdisplay = (tmp & 0xffff) + 1; pipe_config->base.adjusted_mode.crtc_htotal = ((tmp >> 16) & 0xffff) + 1; - tmp = I915_READ(HBLANK(cpu_transcoder)); - pipe_config->base.adjusted_mode.crtc_hblank_start = (tmp & 0xffff) + 1; - pipe_config->base.adjusted_mode.crtc_hblank_end = ((tmp >> 16) & 0xffff) + 1; + + if (!transcoder_is_dsi(cpu_transcoder)) { + tmp = I915_READ(HBLANK(cpu_transcoder)); + pipe_config->base.adjusted_mode.crtc_hblank_start = + (tmp & 0xffff) + 1; + pipe_config->base.adjusted_mode.crtc_hblank_end = + ((tmp >> 16) & 0xffff) + 1; + } tmp = I915_READ(HSYNC(cpu_transcoder)); pipe_config->base.adjusted_mode.crtc_hsync_start = (tmp & 0xffff) + 1; pipe_config->base.adjusted_mode.crtc_hsync_end = ((tmp >> 16) & 0xffff) + 1; @@ -7685,9 +8054,14 @@ static void intel_get_pipe_timings(struct intel_crtc *crtc, tmp = I915_READ(VTOTAL(cpu_transcoder)); pipe_config->base.adjusted_mode.crtc_vdisplay = (tmp & 0xffff) + 1; pipe_config->base.adjusted_mode.crtc_vtotal = ((tmp >> 16) & 0xffff) + 1; - tmp = I915_READ(VBLANK(cpu_transcoder)); - pipe_config->base.adjusted_mode.crtc_vblank_start = (tmp & 0xffff) + 1; - pipe_config->base.adjusted_mode.crtc_vblank_end = ((tmp >> 16) & 0xffff) + 1; + + if (!transcoder_is_dsi(cpu_transcoder)) { + tmp = I915_READ(VBLANK(cpu_transcoder)); + pipe_config->base.adjusted_mode.crtc_vblank_start = + (tmp & 0xffff) + 1; + pipe_config->base.adjusted_mode.crtc_vblank_end = + ((tmp >> 16) & 0xffff) + 1; + } tmp = I915_READ(VSYNC(cpu_transcoder)); pipe_config->base.adjusted_mode.crtc_vsync_start = (tmp & 0xffff) + 1; pipe_config->base.adjusted_mode.crtc_vsync_end = ((tmp >> 16) & 0xffff) + 1; @@ -8033,9 +8407,9 @@ static void vlv_crtc_clock_get(struct intel_crtc *crtc, if ((pipe_config->dpll_hw_state.dpll & DPLL_VCO_ENABLE) == 0) return; - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); mdiv = vlv_dpio_read(dev_priv, pipe, VLV_PLL_DW3(pipe)); - mutex_unlock(&dev_priv->sb_lock); + vlv_dpio_put(dev_priv); clock.m1 = (mdiv >> DPIO_M1DIV_SHIFT) & 7; clock.m2 = mdiv & DPIO_M2DIV_MASK; @@ -8144,13 +8518,13 @@ static void chv_crtc_clock_get(struct intel_crtc *crtc, if ((pipe_config->dpll_hw_state.dpll & DPLL_VCO_ENABLE) == 0) return; - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); cmn_dw13 = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW13(port)); pll_dw0 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW0(port)); pll_dw1 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW1(port)); pll_dw2 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW2(port)); pll_dw3 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW3(port)); - mutex_unlock(&dev_priv->sb_lock); + vlv_dpio_put(dev_priv); clock.m1 = (pll_dw1 & 0x7) == DPIO_CHV_M1_DIV_BY_2 ? 2 : 0; clock.m2 = (pll_dw0 & 0xff) << 22; @@ -8650,7 +9024,7 @@ static void lpt_enable_clkout_dp(struct drm_i915_private *dev_priv, } /* Sequence to disable CLKOUT_DP */ -static void lpt_disable_clkout_dp(struct drm_i915_private *dev_priv) +void lpt_disable_clkout_dp(struct drm_i915_private *dev_priv) { u32 reg, tmp; @@ -8833,44 +9207,68 @@ static void haswell_set_pipeconf(const struct intel_crtc_state *crtc_state) POSTING_READ(PIPECONF(cpu_transcoder)); } -static void haswell_set_pipemisc(const struct intel_crtc_state *crtc_state) +static void bdw_set_pipemisc(const struct intel_crtc_state *crtc_state) { - struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); - struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + u32 val = 0; - if (IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9) { - u32 val = 0; + switch (crtc_state->pipe_bpp) { + case 18: + val |= PIPEMISC_DITHER_6_BPC; + break; + case 24: + val |= PIPEMISC_DITHER_8_BPC; + break; + case 30: + val |= PIPEMISC_DITHER_10_BPC; + break; + case 36: + val |= PIPEMISC_DITHER_12_BPC; + break; + default: + MISSING_CASE(crtc_state->pipe_bpp); + break; + } - switch (crtc_state->pipe_bpp) { - case 18: - val |= PIPEMISC_DITHER_6_BPC; - break; - case 24: - val |= PIPEMISC_DITHER_8_BPC; - break; - case 30: - val |= PIPEMISC_DITHER_10_BPC; - break; - case 36: - val |= PIPEMISC_DITHER_12_BPC; - break; - default: - /* Case prevented by pipe_config_set_bpp. */ - BUG(); - } + if (crtc_state->dither) + val |= PIPEMISC_DITHER_ENABLE | PIPEMISC_DITHER_TYPE_SP; + + if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420 || + crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR444) + val |= PIPEMISC_OUTPUT_COLORSPACE_YUV; - if (crtc_state->dither) - val |= PIPEMISC_DITHER_ENABLE | PIPEMISC_DITHER_TYPE_SP; + if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420) + val |= PIPEMISC_YUV420_ENABLE | + PIPEMISC_YUV420_MODE_FULL_BLEND; + + if (INTEL_GEN(dev_priv) >= 11 && + (crtc_state->active_planes & ~(icl_hdr_plane_mask() | + BIT(PLANE_CURSOR))) == 0) + val |= PIPEMISC_HDR_MODE_PRECISION; - if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420 || - crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR444) - val |= PIPEMISC_OUTPUT_COLORSPACE_YUV; + I915_WRITE(PIPEMISC(crtc->pipe), val); +} - if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420) - val |= PIPEMISC_YUV420_ENABLE | - PIPEMISC_YUV420_MODE_FULL_BLEND; +int bdw_get_pipemisc_bpp(struct intel_crtc *crtc) +{ + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + u32 tmp; - I915_WRITE(PIPEMISC(intel_crtc->pipe), val); + tmp = I915_READ(PIPEMISC(crtc->pipe)); + + switch (tmp & PIPEMISC_DITHER_BPC_MASK) { + case PIPEMISC_DITHER_6_BPC: + return 18; + case PIPEMISC_DITHER_8_BPC: + return 24; + case PIPEMISC_DITHER_10_BPC: + return 30; + case PIPEMISC_DITHER_12_BPC: + return 36; + default: + MISSING_CASE(tmp); + return 0; } } @@ -9405,228 +9803,6 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc, return ret; } - -static void assert_can_disable_lcpll(struct drm_i915_private *dev_priv) -{ - struct drm_device *dev = &dev_priv->drm; - struct intel_crtc *crtc; - - for_each_intel_crtc(dev, crtc) - I915_STATE_WARN(crtc->active, "CRTC for pipe %c enabled\n", - pipe_name(crtc->pipe)); - - I915_STATE_WARN(I915_READ(HSW_PWR_WELL_CTL2), - "Display power well on\n"); - I915_STATE_WARN(I915_READ(SPLL_CTL) & SPLL_PLL_ENABLE, "SPLL enabled\n"); - I915_STATE_WARN(I915_READ(WRPLL_CTL(0)) & WRPLL_PLL_ENABLE, "WRPLL1 enabled\n"); - I915_STATE_WARN(I915_READ(WRPLL_CTL(1)) & WRPLL_PLL_ENABLE, "WRPLL2 enabled\n"); - I915_STATE_WARN(I915_READ(PP_STATUS(0)) & PP_ON, "Panel power on\n"); - I915_STATE_WARN(I915_READ(BLC_PWM_CPU_CTL2) & BLM_PWM_ENABLE, - "CPU PWM1 enabled\n"); - if (IS_HASWELL(dev_priv)) - I915_STATE_WARN(I915_READ(HSW_BLC_PWM2_CTL) & BLM_PWM_ENABLE, - "CPU PWM2 enabled\n"); - I915_STATE_WARN(I915_READ(BLC_PWM_PCH_CTL1) & BLM_PCH_PWM_ENABLE, - "PCH PWM1 enabled\n"); - I915_STATE_WARN(I915_READ(UTIL_PIN_CTL) & UTIL_PIN_ENABLE, - "Utility pin enabled\n"); - I915_STATE_WARN(I915_READ(PCH_GTC_CTL) & PCH_GTC_ENABLE, "PCH GTC enabled\n"); - - /* - * In theory we can still leave IRQs enabled, as long as only the HPD - * interrupts remain enabled. We used to check for that, but since it's - * gen-specific and since we only disable LCPLL after we fully disable - * the interrupts, the check below should be enough. - */ - I915_STATE_WARN(intel_irqs_enabled(dev_priv), "IRQs enabled\n"); -} - -static u32 hsw_read_dcomp(struct drm_i915_private *dev_priv) -{ - if (IS_HASWELL(dev_priv)) - return I915_READ(D_COMP_HSW); - else - return I915_READ(D_COMP_BDW); -} - -static void hsw_write_dcomp(struct drm_i915_private *dev_priv, u32 val) -{ - if (IS_HASWELL(dev_priv)) { - mutex_lock(&dev_priv->pcu_lock); - if (sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_D_COMP, - val)) - DRM_DEBUG_KMS("Failed to write to D_COMP\n"); - mutex_unlock(&dev_priv->pcu_lock); - } else { - I915_WRITE(D_COMP_BDW, val); - POSTING_READ(D_COMP_BDW); - } -} - -/* - * This function implements pieces of two sequences from BSpec: - * - Sequence for display software to disable LCPLL - * - Sequence for display software to allow package C8+ - * The steps implemented here are just the steps that actually touch the LCPLL - * register. Callers should take care of disabling all the display engine - * functions, doing the mode unset, fixing interrupts, etc. - */ -static void hsw_disable_lcpll(struct drm_i915_private *dev_priv, - bool switch_to_fclk, bool allow_power_down) -{ - u32 val; - - assert_can_disable_lcpll(dev_priv); - - val = I915_READ(LCPLL_CTL); - - if (switch_to_fclk) { - val |= LCPLL_CD_SOURCE_FCLK; - I915_WRITE(LCPLL_CTL, val); - - if (wait_for_us(I915_READ(LCPLL_CTL) & - LCPLL_CD_SOURCE_FCLK_DONE, 1)) - DRM_ERROR("Switching to FCLK failed\n"); - - val = I915_READ(LCPLL_CTL); - } - - val |= LCPLL_PLL_DISABLE; - I915_WRITE(LCPLL_CTL, val); - POSTING_READ(LCPLL_CTL); - - if (intel_wait_for_register(&dev_priv->uncore, - LCPLL_CTL, LCPLL_PLL_LOCK, 0, 1)) - DRM_ERROR("LCPLL still locked\n"); - - val = hsw_read_dcomp(dev_priv); - val |= D_COMP_COMP_DISABLE; - hsw_write_dcomp(dev_priv, val); - ndelay(100); - - if (wait_for((hsw_read_dcomp(dev_priv) & D_COMP_RCOMP_IN_PROGRESS) == 0, - 1)) - DRM_ERROR("D_COMP RCOMP still in progress\n"); - - if (allow_power_down) { - val = I915_READ(LCPLL_CTL); - val |= LCPLL_POWER_DOWN_ALLOW; - I915_WRITE(LCPLL_CTL, val); - POSTING_READ(LCPLL_CTL); - } -} - -/* - * Fully restores LCPLL, disallowing power down and switching back to LCPLL - * source. - */ -static void hsw_restore_lcpll(struct drm_i915_private *dev_priv) -{ - u32 val; - - val = I915_READ(LCPLL_CTL); - - if ((val & (LCPLL_PLL_LOCK | LCPLL_PLL_DISABLE | LCPLL_CD_SOURCE_FCLK | - LCPLL_POWER_DOWN_ALLOW)) == LCPLL_PLL_LOCK) - return; - - /* - * Make sure we're not on PC8 state before disabling PC8, otherwise - * we'll hang the machine. To prevent PC8 state, just enable force_wake. - */ - intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); - - if (val & LCPLL_POWER_DOWN_ALLOW) { - val &= ~LCPLL_POWER_DOWN_ALLOW; - I915_WRITE(LCPLL_CTL, val); - POSTING_READ(LCPLL_CTL); - } - - val = hsw_read_dcomp(dev_priv); - val |= D_COMP_COMP_FORCE; - val &= ~D_COMP_COMP_DISABLE; - hsw_write_dcomp(dev_priv, val); - - val = I915_READ(LCPLL_CTL); - val &= ~LCPLL_PLL_DISABLE; - I915_WRITE(LCPLL_CTL, val); - - if (intel_wait_for_register(&dev_priv->uncore, - LCPLL_CTL, LCPLL_PLL_LOCK, LCPLL_PLL_LOCK, - 5)) - DRM_ERROR("LCPLL not locked yet\n"); - - if (val & LCPLL_CD_SOURCE_FCLK) { - val = I915_READ(LCPLL_CTL); - val &= ~LCPLL_CD_SOURCE_FCLK; - I915_WRITE(LCPLL_CTL, val); - - if (wait_for_us((I915_READ(LCPLL_CTL) & - LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1)) - DRM_ERROR("Switching back to LCPLL failed\n"); - } - - intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); - - intel_update_cdclk(dev_priv); - intel_dump_cdclk_state(&dev_priv->cdclk.hw, "Current CDCLK"); -} - -/* - * Package states C8 and deeper are really deep PC states that can only be - * reached when all the devices on the system allow it, so even if the graphics - * device allows PC8+, it doesn't mean the system will actually get to these - * states. Our driver only allows PC8+ when going into runtime PM. - * - * The requirements for PC8+ are that all the outputs are disabled, the power - * well is disabled and most interrupts are disabled, and these are also - * requirements for runtime PM. When these conditions are met, we manually do - * the other conditions: disable the interrupts, clocks and switch LCPLL refclk - * to Fclk. If we're in PC8+ and we get an non-hotplug interrupt, we can hard - * hang the machine. - * - * When we really reach PC8 or deeper states (not just when we allow it) we lose - * the state of some registers, so when we come back from PC8+ we need to - * restore this state. We don't get into PC8+ if we're not in RC6, so we don't - * need to take care of the registers kept by RC6. Notice that this happens even - * if we don't put the device in PCI D3 state (which is what currently happens - * because of the runtime PM support). - * - * For more, read "Display Sequences for Package C8" on the hardware - * documentation. - */ -void hsw_enable_pc8(struct drm_i915_private *dev_priv) -{ - u32 val; - - DRM_DEBUG_KMS("Enabling package C8+\n"); - - if (HAS_PCH_LPT_LP(dev_priv)) { - val = I915_READ(SOUTH_DSPCLK_GATE_D); - val &= ~PCH_LP_PARTITION_LEVEL_DISABLE; - I915_WRITE(SOUTH_DSPCLK_GATE_D, val); - } - - lpt_disable_clkout_dp(dev_priv); - hsw_disable_lcpll(dev_priv, true, true); -} - -void hsw_disable_pc8(struct drm_i915_private *dev_priv) -{ - u32 val; - - DRM_DEBUG_KMS("Disabling package C8+\n"); - - hsw_restore_lcpll(dev_priv); - lpt_init_pch_refclk(dev_priv); - - if (HAS_PCH_LPT_LP(dev_priv)) { - val = I915_READ(SOUTH_DSPCLK_GATE_D); - val |= PCH_LP_PARTITION_LEVEL_DISABLE; - I915_WRITE(SOUTH_DSPCLK_GATE_D, val); - } -} - static int haswell_crtc_compute_clock(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state) { @@ -9797,6 +9973,7 @@ static bool hsw_get_transcoder_state(struct intel_crtc *crtc, for_each_set_bit(panel_transcoder, &panel_transcoder_mask, ARRAY_SIZE(INTEL_INFO(dev_priv)->trans_offsets)) { + bool force_thru = false; enum pipe trans_pipe; tmp = I915_READ(TRANS_DDI_FUNC_CTL(panel_transcoder)); @@ -9818,6 +9995,8 @@ static bool hsw_get_transcoder_state(struct intel_crtc *crtc, transcoder_name(panel_transcoder)); /* fall through */ case TRANS_DDI_EDP_INPUT_A_ONOFF: + force_thru = true; + /* fall through */ case TRANS_DDI_EDP_INPUT_A_ON: trans_pipe = PIPE_A; break; @@ -9829,8 +10008,10 @@ static bool hsw_get_transcoder_state(struct intel_crtc *crtc, break; } - if (trans_pipe == crtc->pipe) + if (trans_pipe == crtc->pipe) { pipe_config->cpu_transcoder = panel_transcoder; + pipe_config->pch_pfit.force_thru = force_thru; + } } /* @@ -10115,19 +10296,17 @@ static bool intel_cursor_size_ok(const struct intel_plane_state *plane_state) static int intel_cursor_check_surface(struct intel_plane_state *plane_state) { - const struct drm_framebuffer *fb = plane_state->base.fb; - unsigned int rotation = plane_state->base.rotation; int src_x, src_y; u32 offset; int ret; - intel_fill_fb_ggtt_view(&plane_state->view, fb, rotation); - plane_state->color_plane[0].stride = intel_fb_pitch(fb, 0, rotation); - - ret = intel_plane_check_stride(plane_state); + ret = intel_plane_compute_gtt(plane_state); if (ret) return ret; + if (!plane_state->base.visible) + return 0; + src_x = plane_state->base.src_x >> 16; src_y = plane_state->base.src_y >> 16; @@ -10164,6 +10343,10 @@ static int intel_check_cursor(struct intel_crtc_state *crtc_state, if (ret) return ret; + ret = intel_cursor_check_surface(plane_state); + if (ret) + return ret; + if (!plane_state->base.visible) return 0; @@ -10171,10 +10354,6 @@ static int intel_check_cursor(struct intel_crtc_state *crtc_state, if (ret) return ret; - ret = intel_cursor_check_surface(plane_state); - if (ret) - return ret; - return 0; } @@ -11665,10 +11844,11 @@ static void intel_dump_pipe_config(struct intel_crtc *crtc, pipe_config->gmch_pfit.pgm_ratios, pipe_config->gmch_pfit.lvds_border_bits); else - DRM_DEBUG_KMS("pch pfit: pos: 0x%08x, size: 0x%08x, %s\n", + DRM_DEBUG_KMS("pch pfit: pos: 0x%08x, size: 0x%08x, %s, force thru: %s\n", pipe_config->pch_pfit.pos, pipe_config->pch_pfit.size, - enableddisabled(pipe_config->pch_pfit.enabled)); + enableddisabled(pipe_config->pch_pfit.enabled), + yesno(pipe_config->pch_pfit.force_thru)); DRM_DEBUG_KMS("ips: %i, double wide: %i\n", pipe_config->ips_enabled, pipe_config->double_wide); @@ -11790,7 +11970,6 @@ clear_intel_crtc_state(struct intel_crtc_state *crtc_state) saved_state->scaler_state = crtc_state->scaler_state; saved_state->shared_dpll = crtc_state->shared_dpll; saved_state->dpll_hw_state = crtc_state->dpll_hw_state; - saved_state->pch_pfit.force_thru = crtc_state->pch_pfit.force_thru; saved_state->crc_enabled = crtc_state->crc_enabled; if (IS_G4X(dev_priv) || IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) @@ -12082,7 +12261,6 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, struct intel_crtc_state *pipe_config, bool adjust) { - struct intel_crtc *crtc = to_intel_crtc(current_config->base.crtc); bool ret = true; bool fixup_inherited = adjust && (current_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED) && @@ -12308,9 +12486,7 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, * Changing the EDP transcoder input mux * (A_ONOFF vs. A_ON) requires a full modeset. */ - if (IS_HASWELL(dev_priv) && crtc->pipe == PIPE_A && - current_config->cpu_transcoder == TRANSCODER_EDP) - PIPE_CONF_CHECK_BOOL(pch_pfit.enabled); + PIPE_CONF_CHECK_BOOL(pch_pfit.force_thru); if (!adjust) { PIPE_CONF_CHECK_I(pipe_src_w); @@ -14105,6 +14281,9 @@ static void intel_begin_crtc_commit(struct intel_atomic_state *state, else if (INTEL_GEN(dev_priv) >= 9) skl_detach_scalers(new_crtc_state); + if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv)) + bdw_set_pipemisc(new_crtc_state); + out: if (dev_priv->display.atomic_update_watermarks) dev_priv->display.atomic_update_watermarks(state, @@ -15059,31 +15238,13 @@ static const struct drm_framebuffer_funcs intel_fb_funcs = { .dirty = intel_user_framebuffer_dirty, }; -static -u32 intel_fb_pitch_limit(struct drm_i915_private *dev_priv, - u32 pixel_format, u64 fb_modifier) -{ - struct intel_crtc *crtc; - struct intel_plane *plane; - - /* - * We assume the primary plane for pipe A has - * the highest stride limits of them all. - */ - crtc = intel_get_crtc_for_pipe(dev_priv, PIPE_A); - plane = to_intel_plane(crtc->base.primary); - - return plane->max_stride(plane, pixel_format, fb_modifier, - DRM_MODE_ROTATE_0); -} - static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, struct drm_i915_gem_object *obj, struct drm_mode_fb_cmd2 *mode_cmd) { struct drm_i915_private *dev_priv = to_i915(obj->base.dev); struct drm_framebuffer *fb = &intel_fb->base; - u32 pitch_limit; + u32 max_stride; unsigned int tiling, stride; int ret = -EINVAL; int i; @@ -15135,13 +15296,13 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, goto err; } - pitch_limit = intel_fb_pitch_limit(dev_priv, mode_cmd->pixel_format, - mode_cmd->modifier[0]); - if (mode_cmd->pitches[0] > pitch_limit) { + max_stride = intel_fb_max_stride(dev_priv, mode_cmd->pixel_format, + mode_cmd->modifier[0]); + if (mode_cmd->pitches[0] > max_stride) { DRM_DEBUG_KMS("%s pitch (%u) must be at most %d\n", mode_cmd->modifier[0] != DRM_FORMAT_MOD_LINEAR ? "tiled" : "linear", - mode_cmd->pitches[0], pitch_limit); + mode_cmd->pitches[0], max_stride); goto err; } @@ -15418,6 +15579,16 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv) dev_priv->display.update_crtcs = intel_update_crtcs; } +static i915_reg_t i915_vgacntrl_reg(struct drm_i915_private *dev_priv) +{ + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + return VLV_VGACNTRL; + else if (INTEL_GEN(dev_priv) >= 5) + return CPU_VGACNTRL; + else + return VGACNTRL; +} + /* Disable the VGA plane that we never use */ static void i915_disable_vga(struct drm_i915_private *dev_priv) { @@ -15655,16 +15826,22 @@ int intel_modeset_init(struct drm_device *dev) } } - /* maximum framebuffer dimensions */ - if (IS_GEN(dev_priv, 2)) { - dev->mode_config.max_width = 2048; - dev->mode_config.max_height = 2048; + /* + * Maximum framebuffer dimensions, chosen to match + * the maximum render engine surface size on gen4+. + */ + if (INTEL_GEN(dev_priv) >= 7) { + dev->mode_config.max_width = 16384; + dev->mode_config.max_height = 16384; + } else if (INTEL_GEN(dev_priv) >= 4) { + dev->mode_config.max_width = 8192; + dev->mode_config.max_height = 8192; } else if (IS_GEN(dev_priv, 3)) { dev->mode_config.max_width = 4096; dev->mode_config.max_height = 4096; } else { - dev->mode_config.max_width = 8192; - dev->mode_config.max_height = 8192; + dev->mode_config.max_width = 2048; + dev->mode_config.max_height = 2048; } if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) { @@ -16563,7 +16740,7 @@ void intel_modeset_cleanup(struct drm_device *dev) intel_overlay_cleanup(dev_priv); - intel_teardown_gmbus(dev_priv); + intel_gmbus_teardown(dev_priv); destroy_workqueue(dev_priv->modeset_wq); diff --git a/drivers/gpu/drm/i915/intel_display.h b/drivers/gpu/drm/i915/intel_display.h index 2220588e86ac75e4f7af1705c05a8eb7d67c8ce0..a43d54089be359caaa19ab0be9e0b85817d0b698 100644 --- a/drivers/gpu/drm/i915/intel_display.h +++ b/drivers/gpu/drm/i915/intel_display.h @@ -28,6 +28,9 @@ #include #include +struct drm_i915_private; +struct intel_plane_state; + enum i915_gpio { GPIOA, GPIOB, @@ -218,6 +221,7 @@ enum aux_ch { #define aux_ch_name(a) ((a) + 'A') enum intel_display_power_domain { + POWER_DOMAIN_DISPLAY_CORE, POWER_DOMAIN_PIPE_A, POWER_DOMAIN_PIPE_B, POWER_DOMAIN_PIPE_C, @@ -248,7 +252,6 @@ enum intel_display_power_domain { POWER_DOMAIN_PORT_OTHER, POWER_DOMAIN_VGA, POWER_DOMAIN_AUDIO, - POWER_DOMAIN_PLLS, POWER_DOMAIN_AUX_A, POWER_DOMAIN_AUX_B, POWER_DOMAIN_AUX_C, @@ -432,4 +435,9 @@ void intel_link_compute_m_n(u16 bpp, int nlanes, struct intel_link_m_n *m_n, bool constant_n); bool is_ccs_modifier(u64 modifier); +void lpt_disable_clkout_dp(struct drm_i915_private *dev_priv); +u32 intel_plane_fb_max_stride(struct drm_i915_private *dev_priv, + u32 pixel_format, u64 modifier); +bool intel_plane_can_remap(const struct intel_plane_state *plane_state); + #endif diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 560274d1c50b270367f22fb6344f1f5a49f4009d..24b56b2a76c8592fb94e57cc24fcdb9554f3d108 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -31,6 +31,7 @@ #include #include #include + #include #include @@ -41,18 +42,27 @@ #include #include +#include "i915_debugfs.h" #include "i915_drv.h" +#include "intel_atomic.h" #include "intel_audio.h" #include "intel_connector.h" #include "intel_ddi.h" #include "intel_dp.h" +#include "intel_dp_link_training.h" +#include "intel_dp_mst.h" +#include "intel_dpio_phy.h" #include "intel_drv.h" +#include "intel_fifo_underrun.h" #include "intel_hdcp.h" #include "intel_hdmi.h" +#include "intel_hotplug.h" #include "intel_lspcon.h" #include "intel_lvds.h" #include "intel_panel.h" #include "intel_psr.h" +#include "intel_sideband.h" +#include "intel_vdsc.h" #define DP_DPRX_ESI_LEN 14 @@ -206,14 +216,17 @@ static int intel_dp_get_fia_supported_lane_count(struct intel_dp *intel_dp) struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); enum tc_port tc_port = intel_port_to_tc(dev_priv, dig_port->base.port); + intel_wakeref_t wakeref; u32 lane_info; if (tc_port == PORT_TC_NONE || dig_port->tc_type != TC_PORT_TYPEC) return 4; - lane_info = (I915_READ(PORT_TX_DFLEXDPSP) & - DP_LANE_ASSIGNMENT_MASK(tc_port)) >> - DP_LANE_ASSIGNMENT_SHIFT(tc_port); + lane_info = 0; + with_intel_display_power(dev_priv, POWER_DOMAIN_DISPLAY_CORE, wakeref) + lane_info = (I915_READ(PORT_TX_DFLEXDPSP) & + DP_LANE_ASSIGNMENT_MASK(tc_port)) >> + DP_LANE_ASSIGNMENT_SHIFT(tc_port); switch (lane_info) { default: @@ -1211,7 +1224,10 @@ intel_dp_aux_xfer(struct intel_dp *intel_dp, to_i915(intel_dig_port->base.base.dev); i915_reg_t ch_ctl, ch_data[5]; u32 aux_clock_divider; - intel_wakeref_t wakeref; + enum intel_display_power_domain aux_domain = + intel_aux_power_domain(intel_dig_port); + intel_wakeref_t aux_wakeref; + intel_wakeref_t pps_wakeref; int i, ret, recv_bytes; int try, clock = 0; u32 status; @@ -1221,7 +1237,8 @@ intel_dp_aux_xfer(struct intel_dp *intel_dp, for (i = 0; i < ARRAY_SIZE(ch_data); i++) ch_data[i] = intel_dp->aux_ch_data_reg(intel_dp, i); - wakeref = pps_lock(intel_dp); + aux_wakeref = intel_display_power_get(dev_priv, aux_domain); + pps_wakeref = pps_lock(intel_dp); /* * We will be called with VDD already enabled for dpcd/edid/oui reads. @@ -1367,7 +1384,8 @@ intel_dp_aux_xfer(struct intel_dp *intel_dp, if (vdd) edp_panel_vdd_off(intel_dp, false); - pps_unlock(intel_dp, wakeref); + pps_unlock(intel_dp, pps_wakeref); + intel_display_power_put_async(dev_priv, aux_domain, aux_wakeref); return ret; } @@ -1832,6 +1850,19 @@ intel_dp_adjust_compliance_config(struct intel_dp *intel_dp, } } +static int intel_dp_output_bpp(const struct intel_crtc_state *crtc_state, int bpp) +{ + /* + * bpp value was assumed to RGB format. And YCbCr 4:2:0 output + * format of the number of bytes per pixel will be half the number + * of bytes of RGB pixel. + */ + if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420) + bpp /= 2; + + return bpp; +} + /* Optimize link config in order: max bpp, min clock, min lanes */ static int intel_dp_compute_link_config_wide(struct intel_dp *intel_dp, @@ -2075,6 +2106,36 @@ intel_dp_compute_link_config(struct intel_encoder *encoder, return 0; } +static int +intel_dp_ycbcr420_config(struct intel_dp *intel_dp, + struct drm_connector *connector, + struct intel_crtc_state *crtc_state) +{ + const struct drm_display_info *info = &connector->display_info; + const struct drm_display_mode *adjusted_mode = + &crtc_state->base.adjusted_mode; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + int ret; + + if (!drm_mode_is_420_only(info, adjusted_mode) || + !intel_dp_get_colorimetry_status(intel_dp) || + !connector->ycbcr_420_allowed) + return 0; + + crtc_state->output_format = INTEL_OUTPUT_FORMAT_YCBCR420; + + /* YCBCR 420 output conversion needs a scaler */ + ret = skl_update_scaler_crtc(crtc_state); + if (ret) { + DRM_DEBUG_KMS("Scaler allocation for output failed\n"); + return ret; + } + + intel_pch_panel_fitting(crtc, crtc_state, DRM_MODE_SCALE_FULLSCREEN); + + return 0; +} + bool intel_dp_limited_color_range(const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state) { @@ -2114,7 +2175,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, to_intel_digital_connector_state(conn_state); bool constant_n = drm_dp_has_quirk(&intel_dp->desc, DP_DPCD_QUIRK_CONSTANT_N); - int ret, output_bpp; + int ret = 0, output_bpp; if (HAS_PCH_SPLIT(dev_priv) && !HAS_DDI(dev_priv) && port != PORT_A) pipe_config->has_pch_encoder = true; @@ -2122,6 +2183,12 @@ intel_dp_compute_config(struct intel_encoder *encoder, pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB; if (lspcon->active) lspcon_ycbcr420_config(&intel_connector->base, pipe_config); + else + ret = intel_dp_ycbcr420_config(intel_dp, &intel_connector->base, + pipe_config); + + if (ret) + return ret; pipe_config->has_drrs = false; if (IS_G4X(dev_priv) || port == PORT_A) @@ -2169,7 +2236,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, if (pipe_config->dsc_params.compression_enable) output_bpp = pipe_config->dsc_params.compressed_bpp; else - output_bpp = pipe_config->pipe_bpp; + output_bpp = intel_dp_output_bpp(pipe_config, pipe_config->pipe_bpp); intel_link_compute_m_n(output_bpp, pipe_config->lane_count, @@ -3148,12 +3215,12 @@ static void chv_post_disable_dp(struct intel_encoder *encoder, intel_dp_link_down(encoder, old_crtc_state); - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); /* Assert data lane reset */ chv_data_lane_soft_reset(encoder, old_crtc_state, true); - mutex_unlock(&dev_priv->sb_lock); + vlv_dpio_put(dev_priv); } static void @@ -4041,6 +4108,16 @@ intel_dp_read_dpcd(struct intel_dp *intel_dp) return intel_dp->dpcd[DP_DPCD_REV] != 0; } +bool intel_dp_get_colorimetry_status(struct intel_dp *intel_dp) +{ + u8 dprx = 0; + + if (drm_dp_dpcd_readb(&intel_dp->aux, DP_DPRX_FEATURE_ENUMERATION_LIST, + &dprx) != 1) + return false; + return dprx & DP_VSC_SDP_EXT_FOR_COLORIMETRY_SUPPORTED; +} + static void intel_dp_get_dsc_sink_cap(struct intel_dp *intel_dp) { /* @@ -4351,6 +4428,96 @@ u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp, return 0; } +static void +intel_pixel_encoding_setup_vsc(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state) +{ + struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); + struct dp_sdp vsc_sdp = {}; + + /* Prepare VSC Header for SU as per DP 1.4a spec, Table 2-119 */ + vsc_sdp.sdp_header.HB0 = 0; + vsc_sdp.sdp_header.HB1 = 0x7; + + /* + * VSC SDP supporting 3D stereo, PSR2, and Pixel Encoding/ + * Colorimetry Format indication. + */ + vsc_sdp.sdp_header.HB2 = 0x5; + + /* + * VSC SDP supporting 3D stereo, + PSR2, + Pixel Encoding/ + * Colorimetry Format indication (HB2 = 05h). + */ + vsc_sdp.sdp_header.HB3 = 0x13; + + /* + * YCbCr 420 = 3h DB16[7:4] ITU-R BT.601 = 0h, ITU-R BT.709 = 1h + * DB16[3:0] DP 1.4a spec, Table 2-120 + */ + vsc_sdp.db[16] = 0x3 << 4; /* 0x3 << 4 , YCbCr 420*/ + /* RGB->YCBCR color conversion uses the BT.709 color space. */ + vsc_sdp.db[16] |= 0x1; /* 0x1, ITU-R BT.709 */ + + /* + * For pixel encoding formats YCbCr444, YCbCr422, YCbCr420, and Y Only, + * the following Component Bit Depth values are defined: + * 001b = 8bpc. + * 010b = 10bpc. + * 011b = 12bpc. + * 100b = 16bpc. + */ + switch (crtc_state->pipe_bpp) { + case 24: /* 8bpc */ + vsc_sdp.db[17] = 0x1; + break; + case 30: /* 10bpc */ + vsc_sdp.db[17] = 0x2; + break; + case 36: /* 12bpc */ + vsc_sdp.db[17] = 0x3; + break; + case 48: /* 16bpc */ + vsc_sdp.db[17] = 0x4; + break; + default: + MISSING_CASE(crtc_state->pipe_bpp); + break; + } + + /* + * Dynamic Range (Bit 7) + * 0 = VESA range, 1 = CTA range. + * all YCbCr are always limited range + */ + vsc_sdp.db[17] |= 0x80; + + /* + * Content Type (Bits 2:0) + * 000b = Not defined. + * 001b = Graphics. + * 010b = Photo. + * 011b = Video. + * 100b = Game + * All other values are RESERVED. + * Note: See CTA-861-G for the definition and expected + * processing by a stream sink for the above contect types. + */ + vsc_sdp.db[18] = 0; + + intel_dig_port->write_infoframe(&intel_dig_port->base, + crtc_state, DP_SDP_VSC, &vsc_sdp, sizeof(vsc_sdp)); +} + +void intel_dp_ycbcr_420_enable(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state) +{ + if (crtc_state->output_format != INTEL_OUTPUT_FORMAT_YCBCR420) + return; + + intel_pixel_encoding_setup_vsc(intel_dp, crtc_state); +} + static u8 intel_dp_autotest_link_training(struct intel_dp *intel_dp) { int status = 0; @@ -4829,15 +4996,15 @@ intel_dp_detect_dpcd(struct intel_dp *intel_dp) u8 *dpcd = intel_dp->dpcd; u8 type; + if (WARN_ON(intel_dp_is_edp(intel_dp))) + return connector_status_connected; + if (lspcon->active) lspcon_resume(lspcon); if (!intel_dp_get_dpcd(intel_dp)) return connector_status_disconnected; - if (intel_dp_is_edp(intel_dp)) - return connector_status_connected; - /* if there's no downstream port, we're done */ if (!drm_dp_is_branch(dpcd)) return connector_status_connected; @@ -5219,12 +5386,15 @@ static bool icl_tc_port_connected(struct drm_i915_private *dev_priv, u32 dpsp; /* - * WARN if we got a legacy port HPD, but VBT didn't mark the port as + * Complain if we got a legacy port HPD, but VBT didn't mark the port as * legacy. Treat the port as legacy from now on. */ - if (WARN_ON(!intel_dig_port->tc_legacy_port && - I915_READ(SDEISR) & SDE_TC_HOTPLUG_ICP(tc_port))) + if (!intel_dig_port->tc_legacy_port && + I915_READ(SDEISR) & SDE_TC_HOTPLUG_ICP(tc_port)) { + DRM_ERROR("VBT incorrectly claims port %c is not TypeC legacy\n", + port_name(port)); intel_dig_port->tc_legacy_port = true; + } is_legacy = intel_dig_port->tc_legacy_port; /* @@ -5276,7 +5446,7 @@ static bool icl_digital_port_connected(struct intel_encoder *encoder) * * Return %true if port is connected, %false otherwise. */ -bool intel_digital_port_connected(struct intel_encoder *encoder) +static bool __intel_digital_port_connected(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); @@ -5306,6 +5476,18 @@ bool intel_digital_port_connected(struct intel_encoder *encoder) return false; } +bool intel_digital_port_connected(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + bool is_connected = false; + intel_wakeref_t wakeref; + + with_intel_display_power(dev_priv, POWER_DOMAIN_DISPLAY_CORE, wakeref) + is_connected = __intel_digital_port_connected(encoder); + + return is_connected; +} + static struct edid * intel_dp_get_edid(struct intel_dp *intel_dp) { @@ -5359,16 +5541,11 @@ intel_dp_detect(struct drm_connector *connector, struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); struct intel_encoder *encoder = &dig_port->base; enum drm_connector_status status; - enum intel_display_power_domain aux_domain = - intel_aux_power_domain(dig_port); - intel_wakeref_t wakeref; DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id, connector->name); WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex)); - wakeref = intel_display_power_get(dev_priv, aux_domain); - /* Can't disconnect eDP */ if (intel_dp_is_edp(intel_dp)) status = edp_detect(intel_dp); @@ -5432,10 +5609,8 @@ intel_dp_detect(struct drm_connector *connector, int ret; ret = intel_dp_retrain_link(encoder, ctx); - if (ret) { - intel_display_power_put(dev_priv, aux_domain, wakeref); + if (ret) return ret; - } } /* @@ -5457,7 +5632,6 @@ intel_dp_detect(struct drm_connector *connector, if (status != connector_status_connected && !intel_dp->is_mst) intel_dp_unset_edid(intel_dp); - intel_display_power_put(dev_priv, aux_domain, wakeref); return status; } @@ -6235,6 +6409,10 @@ void intel_dp_encoder_reset(struct drm_encoder *encoder) intel_dp->reset_link_params = true; + if (!IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv) && + !intel_dp_is_edp(intel_dp)) + return; + with_pps_lock(intel_dp, wakeref) { if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) intel_dp->active_pipe = vlv_active_pipe(intel_dp); @@ -6278,9 +6456,6 @@ enum irqreturn intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) { struct intel_dp *intel_dp = &intel_dig_port->dp; - struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); - enum irqreturn ret = IRQ_NONE; - intel_wakeref_t wakeref; if (long_hpd && intel_dig_port->base.type == INTEL_OUTPUT_EDP) { /* @@ -6303,9 +6478,6 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) return IRQ_NONE; } - wakeref = intel_display_power_get(dev_priv, - intel_aux_power_domain(intel_dig_port)); - if (intel_dp->is_mst) { if (intel_dp_check_mst_status(intel_dp) == -EINVAL) { /* @@ -6317,7 +6489,8 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) intel_dp->is_mst = false; drm_dp_mst_topology_mgr_set_mst(&intel_dp->mst_mgr, intel_dp->is_mst); - goto put_power; + + return IRQ_NONE; } } @@ -6327,17 +6500,10 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) handled = intel_dp_short_pulse(intel_dp); if (!handled) - goto put_power; + return IRQ_NONE; } - ret = IRQ_HANDLED; - -put_power: - intel_display_power_put(dev_priv, - intel_aux_power_domain(intel_dig_port), - wakeref); - - return ret; + return IRQ_HANDLED; } /* check the VBT to see whether the eDP is on another port */ @@ -7190,10 +7356,16 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, intel_dp->DP = I915_READ(intel_dp->output_reg); intel_dp->attached_connector = intel_connector; - if (intel_dp_is_port_edp(dev_priv, port)) + if (intel_dp_is_port_edp(dev_priv, port)) { + /* + * Currently we don't support eDP on TypeC ports, although in + * theory it could work on TypeC legacy ports. + */ + WARN_ON(intel_port_is_tc(dev_priv, port)); type = DRM_MODE_CONNECTOR_eDP; - else + } else { type = DRM_MODE_CONNECTOR_DisplayPort; + } if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) intel_dp->active_pipe = vlv_active_pipe(intel_dp); @@ -7223,6 +7395,9 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, connector->interlace_allowed = true; connector->doublescan_allowed = 0; + if (INTEL_GEN(dev_priv) >= 11) + connector->ycbcr_420_allowed = true; + intel_encoder->hpd_pin = intel_hpd_pin_default(dev_priv, port); intel_dp_aux_init(intel_dp); diff --git a/drivers/gpu/drm/i915/intel_dp.h b/drivers/gpu/drm/i915/intel_dp.h index 5e9e8d13de6eb9052df90b29c38a3d0f58addbdf..da70b1a41c834c4472d932f317f17fd6bcc8aa25 100644 --- a/drivers/gpu/drm/i915/intel_dp.h +++ b/drivers/gpu/drm/i915/intel_dp.h @@ -108,6 +108,7 @@ u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp, int mode_clock, int mode_hdisplay); bool intel_dp_read_dpcd(struct intel_dp *intel_dp); +bool intel_dp_get_colorimetry_status(struct intel_dp *intel_dp); int intel_dp_link_required(int pixel_clock, int bpp); int intel_dp_max_data_rate(int max_link_clock, int max_lanes); bool intel_digital_port_connected(struct intel_encoder *encoder); diff --git a/drivers/gpu/drm/i915/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/intel_dp_aux_backlight.c index 357136f17f85330742b846d1c2ea27985bf75a30..7ded95a334db5b89bba90def1e1098e98a79f8b2 100644 --- a/drivers/gpu/drm/i915/intel_dp_aux_backlight.c +++ b/drivers/gpu/drm/i915/intel_dp_aux_backlight.c @@ -22,6 +22,7 @@ * */ +#include "intel_dp_aux_backlight.h" #include "intel_drv.h" static void set_aux_backlight_enable(struct intel_dp *intel_dp, bool enable) diff --git a/drivers/gpu/drm/i915/intel_dp_aux_backlight.h b/drivers/gpu/drm/i915/intel_dp_aux_backlight.h new file mode 100644 index 0000000000000000000000000000000000000000..ed60c2858967cf60c03925c11cd7a773506d2fda --- /dev/null +++ b/drivers/gpu/drm/i915/intel_dp_aux_backlight.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_DP_AUX_BACKLIGHT_H__ +#define __INTEL_DP_AUX_BACKLIGHT_H__ + +struct intel_connector; + +int intel_dp_aux_init_backlight_funcs(struct intel_connector *intel_connector); + +#endif /* __INTEL_DP_AUX_BACKLIGHT_H__ */ diff --git a/drivers/gpu/drm/i915/intel_dp_link_training.c b/drivers/gpu/drm/i915/intel_dp_link_training.c index 54b069333e2f9471f55583d940d1a1aee6b26e7f..9b1fccea966b512db0753427125a391665d359a5 100644 --- a/drivers/gpu/drm/i915/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/intel_dp_link_training.c @@ -22,6 +22,7 @@ */ #include "intel_dp.h" +#include "intel_dp_link_training.h" #include "intel_drv.h" static void diff --git a/drivers/gpu/drm/i915/intel_dp_link_training.h b/drivers/gpu/drm/i915/intel_dp_link_training.h new file mode 100644 index 0000000000000000000000000000000000000000..174566adcc92b9069ac33cabeeb32e50c73f48b7 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_dp_link_training.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_DP_LINK_TRAINING_H__ +#define __INTEL_DP_LINK_TRAINING_H__ + +struct intel_dp; + +void intel_dp_start_link_train(struct intel_dp *intel_dp); +void intel_dp_stop_link_train(struct intel_dp *intel_dp); + +#endif /* __INTEL_DP_LINK_TRAINING_H__ */ diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 8839eaea8371544cd62aa280111550229509929a..0caf645fbbb841b38a63e05c73ea198e4ed70d22 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -28,10 +28,13 @@ #include #include "i915_drv.h" +#include "intel_atomic.h" #include "intel_audio.h" #include "intel_connector.h" #include "intel_ddi.h" #include "intel_dp.h" +#include "intel_dp_mst.h" +#include "intel_dpio_phy.h" #include "intel_drv.h" static int intel_dp_mst_compute_link_config(struct intel_encoder *encoder, diff --git a/drivers/gpu/drm/i915/intel_dp_mst.h b/drivers/gpu/drm/i915/intel_dp_mst.h new file mode 100644 index 0000000000000000000000000000000000000000..1470c6e0514b1fcdcadf15fb23bef935cd061a89 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_dp_mst.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_DP_MST_H__ +#define __INTEL_DP_MST_H__ + +struct intel_digital_port; + +int intel_dp_mst_encoder_init(struct intel_digital_port *intel_dig_port, int conn_id); +void intel_dp_mst_encoder_cleanup(struct intel_digital_port *intel_dig_port); + +#endif /* __INTEL_DP_MST_H__ */ diff --git a/drivers/gpu/drm/i915/intel_dpio_phy.c b/drivers/gpu/drm/i915/intel_dpio_phy.c index ab4ac7158b7981bab2c4c3ad4aadbb567ef79779..bdbe4175982710711569f8485745885e37f593c1 100644 --- a/drivers/gpu/drm/i915/intel_dpio_phy.c +++ b/drivers/gpu/drm/i915/intel_dpio_phy.c @@ -22,7 +22,9 @@ */ #include "intel_dp.h" +#include "intel_dpio_phy.h" #include "intel_drv.h" +#include "intel_sideband.h" /** * DOC: DPIO @@ -648,7 +650,7 @@ void chv_set_phy_signal_level(struct intel_encoder *encoder, u32 val; int i; - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); /* Clear calc init */ val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW10(ch)); @@ -729,8 +731,7 @@ void chv_set_phy_signal_level(struct intel_encoder *encoder, vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW10(ch), val); } - mutex_unlock(&dev_priv->sb_lock); - + vlv_dpio_put(dev_priv); } void chv_data_lane_soft_reset(struct intel_encoder *encoder, @@ -800,7 +801,7 @@ void chv_phy_pre_pll_enable(struct intel_encoder *encoder, chv_phy_powergate_lanes(encoder, true, lane_mask); - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); /* Assert data lane reset */ chv_data_lane_soft_reset(encoder, crtc_state, true); @@ -855,7 +856,7 @@ void chv_phy_pre_pll_enable(struct intel_encoder *encoder, val |= CHV_CMN_USEDCLKCHANNEL; vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW19(ch), val); - mutex_unlock(&dev_priv->sb_lock); + vlv_dpio_put(dev_priv); } void chv_phy_pre_encoder_enable(struct intel_encoder *encoder, @@ -870,7 +871,7 @@ void chv_phy_pre_encoder_enable(struct intel_encoder *encoder, int data, i, stagger; u32 val; - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); /* allow hardware to manage TX FIFO reset source */ val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW11(ch)); @@ -935,7 +936,7 @@ void chv_phy_pre_encoder_enable(struct intel_encoder *encoder, /* Deassert data lane reset */ chv_data_lane_soft_reset(encoder, crtc_state, false); - mutex_unlock(&dev_priv->sb_lock); + vlv_dpio_put(dev_priv); } void chv_phy_release_cl2_override(struct intel_encoder *encoder) @@ -956,7 +957,7 @@ void chv_phy_post_pll_disable(struct intel_encoder *encoder, enum pipe pipe = to_intel_crtc(old_crtc_state->base.crtc)->pipe; u32 val; - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); /* disable left/right clock distribution */ if (pipe != PIPE_B) { @@ -969,7 +970,7 @@ void chv_phy_post_pll_disable(struct intel_encoder *encoder, vlv_dpio_write(dev_priv, pipe, _CHV_CMN_DW1_CH1, val); } - mutex_unlock(&dev_priv->sb_lock); + vlv_dpio_put(dev_priv); /* * Leave the power down bit cleared for at least one @@ -993,7 +994,8 @@ void vlv_set_phy_signal_level(struct intel_encoder *encoder, enum dpio_channel port = vlv_dport_to_channel(dport); enum pipe pipe = intel_crtc->pipe; - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); + vlv_dpio_write(dev_priv, pipe, VLV_TX_DW5(port), 0x00000000); vlv_dpio_write(dev_priv, pipe, VLV_TX_DW4(port), demph_reg_value); vlv_dpio_write(dev_priv, pipe, VLV_TX_DW2(port), @@ -1006,7 +1008,8 @@ void vlv_set_phy_signal_level(struct intel_encoder *encoder, vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW11(port), 0x00030000); vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW9(port), preemph_reg_value); vlv_dpio_write(dev_priv, pipe, VLV_TX_DW5(port), DPIO_TX_OCALINIT_EN); - mutex_unlock(&dev_priv->sb_lock); + + vlv_dpio_put(dev_priv); } void vlv_phy_pre_pll_enable(struct intel_encoder *encoder, @@ -1019,7 +1022,8 @@ void vlv_phy_pre_pll_enable(struct intel_encoder *encoder, enum pipe pipe = crtc->pipe; /* Program Tx lane resets to default */ - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); + vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW0(port), DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET); @@ -1033,7 +1037,8 @@ void vlv_phy_pre_pll_enable(struct intel_encoder *encoder, vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW12(port), 0x00750f00); vlv_dpio_write(dev_priv, pipe, VLV_TX_DW11(port), 0x00001500); vlv_dpio_write(dev_priv, pipe, VLV_TX_DW14(port), 0x40400000); - mutex_unlock(&dev_priv->sb_lock); + + vlv_dpio_put(dev_priv); } void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder, @@ -1047,7 +1052,7 @@ void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder, enum pipe pipe = crtc->pipe; u32 val; - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); /* Enable clock channels for this port */ val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW8(port)); @@ -1063,7 +1068,7 @@ void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder, vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW14(port), 0x00760018); vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW23(port), 0x00400888); - mutex_unlock(&dev_priv->sb_lock); + vlv_dpio_put(dev_priv); } void vlv_phy_reset_lanes(struct intel_encoder *encoder, @@ -1075,8 +1080,8 @@ void vlv_phy_reset_lanes(struct intel_encoder *encoder, enum dpio_channel port = vlv_dport_to_channel(dport); enum pipe pipe = crtc->pipe; - mutex_lock(&dev_priv->sb_lock); + vlv_dpio_get(dev_priv); vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW0(port), 0x00000000); vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW1(port), 0x00e00060); - mutex_unlock(&dev_priv->sb_lock); + vlv_dpio_put(dev_priv); } diff --git a/drivers/gpu/drm/i915/intel_dpio_phy.h b/drivers/gpu/drm/i915/intel_dpio_phy.h new file mode 100644 index 0000000000000000000000000000000000000000..f418aab90b7ed19f8ca46ed4836c3796738527bf --- /dev/null +++ b/drivers/gpu/drm/i915/intel_dpio_phy.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2019 Intel Corporation + */ + +#ifndef __INTEL_DPIO_PHY_H__ +#define __INTEL_DPIO_PHY_H__ + +#include + +enum dpio_channel; +enum dpio_phy; +enum port; +struct drm_i915_private; +struct intel_crtc_state; +struct intel_encoder; + +void bxt_port_to_phy_channel(struct drm_i915_private *dev_priv, enum port port, + enum dpio_phy *phy, enum dpio_channel *ch); +void bxt_ddi_phy_set_signal_level(struct drm_i915_private *dev_priv, + enum port port, u32 margin, u32 scale, + u32 enable, u32 deemphasis); +void bxt_ddi_phy_init(struct drm_i915_private *dev_priv, enum dpio_phy phy); +void bxt_ddi_phy_uninit(struct drm_i915_private *dev_priv, enum dpio_phy phy); +bool bxt_ddi_phy_is_enabled(struct drm_i915_private *dev_priv, + enum dpio_phy phy); +bool bxt_ddi_phy_verify_state(struct drm_i915_private *dev_priv, + enum dpio_phy phy); +u8 bxt_ddi_phy_calc_lane_lat_optim_mask(u8 lane_count); +void bxt_ddi_phy_set_lane_optim_mask(struct intel_encoder *encoder, + u8 lane_lat_optim_mask); +u8 bxt_ddi_phy_get_lane_lat_optim_mask(struct intel_encoder *encoder); + +void chv_set_phy_signal_level(struct intel_encoder *encoder, + u32 deemph_reg_value, u32 margin_reg_value, + bool uniq_trans_scale); +void chv_data_lane_soft_reset(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + bool reset); +void chv_phy_pre_pll_enable(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state); +void chv_phy_pre_encoder_enable(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state); +void chv_phy_release_cl2_override(struct intel_encoder *encoder); +void chv_phy_post_pll_disable(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state); + +void vlv_set_phy_signal_level(struct intel_encoder *encoder, + u32 demph_reg_value, u32 preemph_reg_value, + u32 uniqtranscale_reg_value, u32 tx3_demph); +void vlv_phy_pre_pll_enable(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state); +void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state); +void vlv_phy_reset_lanes(struct intel_encoder *encoder, + const struct intel_crtc_state *old_crtc_state); + +#endif /* __INTEL_DPIO_PHY_H__ */ diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index e01c057ce50bee7a30d83f1858ee5858c6932954..897d93537414431f8686b032756b3ac3ede56ab5 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -21,6 +21,8 @@ * DEALINGS IN THE SOFTWARE. */ +#include "intel_dpio_phy.h" +#include "intel_dpll_mgr.h" #include "intel_drv.h" /** @@ -349,7 +351,7 @@ static bool ibx_pch_dpll_get_hw_state(struct drm_i915_private *dev_priv, u32 val; wakeref = intel_display_power_get_if_enabled(dev_priv, - POWER_DOMAIN_PLLS); + POWER_DOMAIN_DISPLAY_CORE); if (!wakeref) return false; @@ -358,7 +360,7 @@ static bool ibx_pch_dpll_get_hw_state(struct drm_i915_private *dev_priv, hw_state->fp0 = I915_READ(PCH_FP0(id)); hw_state->fp1 = I915_READ(PCH_FP1(id)); - intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref); + intel_display_power_put(dev_priv, POWER_DOMAIN_DISPLAY_CORE, wakeref); return val & DPLL_VCO_ENABLE; } @@ -517,14 +519,14 @@ static bool hsw_ddi_wrpll_get_hw_state(struct drm_i915_private *dev_priv, u32 val; wakeref = intel_display_power_get_if_enabled(dev_priv, - POWER_DOMAIN_PLLS); + POWER_DOMAIN_DISPLAY_CORE); if (!wakeref) return false; val = I915_READ(WRPLL_CTL(id)); hw_state->wrpll = val; - intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref); + intel_display_power_put(dev_priv, POWER_DOMAIN_DISPLAY_CORE, wakeref); return val & WRPLL_PLL_ENABLE; } @@ -537,14 +539,14 @@ static bool hsw_ddi_spll_get_hw_state(struct drm_i915_private *dev_priv, u32 val; wakeref = intel_display_power_get_if_enabled(dev_priv, - POWER_DOMAIN_PLLS); + POWER_DOMAIN_DISPLAY_CORE); if (!wakeref) return false; val = I915_READ(SPLL_CTL); hw_state->spll = val; - intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref); + intel_display_power_put(dev_priv, POWER_DOMAIN_DISPLAY_CORE, wakeref); return val & SPLL_PLL_ENABLE; } @@ -1002,7 +1004,7 @@ static bool skl_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv, bool ret; wakeref = intel_display_power_get_if_enabled(dev_priv, - POWER_DOMAIN_PLLS); + POWER_DOMAIN_DISPLAY_CORE); if (!wakeref) return false; @@ -1023,7 +1025,7 @@ static bool skl_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv, ret = true; out: - intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref); + intel_display_power_put(dev_priv, POWER_DOMAIN_DISPLAY_CORE, wakeref); return ret; } @@ -1039,7 +1041,7 @@ static bool skl_ddi_dpll0_get_hw_state(struct drm_i915_private *dev_priv, bool ret; wakeref = intel_display_power_get_if_enabled(dev_priv, - POWER_DOMAIN_PLLS); + POWER_DOMAIN_DISPLAY_CORE); if (!wakeref) return false; @@ -1056,7 +1058,7 @@ static bool skl_ddi_dpll0_get_hw_state(struct drm_i915_private *dev_priv, ret = true; out: - intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref); + intel_display_power_put(dev_priv, POWER_DOMAIN_DISPLAY_CORE, wakeref); return ret; } @@ -1600,7 +1602,7 @@ static bool bxt_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv, bxt_port_to_phy_channel(dev_priv, port, &phy, &ch); wakeref = intel_display_power_get_if_enabled(dev_priv, - POWER_DOMAIN_PLLS); + POWER_DOMAIN_DISPLAY_CORE); if (!wakeref) return false; @@ -1658,7 +1660,7 @@ static bool bxt_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv, ret = true; out: - intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref); + intel_display_power_put(dev_priv, POWER_DOMAIN_DISPLAY_CORE, wakeref); return ret; } @@ -1879,27 +1881,6 @@ static const struct intel_shared_dpll_funcs bxt_ddi_pll_funcs = { .get_hw_state = bxt_ddi_pll_get_hw_state, }; -static void intel_ddi_pll_init(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - - if (INTEL_GEN(dev_priv) < 9) { - u32 val = I915_READ(LCPLL_CTL); - - /* - * The LCPLL register should be turned on by the BIOS. For now - * let's just check its state and print errors in case - * something is wrong. Don't even try to turn it on. - */ - - if (val & LCPLL_CD_SOURCE_FCLK) - DRM_ERROR("CDCLK source is not LCPLL\n"); - - if (val & LCPLL_PLL_DISABLE) - DRM_ERROR("LCPLL is disabled\n"); - } -} - struct intel_dpll_mgr { const struct dpll_info *dpll_info; @@ -2106,7 +2087,7 @@ static bool cnl_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv, bool ret; wakeref = intel_display_power_get_if_enabled(dev_priv, - POWER_DOMAIN_PLLS); + POWER_DOMAIN_DISPLAY_CORE); if (!wakeref) return false; @@ -2126,7 +2107,7 @@ static bool cnl_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv, ret = true; out: - intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref); + intel_display_power_put(dev_priv, POWER_DOMAIN_DISPLAY_CORE, wakeref); return ret; } @@ -2741,11 +2722,11 @@ static bool icl_calc_mg_pll_state(struct intel_crtc_state *crtc_state) } if (use_ssc) { - tmp = (u64)dco_khz * 47 * 32; + tmp = mul_u32_u32(dco_khz, 47 * 32); do_div(tmp, refclk_khz * m1div * 10000); ssc_stepsize = tmp; - tmp = (u64)dco_khz * 1000; + tmp = mul_u32_u32(dco_khz, 1000); ssc_steplen = DIV_ROUND_UP_ULL(tmp, 32 * 2 * 32); } else { ssc_stepsize = 0; @@ -2881,7 +2862,7 @@ static bool mg_pll_get_hw_state(struct drm_i915_private *dev_priv, u32 val; wakeref = intel_display_power_get_if_enabled(dev_priv, - POWER_DOMAIN_PLLS); + POWER_DOMAIN_DISPLAY_CORE); if (!wakeref) return false; @@ -2928,7 +2909,7 @@ static bool mg_pll_get_hw_state(struct drm_i915_private *dev_priv, ret = true; out: - intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref); + intel_display_power_put(dev_priv, POWER_DOMAIN_DISPLAY_CORE, wakeref); return ret; } @@ -2943,7 +2924,7 @@ static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv, u32 val; wakeref = intel_display_power_get_if_enabled(dev_priv, - POWER_DOMAIN_PLLS); + POWER_DOMAIN_DISPLAY_CORE); if (!wakeref) return false; @@ -2956,7 +2937,7 @@ static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv, ret = true; out: - intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref); + intel_display_power_put(dev_priv, POWER_DOMAIN_DISPLAY_CORE, wakeref); return ret; } @@ -3303,10 +3284,6 @@ void intel_shared_dpll_init(struct drm_device *dev) mutex_init(&dev_priv->dpll_lock); BUG_ON(dev_priv->num_shared_dpll > I915_NUM_PLLS); - - /* FIXME: Move this to a more suitable place */ - if (HAS_DDI(dev_priv)) - intel_ddi_pll_init(dev); } /** diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.h b/drivers/gpu/drm/i915/intel_dpll_mgr.h index bd8124cc81edff33e15244b936c6e77ba92a12cb..8835dd20f1d27e05497a67ddbc24aec6560d9167 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.h +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.h @@ -25,6 +25,10 @@ #ifndef _INTEL_DPLL_MGR_H_ #define _INTEL_DPLL_MGR_H_ +#include + +#include "intel_display.h" + /*FIXME: Move this to a more appropriate place. */ #define abs_diff(a, b) ({ \ typeof(a) __a = (a); \ @@ -32,13 +36,13 @@ (void) (&__a == &__b); \ __a > __b ? (__a - __b) : (__b - __a); }) +struct drm_atomic_state; +struct drm_device; struct drm_i915_private; struct intel_crtc; struct intel_crtc_state; struct intel_encoder; - struct intel_shared_dpll; -struct intel_dpll_mgr; /** * enum intel_dpll_id - possible DPLL ids diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index a38b9cff5cd0e9960f44e23ffdf88875386aaa48..0fbdbe559b92a87d2e6d01acfbac6d7ea3986805 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -28,7 +28,6 @@ #include #include #include -#include #include #include @@ -47,127 +46,10 @@ struct drm_printer; -/** - * __wait_for - magic wait macro - * - * Macro to help avoid open coding check/wait/timeout patterns. Note that it's - * important that we check the condition again after having timed out, since the - * timeout could be due to preemption or similar and we've never had a chance to - * check the condition before the timeout. - */ -#define __wait_for(OP, COND, US, Wmin, Wmax) ({ \ - const ktime_t end__ = ktime_add_ns(ktime_get_raw(), 1000ll * (US)); \ - long wait__ = (Wmin); /* recommended min for usleep is 10 us */ \ - int ret__; \ - might_sleep(); \ - for (;;) { \ - const bool expired__ = ktime_after(ktime_get_raw(), end__); \ - OP; \ - /* Guarantee COND check prior to timeout */ \ - barrier(); \ - if (COND) { \ - ret__ = 0; \ - break; \ - } \ - if (expired__) { \ - ret__ = -ETIMEDOUT; \ - break; \ - } \ - usleep_range(wait__, wait__ * 2); \ - if (wait__ < (Wmax)) \ - wait__ <<= 1; \ - } \ - ret__; \ -}) - -#define _wait_for(COND, US, Wmin, Wmax) __wait_for(, (COND), (US), (Wmin), \ - (Wmax)) -#define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000) - -/* If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. */ -#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT) -# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) WARN_ON_ONCE((ATOMIC) && !in_atomic()) -#else -# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) do { } while (0) -#endif - -#define _wait_for_atomic(COND, US, ATOMIC) \ -({ \ - int cpu, ret, timeout = (US) * 1000; \ - u64 base; \ - _WAIT_FOR_ATOMIC_CHECK(ATOMIC); \ - if (!(ATOMIC)) { \ - preempt_disable(); \ - cpu = smp_processor_id(); \ - } \ - base = local_clock(); \ - for (;;) { \ - u64 now = local_clock(); \ - if (!(ATOMIC)) \ - preempt_enable(); \ - /* Guarantee COND check prior to timeout */ \ - barrier(); \ - if (COND) { \ - ret = 0; \ - break; \ - } \ - if (now - base >= timeout) { \ - ret = -ETIMEDOUT; \ - break; \ - } \ - cpu_relax(); \ - if (!(ATOMIC)) { \ - preempt_disable(); \ - if (unlikely(cpu != smp_processor_id())) { \ - timeout -= now - base; \ - cpu = smp_processor_id(); \ - base = local_clock(); \ - } \ - } \ - } \ - ret; \ -}) - -#define wait_for_us(COND, US) \ -({ \ - int ret__; \ - BUILD_BUG_ON(!__builtin_constant_p(US)); \ - if ((US) > 10) \ - ret__ = _wait_for((COND), (US), 10, 10); \ - else \ - ret__ = _wait_for_atomic((COND), (US), 0); \ - ret__; \ -}) - -#define wait_for_atomic_us(COND, US) \ -({ \ - BUILD_BUG_ON(!__builtin_constant_p(US)); \ - BUILD_BUG_ON((US) > 50000); \ - _wait_for_atomic((COND), (US), 1); \ -}) - -#define wait_for_atomic(COND, MS) wait_for_atomic_us((COND), (MS) * 1000) - -#define KHz(x) (1000 * (x)) -#define MHz(x) KHz(1000 * (x)) - -#define KBps(x) (1000 * (x)) -#define MBps(x) KBps(1000 * (x)) -#define GBps(x) ((u64)1000 * MBps((x))) - /* * Display related stuff */ -/* store information about an Ixxx DVO */ -/* The i830->i865 use multiple DVOs with multiple i2cs */ -/* the i915, i945 have a single sDVO i2c bus - which is different */ -#define MAX_OUTPUTS 6 -/* maximum connectors per crtcs in the mode set */ - -#define INTEL_I2C_BUS_DVO 1 -#define INTEL_I2C_BUS_SDVO 2 - /* these are outputs from the chip - integrated only external chips are via DVO or SDVO output */ enum intel_output_type { @@ -185,14 +67,6 @@ enum intel_output_type { INTEL_OUTPUT_DP_MST = 11, }; -#define INTEL_DVO_CHIP_NONE 0 -#define INTEL_DVO_CHIP_LVDS 1 -#define INTEL_DVO_CHIP_TMDS 2 -#define INTEL_DVO_CHIP_TVOUT 4 - -#define INTEL_DSI_VIDEO_MODE 0 -#define INTEL_DSI_COMMAND_MODE 1 - struct intel_framebuffer { struct drm_framebuffer base; struct intel_rotation_info rot_info; @@ -677,21 +551,6 @@ struct intel_initial_plane_config { u8 rotation; }; -#define SKL_MIN_SRC_W 8 -#define SKL_MAX_SRC_W 4096 -#define SKL_MIN_SRC_H 8 -#define SKL_MAX_SRC_H 4096 -#define SKL_MIN_DST_W 8 -#define SKL_MAX_DST_W 4096 -#define SKL_MIN_DST_H 8 -#define SKL_MAX_DST_H 4096 -#define ICL_MAX_SRC_W 5120 -#define ICL_MAX_SRC_H 4096 -#define ICL_MAX_DST_W 5120 -#define ICL_MAX_DST_H 4096 -#define SKL_MIN_YUV_420_SRC_W 16 -#define SKL_MIN_YUV_420_SRC_H 16 - struct intel_scaler { int in_use; u32 mode; @@ -1581,56 +1440,6 @@ intel_atomic_get_new_crtc_state(struct intel_atomic_state *state, &crtc->base)); } -/* intel_fifo_underrun.c */ -bool intel_set_cpu_fifo_underrun_reporting(struct drm_i915_private *dev_priv, - enum pipe pipe, bool enable); -bool intel_set_pch_fifo_underrun_reporting(struct drm_i915_private *dev_priv, - enum pipe pch_transcoder, - bool enable); -void intel_cpu_fifo_underrun_irq_handler(struct drm_i915_private *dev_priv, - enum pipe pipe); -void intel_pch_fifo_underrun_irq_handler(struct drm_i915_private *dev_priv, - enum pipe pch_transcoder); -void intel_check_cpu_fifo_underruns(struct drm_i915_private *dev_priv); -void intel_check_pch_fifo_underruns(struct drm_i915_private *dev_priv); - -/* i915_irq.c */ -void gen5_enable_gt_irq(struct drm_i915_private *dev_priv, u32 mask); -void gen5_disable_gt_irq(struct drm_i915_private *dev_priv, u32 mask); -void gen6_mask_pm_irq(struct drm_i915_private *dev_priv, u32 mask); -void gen6_unmask_pm_irq(struct drm_i915_private *dev_priv, u32 mask); -void gen11_reset_rps_interrupts(struct drm_i915_private *dev_priv); -void gen6_reset_rps_interrupts(struct drm_i915_private *dev_priv); -void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv); -void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv); -void gen6_rps_reset_ei(struct drm_i915_private *dev_priv); - -static inline u32 gen6_sanitize_rps_pm_mask(const struct drm_i915_private *i915, - u32 mask) -{ - return mask & ~i915->gt_pm.rps.pm_intrmsk_mbz; -} - -void intel_runtime_pm_disable_interrupts(struct drm_i915_private *dev_priv); -void intel_runtime_pm_enable_interrupts(struct drm_i915_private *dev_priv); -static inline bool intel_irqs_enabled(struct drm_i915_private *dev_priv) -{ - /* - * We only use drm_irq_uninstall() at unload and VT switch, so - * this is the only thing we need to check. - */ - return dev_priv->runtime_pm.irqs_enabled; -} - -int intel_get_crtc_scanline(struct intel_crtc *crtc); -void gen8_irq_power_well_post_enable(struct drm_i915_private *dev_priv, - u8 pipe_mask); -void gen8_irq_power_well_pre_disable(struct drm_i915_private *dev_priv, - u8 pipe_mask); -void gen9_reset_guc_interrupts(struct drm_i915_private *dev_priv); -void gen9_enable_guc_interrupts(struct drm_i915_private *dev_priv); -void gen9_disable_guc_interrupts(struct drm_i915_private *dev_priv); - /* intel_display.c */ void intel_plane_destroy(struct drm_plane *plane); void i830_enable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe); @@ -1652,6 +1461,7 @@ unsigned int intel_fb_align_height(const struct drm_framebuffer *fb, void intel_add_fb_offsets(int *x, int *y, const struct intel_plane_state *state, int plane); unsigned int intel_rotation_info_size(const struct intel_rotation_info *rot_info); +unsigned int intel_remapped_info_size(const struct intel_remapped_info *rem_info); bool intel_has_pending_fb_unpin(struct drm_i915_private *dev_priv); void intel_mark_busy(struct drm_i915_private *dev_priv); void intel_mark_idle(struct drm_i915_private *dev_priv); @@ -1762,18 +1572,13 @@ void assert_pipe(struct drm_i915_private *dev_priv, enum pipe pipe, bool state); #define assert_pipe_disabled(d, p) assert_pipe(d, p, false) void intel_prepare_reset(struct drm_i915_private *dev_priv); void intel_finish_reset(struct drm_i915_private *dev_priv); -void hsw_enable_pc8(struct drm_i915_private *dev_priv); -void hsw_disable_pc8(struct drm_i915_private *dev_priv); -void gen9_sanitize_dc_state(struct drm_i915_private *dev_priv); -void bxt_enable_dc9(struct drm_i915_private *dev_priv); -void bxt_disable_dc9(struct drm_i915_private *dev_priv); -void gen9_enable_dc5(struct drm_i915_private *dev_priv); unsigned int skl_cdclk_get_vco(unsigned int freq); -void skl_enable_dc6(struct drm_i915_private *dev_priv); void intel_dp_get_m_n(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config); void intel_dp_set_m_n(const struct intel_crtc_state *crtc_state, enum link_m_n_set m_n); +void intel_dp_ycbcr_420_enable(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state); int intel_dotclock_calculate(int link_freq, const struct intel_link_m_n *m_n); bool bxt_find_best_dpll(struct intel_crtc_state *crtc_state, struct dpll *best_clock); @@ -1815,99 +1620,26 @@ int skl_format_to_fourcc(int format, bool rgb_order, bool alpha); unsigned int i9xx_plane_max_stride(struct intel_plane *plane, u32 pixel_format, u64 modifier, unsigned int rotation); - -/* intel_dp_link_training.c */ -void intel_dp_start_link_train(struct intel_dp *intel_dp); -void intel_dp_stop_link_train(struct intel_dp *intel_dp); - -/* intel_vdsc.c */ -int intel_dp_compute_dsc_params(struct intel_dp *intel_dp, - struct intel_crtc_state *pipe_config); -enum intel_display_power_domain -intel_dsc_power_domain(const struct intel_crtc_state *crtc_state); - -/* intel_dp_aux_backlight.c */ -int intel_dp_aux_init_backlight_funcs(struct intel_connector *intel_connector); - -/* intel_dp_mst.c */ -int intel_dp_mst_encoder_init(struct intel_digital_port *intel_dig_port, int conn_id); -void intel_dp_mst_encoder_cleanup(struct intel_digital_port *intel_dig_port); -/* vlv_dsi.c */ -void vlv_dsi_init(struct drm_i915_private *dev_priv); - -/* icl_dsi.c */ -void icl_dsi_init(struct drm_i915_private *dev_priv); - -/* intel_dsi_dcs_backlight.c */ -int intel_dsi_dcs_init_backlight_funcs(struct intel_connector *intel_connector); - -/* intel_hotplug.c */ -void intel_hpd_poll_init(struct drm_i915_private *dev_priv); -bool intel_encoder_hotplug(struct intel_encoder *encoder, - struct intel_connector *connector); - -/* intel_overlay.c */ -void intel_overlay_setup(struct drm_i915_private *dev_priv); -void intel_overlay_cleanup(struct drm_i915_private *dev_priv); -int intel_overlay_switch_off(struct intel_overlay *overlay); -int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv); -int intel_overlay_attrs_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv); -void intel_overlay_reset(struct drm_i915_private *dev_priv); - -/* intel_quirks.c */ -void intel_init_quirks(struct drm_i915_private *dev_priv); +int bdw_get_pipemisc_bpp(struct intel_crtc *crtc); /* intel_runtime_pm.c */ -void intel_runtime_pm_init_early(struct drm_i915_private *dev_priv); -int intel_power_domains_init(struct drm_i915_private *); -void intel_power_domains_cleanup(struct drm_i915_private *dev_priv); -void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume); -void intel_power_domains_fini_hw(struct drm_i915_private *dev_priv); -void icl_display_core_init(struct drm_i915_private *dev_priv, bool resume); -void icl_display_core_uninit(struct drm_i915_private *dev_priv); -void intel_power_domains_enable(struct drm_i915_private *dev_priv); -void intel_power_domains_disable(struct drm_i915_private *dev_priv); - -enum i915_drm_suspend_mode { - I915_DRM_SUSPEND_IDLE, - I915_DRM_SUSPEND_MEM, - I915_DRM_SUSPEND_HIBERNATE, -}; +#define BITS_PER_WAKEREF \ + BITS_PER_TYPE(struct_member(struct i915_runtime_pm, wakeref_count)) +#define INTEL_RPM_WAKELOCK_SHIFT (BITS_PER_WAKEREF / 2) +#define INTEL_RPM_WAKELOCK_BIAS (1 << INTEL_RPM_WAKELOCK_SHIFT) +#define INTEL_RPM_RAW_WAKEREF_MASK (INTEL_RPM_WAKELOCK_BIAS - 1) + +static inline int +intel_rpm_raw_wakeref_count(int wakeref_count) +{ + return wakeref_count & INTEL_RPM_RAW_WAKEREF_MASK; +} -void intel_power_domains_suspend(struct drm_i915_private *dev_priv, - enum i915_drm_suspend_mode); -void intel_power_domains_resume(struct drm_i915_private *dev_priv); -void bxt_display_core_init(struct drm_i915_private *dev_priv, bool resume); -void bxt_display_core_uninit(struct drm_i915_private *dev_priv); -void intel_runtime_pm_enable(struct drm_i915_private *dev_priv); -void intel_runtime_pm_disable(struct drm_i915_private *dev_priv); -void intel_runtime_pm_cleanup(struct drm_i915_private *dev_priv); -const char * -intel_display_power_domain_str(enum intel_display_power_domain domain); - -bool intel_display_power_is_enabled(struct drm_i915_private *dev_priv, - enum intel_display_power_domain domain); -bool __intel_display_power_is_enabled(struct drm_i915_private *dev_priv, - enum intel_display_power_domain domain); -intel_wakeref_t intel_display_power_get(struct drm_i915_private *dev_priv, - enum intel_display_power_domain domain); -intel_wakeref_t -intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv, - enum intel_display_power_domain domain); -void intel_display_power_put_unchecked(struct drm_i915_private *dev_priv, - enum intel_display_power_domain domain); -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM) -void intel_display_power_put(struct drm_i915_private *dev_priv, - enum intel_display_power_domain domain, - intel_wakeref_t wakeref); -#else -#define intel_display_power_put(i915, domain, wakeref) \ - intel_display_power_put_unchecked(i915, domain) -#endif -void icl_dbuf_slices_update(struct drm_i915_private *dev_priv, - u8 req_slices); +static inline int +intel_rpm_wakelock_count(int wakeref_count) +{ + return wakeref_count >> INTEL_RPM_WAKELOCK_SHIFT; +} static inline void assert_rpm_device_not_suspended(struct i915_runtime_pm *rpm) @@ -1917,11 +1649,33 @@ assert_rpm_device_not_suspended(struct i915_runtime_pm *rpm) } static inline void -__assert_rpm_wakelock_held(struct i915_runtime_pm *rpm) +____assert_rpm_raw_wakeref_held(struct i915_runtime_pm *rpm, int wakeref_count) { assert_rpm_device_not_suspended(rpm); - WARN_ONCE(!atomic_read(&rpm->wakeref_count), - "RPM wakelock ref not held during HW access"); + WARN_ONCE(!intel_rpm_raw_wakeref_count(wakeref_count), + "RPM raw-wakeref not held\n"); +} + +static inline void +____assert_rpm_wakelock_held(struct i915_runtime_pm *rpm, int wakeref_count) +{ + ____assert_rpm_raw_wakeref_held(rpm, wakeref_count); + WARN_ONCE(!intel_rpm_wakelock_count(wakeref_count), + "RPM wakelock ref not held during HW access\n"); +} + +static inline void +assert_rpm_raw_wakeref_held(struct drm_i915_private *i915) +{ + struct i915_runtime_pm *rpm = &i915->runtime_pm; + + ____assert_rpm_raw_wakeref_held(rpm, atomic_read(&rpm->wakeref_count)); +} + +static inline void +__assert_rpm_wakelock_held(struct i915_runtime_pm *rpm) +{ + ____assert_rpm_wakelock_held(rpm, atomic_read(&rpm->wakeref_count)); } static inline void @@ -1951,7 +1705,8 @@ assert_rpm_wakelock_held(struct drm_i915_private *i915) static inline void disable_rpm_wakeref_asserts(struct drm_i915_private *i915) { - atomic_inc(&i915->runtime_pm.wakeref_count); + atomic_add(INTEL_RPM_WAKELOCK_BIAS + 1, + &i915->runtime_pm.wakeref_count); } /** @@ -1968,77 +1723,8 @@ disable_rpm_wakeref_asserts(struct drm_i915_private *i915) static inline void enable_rpm_wakeref_asserts(struct drm_i915_private *i915) { - atomic_dec(&i915->runtime_pm.wakeref_count); -} - -intel_wakeref_t intel_runtime_pm_get(struct drm_i915_private *i915); -intel_wakeref_t intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915); -intel_wakeref_t intel_runtime_pm_get_noresume(struct drm_i915_private *i915); - -#define with_intel_runtime_pm(i915, wf) \ - for ((wf) = intel_runtime_pm_get(i915); (wf); \ - intel_runtime_pm_put((i915), (wf)), (wf) = 0) - -#define with_intel_runtime_pm_if_in_use(i915, wf) \ - for ((wf) = intel_runtime_pm_get_if_in_use(i915); (wf); \ - intel_runtime_pm_put((i915), (wf)), (wf) = 0) - -void intel_runtime_pm_put_unchecked(struct drm_i915_private *i915); -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM) -void intel_runtime_pm_put(struct drm_i915_private *i915, intel_wakeref_t wref); -#else -#define intel_runtime_pm_put(i915, wref) intel_runtime_pm_put_unchecked(i915) -#endif - -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM) -void print_intel_runtime_pm_wakeref(struct drm_i915_private *i915, - struct drm_printer *p); -#else -static inline void print_intel_runtime_pm_wakeref(struct drm_i915_private *i915, - struct drm_printer *p) -{ + atomic_sub(INTEL_RPM_WAKELOCK_BIAS + 1, + &i915->runtime_pm.wakeref_count); } -#endif - -void chv_phy_powergate_lanes(struct intel_encoder *encoder, - bool override, unsigned int mask); -bool chv_phy_powergate_ch(struct drm_i915_private *dev_priv, enum dpio_phy phy, - enum dpio_channel ch, bool override); - -/* intel_atomic.c */ -int intel_digital_connector_atomic_get_property(struct drm_connector *connector, - const struct drm_connector_state *state, - struct drm_property *property, - u64 *val); -int intel_digital_connector_atomic_set_property(struct drm_connector *connector, - struct drm_connector_state *state, - struct drm_property *property, - u64 val); -int intel_digital_connector_atomic_check(struct drm_connector *conn, - struct drm_connector_state *new_state); -struct drm_connector_state * -intel_digital_connector_duplicate_state(struct drm_connector *connector); - -struct drm_crtc_state *intel_crtc_duplicate_state(struct drm_crtc *crtc); -void intel_crtc_destroy_state(struct drm_crtc *crtc, - struct drm_crtc_state *state); -struct drm_atomic_state *intel_atomic_state_alloc(struct drm_device *dev); -void intel_atomic_state_clear(struct drm_atomic_state *); - -static inline struct intel_crtc_state * -intel_atomic_get_crtc_state(struct drm_atomic_state *state, - struct intel_crtc *crtc) -{ - struct drm_crtc_state *crtc_state; - crtc_state = drm_atomic_get_crtc_state(state, &crtc->base); - if (IS_ERR(crtc_state)) - return ERR_CAST(crtc_state); - - return to_intel_crtc_state(crtc_state); -} - -int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, - struct intel_crtc *intel_crtc, - struct intel_crtc_state *crtc_state); #endif /* __INTEL_DRV_H__ */ diff --git a/drivers/gpu/drm/i915/intel_dsi.h b/drivers/gpu/drm/i915/intel_dsi.h index 705a609050c06c36ccc8d7febc127cfc410ae169..f9b90061d9128f002ae293d52a4ec810a5d0b7f0 100644 --- a/drivers/gpu/drm/i915/intel_dsi.h +++ b/drivers/gpu/drm/i915/intel_dsi.h @@ -28,6 +28,9 @@ #include #include "intel_drv.h" +#define INTEL_DSI_VIDEO_MODE 0 +#define INTEL_DSI_COMMAND_MODE 1 + /* Dual Link support */ #define DSI_DUAL_LINK_NONE 0 #define DSI_DUAL_LINK_FRONT_BACK 1 @@ -151,6 +154,9 @@ static inline u16 intel_dsi_encoder_ports(struct intel_encoder *encoder) return enc_to_intel_dsi(&encoder->base)->ports; } +/* icl_dsi.c */ +void icl_dsi_init(struct drm_i915_private *dev_priv); + /* intel_dsi.c */ int intel_dsi_bitrate(const struct intel_dsi *intel_dsi); int intel_dsi_tlpx_ns(const struct intel_dsi *intel_dsi); @@ -166,6 +172,7 @@ enum drm_mode_status intel_dsi_mode_valid(struct drm_connector *connector, struct intel_dsi_host *intel_dsi_host_init(struct intel_dsi *intel_dsi, const struct mipi_dsi_host_ops *funcs, enum port port); +void vlv_dsi_init(struct drm_i915_private *dev_priv); /* vlv_dsi_pll.c */ int vlv_dsi_pll_compute(struct intel_encoder *encoder, diff --git a/drivers/gpu/drm/i915/intel_dsi_dcs_backlight.c b/drivers/gpu/drm/i915/intel_dsi_dcs_backlight.c index 150a156f3b1e9a2bff7f32bbf1ed1905eb71ca8b..8c33262cb0b2987d68fce09ef38d7261f3150a0f 100644 --- a/drivers/gpu/drm/i915/intel_dsi_dcs_backlight.c +++ b/drivers/gpu/drm/i915/intel_dsi_dcs_backlight.c @@ -23,11 +23,13 @@ * Author: Deepak M */ +#include +#include