drm/amd/display: Output Transfer Function Regamma Refactor

- Create translation function to translate logical format to hw format
- Refactor to use transfer function in dc instead of input gamma

Signed-off-by: Amy Zhang <Amy.Zhang@amd.com>
Acked-by: Harry Wentland <Harry.Wentland@amd.com>
Reviewed-by: Anthony Koo <Anthony.Koo@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

drm/amd/display: Output Transfer Function Regamma Refactor
- Create translation function to translate logical format to hw format
- Refactor to use transfer function in dc instead of input gamma

Signed-off-by: Amy Zhang <Amy.Zhang@amd.com>
Acked-by: Harry Wentland <Harry.Wentland@amd.com>
Reviewed-by: Anthony Koo <Anthony.Koo@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
fcd2f4bf · Amy Zhang · Alex Deucher · 457b74cb · fcd2f4bf · fcd2f4bf
9 changed files
--- a/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c
+++ b/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c
@@ -246,6 +246,15 @@ struct fixed31_32 dal_fixed31_32_add(
 	return res;
 }

+/*
+ * Add a plain integer to a fixed-point value: arg2 is converted with
+ * dal_fixed31_32_from_int() and the two are summed by dal_fixed31_32_add().
+ */
+struct fixed31_32 dal_fixed31_32_add_int(
+	struct fixed31_32 arg1,
+	int32_t arg2)
+{
+	const struct fixed31_32 addend = dal_fixed31_32_from_int(arg2);
+
+	return dal_fixed31_32_add(arg1, addend);
+}
+
 struct fixed31_32 dal_fixed31_32_sub_int(
 	struct fixed31_32 arg1,
 	int32_t arg2)

--- a/drivers/gpu/drm/amd/display/dc/calcs/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/calcs/Makefile
@@ -3,7 +3,7 @@
 # It calculates Bandwidth and Watermarks values for HW programming
 #

-BW_CALCS = bandwidth_calcs.o bw_fixed.o gamma_calcs.o
+BW_CALCS = bandwidth_calcs.o bw_fixed.o

 AMD_DAL_BW_CALCS = $(addprefix $(AMDDALPATH)/dc/calcs/,$(BW_CALCS))


--- a/drivers/gpu/drm/amd/display/dc/calcs/gamma_calcs.c
+++ b/drivers/gpu/drm/amd/display/dc/calcs/gamma_calcs.c
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -1519,23 +1519,23 @@ void dc_update_surfaces_for_stream(struct dc *dc, struct dc_surface_update *upda
 			if (dc->debug.disable_color_module)
 				continue;  /* skip below color updates */

-			if (updates[i].hdr_static_metadata) {
-				resource_build_info_frame(pipe_ctx);
-				core_dc->hwss.update_info_frame(pipe_ctx);
-			}
 			if (is_new_pipe_surface[j] ||
 					updates[i].in_transfer_func)
 				core_dc->hwss.set_input_transfer_func(
 						pipe_ctx, pipe_ctx->surface);

 			if (is_new_pipe_surface[j] ||
-					updates[i].gamma ||
 					updates[i].out_transfer_func)
 				core_dc->hwss.set_output_transfer_func(
 						pipe_ctx,
 						pipe_ctx->surface,
 						pipe_ctx->stream);

+			if (updates[i].hdr_static_metadata) {
+				resource_build_info_frame(pipe_ctx);
+				core_dc->hwss.update_info_frame(pipe_ctx);
+			}
+
 		}
 		if (apply_ctx) {
 			core_dc->hwss.apply_ctx_for_surface(core_dc, surface, context);

--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -213,11 +213,14 @@ enum dc_transfer_func_type {
 };

 struct dc_transfer_func_distributed_points {
-	uint16_t red[TRANSFER_FUNC_POINTS];
-	uint16_t green[TRANSFER_FUNC_POINTS];
-	uint16_t blue[TRANSFER_FUNC_POINTS];
+	struct fixed31_32 red[TRANSFER_FUNC_POINTS];
+	struct fixed31_32 green[TRANSFER_FUNC_POINTS];
+	struct fixed31_32 blue[TRANSFER_FUNC_POINTS];
+
 	uint16_t end_exponent;
-	uint16_t x_point_at_y1;
+	uint16_t x_point_at_y1_red;
+	uint16_t x_point_at_y1_green;
+	uint16_t x_point_at_y1_blue;
 };

 enum dc_transfer_func_predefined {

--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
@@ -42,7 +42,6 @@
 #include "stream_encoder.h"
 #include "link_encoder.h"
 #include "clock_source.h"
-#include "gamma_calcs.h"
 #include "audio.h"
 #include "dce/dce_hwseq.h"

@@ -286,6 +285,436 @@ static bool dce110_set_input_transfer_func(
 	return result;
 }

+/*
+ * Split a fixed-point value into the sign / mantissa / exponent fields
+ * described by @format.
+ *
+ * The magnitude is shifted left or right one bit at a time until it no
+ * longer lies below 1 or above the largest representable mantissa value;
+ * the number of shifts adjusts the biased exponent. Zero, and values whose
+ * exponent would drop to or below zero, are emitted with mantissa and
+ * exponent both 0.
+ *
+ * @negative is only ever set for negative input, and then takes the value
+ * of format->sign. The function always returns true.
+ */
+static bool build_custom_float(
+	struct fixed31_32 value,
+	const struct custom_float_format *format,
+	bool *negative,
+	uint32_t *mantissa,
+	uint32_t *exponenta)
+{
+	/* exponent bias: 2^(exponenta_bits - 1) - 1 */
+	const uint32_t bias = (1 << (format->exponenta_bits - 1)) - 1;
+
+	/* (2^(mantissa_bits + 1) - 1) / 2^mantissa_bits */
+	const struct fixed31_32 upper_limit = dal_fixed31_32_from_fraction(
+		(1LL << (format->mantissa_bits + 1)) - 1,
+		1LL << format->mantissa_bits);
+
+	struct fixed31_32 fraction;
+	uint32_t shifts = 0;
+
+	*negative = false;
+
+	if (dal_fixed31_32_eq(value, dal_fixed31_32_zero)) {
+		*mantissa = 0;
+		*exponenta = 0;
+		return true;
+	}
+
+	/* work on the magnitude; the sign is reported separately */
+	if (dal_fixed31_32_lt(value, dal_fixed31_32_zero)) {
+		*negative = format->sign;
+		value = dal_fixed31_32_neg(value);
+	}
+
+	if (dal_fixed31_32_lt(value, dal_fixed31_32_one)) {
+		/* scale up until the value reaches 1 */
+		do {
+			value = dal_fixed31_32_shl(value, 1);
+			++shifts;
+		} while (dal_fixed31_32_lt(value, dal_fixed31_32_one));
+
+		/* exponent would not stay positive: emit zero */
+		if (bias <= shifts) {
+			*mantissa = 0;
+			*exponenta = 0;
+			return true;
+		}
+
+		*exponenta = bias - shifts;
+	} else if (dal_fixed31_32_le(upper_limit, value)) {
+		/* scale down until the value no longer exceeds the limit */
+		do {
+			value = dal_fixed31_32_shr(value, 1);
+			++shifts;
+		} while (dal_fixed31_32_lt(upper_limit, value));
+
+		*exponenta = bias + shifts;
+	} else {
+		*exponenta = bias;
+	}
+
+	/* drop the implicit leading 1; out-of-range remainders become 0 */
+	fraction = dal_fixed31_32_sub(value, dal_fixed31_32_one);
+
+	if (dal_fixed31_32_lt(fraction, dal_fixed31_32_zero) ||
+		dal_fixed31_32_lt(dal_fixed31_32_one, fraction))
+		fraction = dal_fixed31_32_zero;
+	else
+		fraction = dal_fixed31_32_shl(fraction, format->mantissa_bits);
+
+	*mantissa = dal_fixed31_32_floor(fraction);
+
+	return true;
+}
+
+/*
+ * Pack sign, exponent and mantissa into one word laid out as
+ * [sign][exponent][mantissa], mantissa in the low bits.
+ *
+ * Only the low format->mantissa_bits of @mantissa and the low
+ * format->exponenta_bits of @exponenta are copied. The sign bit is set
+ * only when @negative is true and the format carries a sign.
+ * The function always returns true.
+ */
+static bool setup_custom_float(
+	const struct custom_float_format *format,
+	bool negative,
+	uint32_t mantissa,
+	uint32_t exponenta,
+	uint32_t *result)
+{
+	/* verification limits: once calculation is ok they can be removed */
+	const uint32_t mantissa_limit =
+		(1 << (format->mantissa_bits + 1)) - 1;
+	const uint32_t exponenta_limit =
+		(1 << (format->exponenta_bits + 1)) - 1;
+
+	uint32_t packed = 0;
+	uint32_t m_bit;
+	uint32_t e_bit;
+
+	if (mantissa & ~mantissa_limit) {
+		BREAK_TO_DEBUGGER();
+		mantissa = mantissa_limit;
+	}
+
+	if (exponenta & ~exponenta_limit) {
+		BREAK_TO_DEBUGGER();
+		exponenta = exponenta_limit;
+	}
+
+	/* mantissa occupies bits [0, mantissa_bits) */
+	for (m_bit = 0; m_bit < format->mantissa_bits; ++m_bit) {
+		const uint32_t mask = 1 << m_bit;
+
+		if (mantissa & mask)
+			packed |= mask;
+	}
+
+	/* exponent sits directly above the mantissa */
+	for (e_bit = 0; e_bit < format->exponenta_bits; ++e_bit) {
+		const uint32_t mask = 1 << e_bit;
+
+		if (exponenta & mask)
+			packed |= mask << m_bit;
+	}
+
+	/* sign goes in the bit above the exponent */
+	if (negative && format->sign)
+		packed |= 1 << (m_bit + e_bit);
+
+	*result = packed;
+
+	return true;
+}
+
+/*
+ * Convert one fixed-point value to the packed custom-float word for
+ * @format: build_custom_float() extracts the fields and
+ * setup_custom_float() packs them into @result.
+ * Returns false if either step fails.
+ */
+static bool convert_to_custom_float_format(
+	struct fixed31_32 value,
+	const struct custom_float_format *format,
+	uint32_t *result)
+{
+	bool negative;
+	uint32_t mantissa;
+	uint32_t exponenta;
+
+	if (!build_custom_float(
+		value, format, &negative, &mantissa, &exponenta))
+		return false;
+
+	return setup_custom_float(
+		format, negative, mantissa, exponenta, result);
+}
+
+/*
+ * Fill in the custom-float fields of the curve end points and of the
+ * first @hw_points_num entries of @rgb_resulted from their fixed-point
+ * counterparts.
+ *
+ * Three formats are used, all with a 6-bit exponent:
+ *  - arr_points[0] x/offset/slope: 12-bit mantissa, signed
+ *  - arr_points[1] x/y and arr_points[2] slope: 10-bit mantissa, unsigned
+ *  - per-point colour values and deltas: 12-bit mantissa, signed
+ *
+ * Stops at the first failed conversion, breaks to the debugger and
+ * returns false; returns true otherwise.
+ */
+static bool convert_to_custom_float(
+		struct pwl_result_data *rgb_resulted,
+		struct curve_points *arr_points,
+		uint32_t hw_points_num)
+{
+	struct custom_float_format fmt;
+	uint32_t idx;
+
+	fmt.exponenta_bits = 6;
+	fmt.mantissa_bits = 12;
+	fmt.sign = true;
+
+	if (!convert_to_custom_float_format(arr_points[0].x, &fmt,
+			&arr_points[0].custom_float_x) ||
+		!convert_to_custom_float_format(arr_points[0].offset, &fmt,
+			&arr_points[0].custom_float_offset) ||
+		!convert_to_custom_float_format(arr_points[0].slope, &fmt,
+			&arr_points[0].custom_float_slope)) {
+		BREAK_TO_DEBUGGER();
+		return false;
+	}
+
+	fmt.mantissa_bits = 10;
+	fmt.sign = false;
+
+	if (!convert_to_custom_float_format(arr_points[1].x, &fmt,
+			&arr_points[1].custom_float_x) ||
+		!convert_to_custom_float_format(arr_points[1].y, &fmt,
+			&arr_points[1].custom_float_y) ||
+		!convert_to_custom_float_format(arr_points[2].slope, &fmt,
+			&arr_points[2].custom_float_slope)) {
+		BREAK_TO_DEBUGGER();
+		return false;
+	}
+
+	fmt.mantissa_bits = 12;
+	fmt.sign = true;
+
+	for (idx = 0; idx != hw_points_num; ++idx) {
+		struct pwl_result_data *pt = &rgb_resulted[idx];
+
+		if (!convert_to_custom_float_format(pt->red, &fmt,
+				&pt->red_reg) ||
+			!convert_to_custom_float_format(pt->green, &fmt,
+				&pt->green_reg) ||
+			!convert_to_custom_float_format(pt->blue, &fmt,
+				&pt->blue_reg) ||
+			!convert_to_custom_float_format(pt->delta_red, &fmt,
+				&pt->delta_red_reg) ||
+			!convert_to_custom_float_format(pt->delta_green, &fmt,
+				&pt->delta_green_reg) ||
+			!convert_to_custom_float_format(pt->delta_blue, &fmt,
+				&pt->delta_blue_reg)) {
+			BREAK_TO_DEBUGGER();
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/*
+ * Translate the distributed points of an output transfer function into
+ * the piecewise-linear regamma parameters programmed into the hardware.
+ *
+ * @output_tf:      logical transfer function (tf_pts holds the samples)
+ * @regamma_params: zeroed, then filled with the sampled curve, its end
+ *                  points, the segment layout and the custom-float
+ *                  encodings of all of them
+ *
+ * Returns false when either pointer is NULL or when the custom-float
+ * conversion fails, true otherwise.
+ */
+static bool dce110_translate_regamma_to_hw_format(const struct dc_transfer_func
+		*output_tf, struct pwl_params *regamma_params)
+{
+	struct curve_points *arr_points;
+	struct pwl_result_data *rgb_resulted;
+	struct pwl_result_data *rgb;
+	struct pwl_result_data *rgb_plus_1;
+	struct fixed31_32 y_r;
+	struct fixed31_32 y_g;
+	struct fixed31_32 y_b;
+	struct fixed31_32 y1_min;
+	struct fixed31_32 y3_max;
+
+	int32_t segment_start, segment_end;
+	uint32_t num_segments, hw_points, start_index;
+	uint32_t i, j;
+
+	if (output_tf == NULL || regamma_params == NULL)
+		return false;
+
+	arr_points = regamma_params->arr_points;
+	rgb_resulted = regamma_params->rgb_resulted;
+
+	memset(regamma_params, 0, sizeof(*regamma_params));
+
+	if (output_tf->tf == TRANSFER_FUNCTION_PQ) {
+		/* 16 segments x 16 points
+		 * segments are from 2^-11 to 2^5
+		 */
+		segment_start = -11;
+		segment_end = 5;
+
+	} else {
+		/* 10 segments x 16 points
+		 * segment is from 2^-10 to 2^0
+		 */
+		segment_start = -10;
+		segment_end = 0;
+	}
+
+	num_segments = segment_end - segment_start;
+	hw_points = num_segments * 16;
+
+	/* (segment + 25) * 32, every 2nd point */
+	start_index = (segment_start + 25) * 32;
+
+	/* Copy hw_points + 1 samples: the extra one is needed below to
+	 * compute the delta of the last HW point. The index is also bounded
+	 * by the size of the source arrays.
+	 */
+	j = 0;
+	for (i = start_index; i < TRANSFER_FUNC_POINTS; i += 2) {
+		if (j > hw_points)
+			break;
+		rgb_resulted[j].red = output_tf->tf_pts.red[i];
+		rgb_resulted[j].green = output_tf->tf_pts.green[i];
+		rgb_resulted[j].blue = output_tf->tf_pts.blue[i];
+		j++;
+	}
+
+	/* region start at 2^segment_start, region end at 2^segment_end */
+	arr_points[0].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2),
+			dal_fixed31_32_from_int(segment_start));
+	arr_points[1].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2),
+			dal_fixed31_32_from_int(segment_end));
+	arr_points[2].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2),
+			dal_fixed31_32_from_int(segment_end));
+
+	y_r = rgb_resulted[0].red;
+	y_g = rgb_resulted[0].green;
+	y_b = rgb_resulted[0].blue;
+
+	y1_min = dal_fixed31_32_min(y_r, dal_fixed31_32_min(y_g, y_b));
+
+	arr_points[0].y = y1_min;
+	arr_points[0].slope = dal_fixed31_32_div(
+					arr_points[0].y,
+					arr_points[0].x);
+
+	y_r = rgb_resulted[hw_points - 1].red;
+	y_g = rgb_resulted[hw_points - 1].green;
+	y_b = rgb_resulted[hw_points - 1].blue;
+
+	/* NOTE(review): the original comment here stated that
+	 * arr_points[1].y should be the Y value for the region end
+	 * (hw_points), not the last HW point (hw_points - 1), yet the code
+	 * samples hw_points - 1. Behaviour is kept as is; confirm intent.
+	 */
+	y3_max = dal_fixed31_32_max(y_r, dal_fixed31_32_max(y_g, y_b));
+
+	arr_points[1].y = y3_max;
+	arr_points[2].y = y3_max;
+
+	arr_points[1].slope = dal_fixed31_32_zero;
+	arr_points[2].slope = dal_fixed31_32_zero;
+
+	if (output_tf->tf == TRANSFER_FUNCTION_PQ) {
+		/* for PQ, we want to have a straight line from last HW X point,
+		 * and the slope to be such that we hit 1.0 at 10000 nits.
+		 */
+		const struct fixed31_32 end_value =
+				dal_fixed31_32_from_int(125);
+
+		arr_points[1].slope = dal_fixed31_32_div(
+			dal_fixed31_32_sub(dal_fixed31_32_one, arr_points[1].y),
+			dal_fixed31_32_sub(end_value, arr_points[1].x));
+		arr_points[2].slope = dal_fixed31_32_div(
+			dal_fixed31_32_sub(dal_fixed31_32_one, arr_points[1].y),
+			dal_fixed31_32_sub(end_value, arr_points[1].x));
+	}
+
+	regamma_params->hw_points_num = hw_points;
+
+	/* each segment starts 16 points after the previous one */
+	for (i = 0; i < num_segments; i++) {
+		regamma_params->arr_curve_points[i].offset = i * 16;
+		regamma_params->arr_curve_points[i].segments_num = 4;
+	}
+
+	/* Force each channel to be non-decreasing and store the step to the
+	 * next point as the delta of the current one.
+	 */
+	rgb = rgb_resulted;
+	rgb_plus_1 = rgb_resulted + 1;
+
+	for (i = 0; i < hw_points; i++) {
+		if (dal_fixed31_32_lt(rgb_plus_1->red, rgb->red))
+			rgb_plus_1->red = rgb->red;
+		if (dal_fixed31_32_lt(rgb_plus_1->green, rgb->green))
+			rgb_plus_1->green = rgb->green;
+		if (dal_fixed31_32_lt(rgb_plus_1->blue, rgb->blue))
+			rgb_plus_1->blue = rgb->blue;
+
+		rgb->delta_red = dal_fixed31_32_sub(
+			rgb_plus_1->red,
+			rgb->red);
+		rgb->delta_green = dal_fixed31_32_sub(
+			rgb_plus_1->green,
+			rgb->green);
+		rgb->delta_blue = dal_fixed31_32_sub(
+			rgb_plus_1->blue,
+			rgb->blue);
+
+		++rgb_plus_1;
+		++rgb;
+	}
+
+	/* propagate a failed conversion instead of reporting success */
+	return convert_to_custom_float(rgb_resulted, arr_points, hw_points);
+}
+
 static bool dce110_set_output_transfer_func(
 	struct pipe_ctx *pipe_ctx,
 	const struct core_surface *surface, /* Surface - To be removed */
@@ -308,10 +737,13 @@ static bool dce110_set_output_transfer_func(
 	opp->funcs->opp_power_on_regamma_lut(opp, true);

 	if (stream->public.out_transfer_func &&
-	    stream->public.out_transfer_func->type == TF_TYPE_PREDEFINED &&
-	    stream->public.out_transfer_func->tf == TRANSFER_FUNCTION_SRGB) {
+			stream->public.out_transfer_func->type ==
+			TF_TYPE_PREDEFINED &&
+			stream->public.out_transfer_func->tf ==
+			TRANSFER_FUNCTION_SRGB) {
 		opp->funcs->opp_set_regamma_mode(opp, OPP_REGAMMA_SRGB);
-	} else if (ramp && calculate_regamma_params(regamma_params, ramp, surface, stream)) {
+	} else if (dce110_translate_regamma_to_hw_format(
+			stream->public.out_transfer_func, regamma_params)) {
 		opp->funcs->opp_program_regamma_pwl(opp, regamma_params);
 		opp->funcs->opp_set_regamma_mode(opp, OPP_REGAMMA_USER);
 	} else {

--- a/drivers/gpu/drm/amd/display/dc/inc/gamma_calcs.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/gamma_calcs.h
-/*
- * gamma_calcs.h
- *
- *  Created on: Feb 9, 2016
- *      Author: yonsun
- */
-
-#ifndef DRIVERS_GPU_DRM_AMD_DC_DEV_DC_INC_GAMMA_CALCS_H_
-#define DRIVERS_GPU_DRM_AMD_DC_DEV_DC_INC_GAMMA_CALCS_H_
-
-#include "opp.h"
-#include "core_types.h"
-#include "dc.h"
-
-bool calculate_regamma_params(struct pwl_params *params,
-		const struct core_gamma *ramp,
-		const struct core_surface *surface,
-		const struct core_stream *stream);
-
-#endif /* DRIVERS_GPU_DRM_AMD_DC_DEV_DC_INC_GAMMA_CALCS_H_ */
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/opp.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/opp.h
@@ -138,9 +138,7 @@ struct custom_float_value {

 struct hw_x_point {
 	uint32_t custom_float_x;
-	uint32_t custom_float_x_adjusted;
 	struct fixed31_32 x;
-	struct fixed31_32 adjusted_x;
 	struct fixed31_32 regamma_y_red;
 	struct fixed31_32 regamma_y_green;
 	struct fixed31_32 regamma_y_blue;

--- a/drivers/gpu/drm/amd/display/include/fixed31_32.h
+++ b/drivers/gpu/drm/amd/display/include/fixed31_32.h
@@ -190,6 +190,14 @@ struct fixed31_32 dal_fixed31_32_add(
 	struct fixed31_32 arg1,
 	struct fixed31_32 arg2);

+/*
+ * @brief
+ * result = arg1 + arg2
+ *
+ * arg2 is a plain integer; it is converted to fixed-point with
+ * dal_fixed31_32_from_int() before being added to arg1.
+ */
+struct fixed31_32 dal_fixed31_32_add_int(
+	struct fixed31_32 arg1,
+	int32_t arg2);
+
 /*
 * @brief
 * result = arg1 - arg2