From e4d8eaa1bcdb145ef70dc5e46a35132646ba2065 Mon Sep 17 00:00:00 2001
From: jpark37
Date: Sun, 15 Dec 2019 12:01:01 -0800
Subject: [PATCH] obs-filters: Use volume texture for LUT

Simplifies shader calculations.

Not much perf gain, but GPA shows bottleneck shift from Shader Execution
to Sampler as expected.
---
Review notes (this region is ignored when the patch is applied):
 - make_clut_texture(): the tile row is now derived from the tiles-per-row
   count (macro_width) instead of macro_height, the pixel-count check is
   done in 64 bits, and the tile offsets are unsigned.
 - color_grade_filter_update(): filter->target is reset to NULL after it
   is destroyed so it cannot dangle when no new texture is created.
 - Line breaks and indentation were reconstructed; verify the context
   lines against the target tree before applying.

 plugins/obs-filters/color-grade-filter.c      | 90 ++++++++++++++++++-
 .../data/color_grade_filter.effect            | 29 ++------
 2 files changed, 95 insertions(+), 24 deletions(-)

diff --git a/plugins/obs-filters/color-grade-filter.c b/plugins/obs-filters/color-grade-filter.c
index fb82ed81e..ccb86532b 100644
--- a/plugins/obs-filters/color-grade-filter.c
+++ b/plugins/obs-filters/color-grade-filter.c
@@ -12,6 +12,8 @@
 
 /* clang-format on */
 
+static const uint32_t LUT_WIDTH = 64;
+
 struct lut_filter_data {
 	obs_source_t *context;
 	gs_effect_t *effect;
@@ -20,6 +22,8 @@ struct lut_filter_data {
 
 	char *file;
 	float clut_amount;
+	float clut_scale;
+	float clut_offset;
 };
 
 static const char *color_grade_filter_get_name(void *unused)
@@ -28,6 +32,72 @@ static const char *color_grade_filter_get_name(void *unused)
 	return obs_module_text("ColorGradeFilter");
 }
 
+/*
+ * Repacks a 2D LUT image into a LUT_WIDTH x LUT_WIDTH x LUT_WIDTH volume
+ * texture.
+ *
+ * The image is read as a grid of LUT_WIDTH x LUT_WIDTH tiles in row-major
+ * order; tile z becomes depth slice z of the volume.
+ *
+ * Returns NULL if either image dimension is not a multiple of LUT_WIDTH, if
+ * the image does not contain exactly LUT_WIDTH^3 pixels, or if the format's
+ * bits per pixel is not a multiple of 8. Otherwise returns the result of
+ * gs_voltexture_create(), which the filter later releases with
+ * gs_voltexture_destroy().
+ */
+static gs_texture_t *make_clut_texture(const enum gs_color_format format,
+				       const uint32_t image_width,
+				       const uint32_t image_height,
+				       const uint8_t *data)
+{
+	if (image_width % LUT_WIDTH != 0)
+		return NULL;
+
+	if (image_height % LUT_WIDTH != 0)
+		return NULL;
+
+	const uint32_t pixel_count = LUT_WIDTH * LUT_WIDTH * LUT_WIDTH;
+	/* Compare in 64 bits so an oversized image cannot wrap around. */
+	if (((uint64_t)image_width * image_height) != pixel_count)
+		return NULL;
+
+	const uint32_t bpp = gs_get_format_bpp(format);
+	if (bpp % 8 != 0)
+		return NULL;
+
+	const uint32_t pixel_size = bpp / 8;
+	const uint32_t buffer_size = pixel_size * pixel_count;
+	uint8_t *const buffer = bmalloc(buffer_size);
+	/* Number of tiles per image row. */
+	const uint32_t macro_width = image_width / LUT_WIDTH;
+	uint8_t *cursor = buffer;
+	for (uint32_t z = 0; z < LUT_WIDTH; ++z) {
+		/*
+		 * Tiles are laid out row-major, so both the column and the row
+		 * of tile z are derived from the tiles-per-row count.
+		 */
+		const uint32_t z_x = (z % macro_width) * LUT_WIDTH;
+		const uint32_t z_y = (z / macro_width) * LUT_WIDTH;
+		for (uint32_t y = 0; y < LUT_WIDTH; ++y) {
+			const uint32_t row_index = image_width * (z_y + y);
+			for (uint32_t x = 0; x < LUT_WIDTH; ++x) {
+				const uint32_t index = row_index + z_x + x;
+				memcpy(cursor, &data[pixel_size * index],
+				       pixel_size);
+
+				cursor += pixel_size;
+			}
+		}
+	}
+
+	gs_texture_t *const texture =
+		gs_voltexture_create(LUT_WIDTH, LUT_WIDTH, LUT_WIDTH, format, 1,
+				     (const uint8_t **)&buffer, 0);
+	bfree(buffer);
+
+	return texture;
+}
+
 static void color_grade_filter_update(void *data, obs_data_t *settings)
 {
 	struct lut_filter_data *filter = data;
@@ -49,10 +119,19 @@ static void color_grade_filter_update(void *data, obs_data_t *settings)
 
 	obs_enter_graphics();
 
-	gs_image_file_init_texture(&filter->image);
+	gs_voltexture_destroy(filter->target);
+	/* Clear the stale pointer in case no replacement gets created. */
+	filter->target = NULL;
+	if (filter->image.loaded) {
+		filter->target = make_clut_texture(filter->image.format,
+						   filter->image.cx,
+						   filter->image.cy,
+						   filter->image.texture_data);
+	}
 
-	filter->target = filter->image.texture;
 	filter->clut_amount = (float)clut_amount;
+	filter->clut_scale = (float)(LUT_WIDTH - 1) / (float)LUT_WIDTH;
+	filter->clut_offset = 0.5f / (float)LUT_WIDTH;
 
 	char *effect_path = obs_module_file("color_grade_filter.effect");
 	gs_effect_destroy(filter->effect);
@@ -121,6 +200,7 @@ static void color_grade_filter_destroy(void *data)
 
 	obs_enter_graphics();
 	gs_effect_destroy(filter->effect);
+	gs_voltexture_destroy(filter->target);
 	gs_image_file_free(&filter->image);
 	obs_leave_graphics();
 
@@ -149,6 +229,12 @@ static void color_grade_filter_render(void *data, gs_effect_t *effect)
 	param = gs_effect_get_param_by_name(filter->effect, "clut_amount");
 	gs_effect_set_float(param, filter->clut_amount);
 
+	param = gs_effect_get_param_by_name(filter->effect, "clut_scale");
+	gs_effect_set_float(param, filter->clut_scale);
+
+	param = gs_effect_get_param_by_name(filter->effect, "clut_offset");
+	gs_effect_set_float(param, filter->clut_offset);
+
 	obs_source_process_filter_end(filter->context, filter->effect, 0, 0);
 
 	UNUSED_PARAMETER(effect);
diff --git a/plugins/obs-filters/data/color_grade_filter.effect b/plugins/obs-filters/data/color_grade_filter.effect
index 02e02a8ec..8c9ba2041 100644
--- a/plugins/obs-filters/data/color_grade_filter.effect
+++ b/plugins/obs-filters/data/color_grade_filter.effect
@@ -1,13 +1,16 @@
 uniform float4x4 ViewProj;
 uniform texture2d image;
 
-uniform texture2d clut;
+uniform texture3d clut;
 uniform float clut_amount;
+uniform float clut_scale;
+uniform float clut_offset;
 
 sampler_state textureSampler {
 	Filter    = Linear;
 	AddressU  = Clamp;
 	AddressV  = Clamp;
+	AddressW  = Clamp;
 };
 
 struct VertDataIn {
@@ -31,29 +34,11 @@ VertDataOut VSDefault(VertDataIn v_in)
 float4 LUT(VertDataOut v_in) : TARGET
 {
 	float4 textureColor = image.Sample(textureSampler, v_in.uv);
-	float blueColor = textureColor.b * 63.0;
 
-	float2 quad1;
-	quad1.y = floor(floor(blueColor) / 8.0);
-	quad1.x = floor(blueColor) - (quad1.y * 8.0);
+	float3 clut_uvw = textureColor.rgb * clut_scale + clut_offset;
+	float3 luttedColor = clut.Sample(textureSampler, clut_uvw).rgb;
 
-	float2 quad2;
-	quad2.y = floor(ceil(blueColor) / 8.0);
-	quad2.x = ceil(blueColor) - (quad2.y * 8.0);
-
-	float2 texPos1;
-	texPos1.x = (quad1.x * 0.125) + 0.5/512.0 + ((0.125 - 1.0/512.0) * textureColor.r);
-	texPos1.y = (quad1.y * 0.125) + 0.5/512.0 + ((0.125 - 1.0/512.0) * textureColor.g);
-
-	float2 texPos2;
-	texPos2.x = (quad2.x * 0.125) + 0.5/512.0 + ((0.125 - 1.0/512.0) * textureColor.r);
-	texPos2.y = (quad2.y * 0.125) + 0.5/512.0 + ((0.125 - 1.0/512.0) * textureColor.g);
-
-	float4 newColor1 = clut.Sample(textureSampler, texPos1);
-	float4 newColor2 = clut.Sample(textureSampler, texPos2);
-	float4 luttedColor = lerp(newColor1, newColor2, frac(blueColor));
-
-	float4 final_color = lerp(textureColor, luttedColor, clut_amount);
+	float3 final_color = lerp(textureColor.rgb, luttedColor, clut_amount);
 	return float4(final_color.rgb, textureColor.a);
 }
 
-- 
GitLab