提交 e4d8eaa1 编写于 作者: J jpark37

obs-filters: Use volume texture for LUT

Simplifies shader calculations. Not much perf gain, but GPA shows
bottleneck shift from Shader Execution to Sampler as expected.
上级 8f6984e3
......@@ -12,6 +12,8 @@
/* clang-format on */
/* Edge length, in texels, of the cubic color lookup table: the volume
 * texture is created as LUT_WIDTH x LUT_WIDTH x LUT_WIDTH, and the source
 * image must hold exactly that many pixels. */
static const uint32_t LUT_WIDTH = 64;
struct lut_filter_data {
obs_source_t *context;
gs_effect_t *effect;
......@@ -20,6 +22,8 @@ struct lut_filter_data {
char *file;
float clut_amount;
float clut_scale;
float clut_offset;
};
static const char *color_grade_filter_get_name(void *unused)
......@@ -28,6 +32,54 @@ static const char *color_grade_filter_get_name(void *unused)
return obs_module_text("ColorGradeFilter");
}
/*
 * Repack a 2D color-LUT atlas into a volume texture.
 *
 * The source image is treated as a row-major grid of LUT_WIDTH x LUT_WIDTH
 * tiles, (image_width / LUT_WIDTH) tiles per row; tile number z becomes
 * depth slice z of the resulting LUT_WIDTH^3 volume.
 *
 * format        pixel format of `data`; must use a whole number of bytes
 *               per pixel
 * image_width   atlas width in pixels, a multiple of LUT_WIDTH
 * image_height  atlas height in pixels, a multiple of LUT_WIDTH
 * data          tightly packed atlas pixels (image_width * image_height)
 *
 * Returns the value of gs_voltexture_create() for the repacked data, or
 * NULL when the input is missing or does not describe exactly LUT_WIDTH^3
 * pixels. The caller owns the returned texture.
 */
static gs_texture_t *make_clut_texture(const enum gs_color_format format,
				       const uint32_t image_width,
				       const uint32_t image_height,
				       const uint8_t *data)
{
	if (!data)
		return NULL;

	if (image_width % LUT_WIDTH != 0)
		return NULL;
	if (image_height % LUT_WIDTH != 0)
		return NULL;

	/* Compare in 64 bits so an oversized image cannot wrap around and
	 * masquerade as a valid LUT. */
	const uint32_t pixel_count = LUT_WIDTH * LUT_WIDTH * LUT_WIDTH;
	if ((uint64_t)image_width * image_height != pixel_count)
		return NULL;

	const uint32_t bpp = gs_get_format_bpp(format);
	if (bpp == 0 || bpp % 8 != 0)
		return NULL;

	const size_t pixel_size = bpp / 8;
	const size_t row_size = pixel_size * LUT_WIDTH;
	uint8_t *const buffer = bmalloc(row_size * LUT_WIDTH * LUT_WIDTH);

	/* The checks above guarantee macro_width >= 1 and
	 * macro_width * (image_height / LUT_WIDTH) == LUT_WIDTH. */
	const uint32_t macro_width = image_width / LUT_WIDTH;
	uint8_t *cursor = buffer;
	for (uint32_t z = 0; z < LUT_WIDTH; ++z) {
		/* Tiles are laid out row-major, so both the column and the
		 * row of tile z are derived from the tiles-per-row count. */
		const uint32_t z_x = (z % macro_width) * LUT_WIDTH;
		const uint32_t z_y = (z / macro_width) * LUT_WIDTH;
		for (uint32_t y = 0; y < LUT_WIDTH; ++y) {
			/* One tile row is contiguous in the atlas and in the
			 * volume, so copy it in a single call. */
			const size_t index =
				(size_t)image_width * (z_y + y) + z_x;
			memcpy(cursor, &data[pixel_size * index], row_size);
			cursor += row_size;
		}
	}

	gs_texture_t *const texture =
		gs_voltexture_create(LUT_WIDTH, LUT_WIDTH, LUT_WIDTH, format, 1,
				     (const uint8_t **)&buffer, 0);
	bfree(buffer);
	return texture;
}
static void color_grade_filter_update(void *data, obs_data_t *settings)
{
struct lut_filter_data *filter = data;
......@@ -49,10 +101,17 @@ static void color_grade_filter_update(void *data, obs_data_t *settings)
obs_enter_graphics();
gs_image_file_init_texture(&filter->image);
gs_voltexture_destroy(filter->target);
if (filter->image.loaded) {
filter->target = make_clut_texture(filter->image.format,
filter->image.cx,
filter->image.cy,
filter->image.texture_data);
}
filter->target = filter->image.texture;
filter->clut_amount = (float)clut_amount;
filter->clut_scale = (float)(LUT_WIDTH - 1) / (float)LUT_WIDTH;
filter->clut_offset = 0.5f / (float)LUT_WIDTH;
char *effect_path = obs_module_file("color_grade_filter.effect");
gs_effect_destroy(filter->effect);
......@@ -121,6 +180,7 @@ static void color_grade_filter_destroy(void *data)
obs_enter_graphics();
gs_effect_destroy(filter->effect);
gs_voltexture_destroy(filter->target);
gs_image_file_free(&filter->image);
obs_leave_graphics();
......@@ -149,6 +209,12 @@ static void color_grade_filter_render(void *data, gs_effect_t *effect)
param = gs_effect_get_param_by_name(filter->effect, "clut_amount");
gs_effect_set_float(param, filter->clut_amount);
param = gs_effect_get_param_by_name(filter->effect, "clut_scale");
gs_effect_set_float(param, filter->clut_scale);
param = gs_effect_get_param_by_name(filter->effect, "clut_offset");
gs_effect_set_float(param, filter->clut_offset);
obs_source_process_filter_end(filter->context, filter->effect, 0, 0);
UNUSED_PARAMETER(effect);
......
uniform float4x4 ViewProj;
uniform texture2d image;
/* Color lookup table; sampled with a three-component coordinate. */
uniform texture3d clut;
/* Blend factor between the unmodified color and the LUT result. */
uniform float clut_amount;
/* Scale and offset applied to the color before it is used as the LUT
 * coordinate (uvw = rgb * clut_scale + clut_offset). */
uniform float clut_scale;
uniform float clut_offset;
/* Sampler shared by the image and LUT lookups: linear filtering with
 * clamped addressing on all three axes (W is needed for the 3D LUT). */
sampler_state textureSampler {
Filter = Linear;
AddressU = Clamp;
AddressV = Clamp;
AddressW = Clamp;
};
struct VertDataIn {
......@@ -31,29 +34,11 @@ VertDataOut VSDefault(VertDataIn v_in)
/* Pixel shader: grade the source color through the 3D lookup table.
 * The source RGB is turned into a LUT coordinate, the LUT is sampled once,
 * and the result is blended with the original color by clut_amount.
 * Alpha is passed through unchanged. */
float4 LUT(VertDataOut v_in) : TARGET
{
	float4 textureColor = image.Sample(textureSampler, v_in.uv);

	/* uvw = rgb * clut_scale + clut_offset; the sampler's linear filter
	 * interpolates between neighbouring LUT entries on all three axes. */
	float3 clut_uvw = textureColor.rgb * clut_scale + clut_offset;
	float3 luttedColor = clut.Sample(textureSampler, clut_uvw).rgb;

	float3 final_color = lerp(textureColor.rgb, luttedColor, clut_amount);
	return float4(final_color.rgb, textureColor.a);
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册