未验证 提交 173eec0d 编写于 作者: J Jim 提交者: GitHub

Merge pull request #1978 from jpark37/defer-yuv-multiply

libobs: Rework RGB to YUV conversion
......@@ -6,7 +6,6 @@
uniform float4x4 ViewProj;
uniform texture2d image;
uniform float4x4 color_matrix;
uniform float2 base_dimension_i;
uniform float undistort_factor = 1.0;
......@@ -138,13 +137,6 @@ float4 PSDrawBicubicRGBADivide(VertData v_in) : TARGET
return float4(rgba.rgb * multiplier, alpha);
}
/*
 * Pixel shader: bicubic draw followed by a color-matrix transform.
 *
 * Takes the RGB result of DrawBicubic, clamps it to [0, 1], multiplies the
 * homogeneous color (rgb, 1.0) by the color_matrix uniform and outputs the
 * resulting triple with alpha forced to 1.0.
 * NOTE(review): the local is named "yuv", so color_matrix is presumably an
 * RGB -> YUV matrix -- confirm against the caller that sets the uniform.
 */
float4 PSDrawBicubicMatrix(VertData v_in) : TARGET
{
/* second argument is false here; the DrawUndistort technique passes true */
float3 rgb = DrawBicubic(v_in, false).rgb;
/* saturate() clamps before the multiply; w = 1.0 picks up the matrix's constant term */
float3 yuv = mul(float4(saturate(rgb), 1.0), color_matrix).xyz;
return float4(yuv, 1.0);
}
technique Draw
{
pass
......@@ -171,12 +163,3 @@ technique DrawUndistort
pixel_shader = PSDrawBicubicRGBA(v_in, true);
}
}
/* Single-pass technique pairing VSDefault with PSDrawBicubicMatrix. */
technique DrawMatrix
{
pass
{
vertex_shader = VSDefault(v_in);
pixel_shader = PSDrawBicubicMatrix(v_in);
}
}
......@@ -5,7 +5,6 @@
uniform float4x4 ViewProj;
uniform texture2d image;
uniform float4x4 color_matrix;
sampler_state textureSampler {
Filter = Linear;
......@@ -66,13 +65,6 @@ float4 PSDrawLowresBilinearRGBADivide(VertData v_in) : TARGET
return float4(rgba.rgb * multiplier, alpha);
}
/*
 * Pixel shader: low-resolution bilinear draw followed by a color-matrix
 * transform.
 *
 * Takes the RGB result of DrawLowresBilinear, clamps it to [0, 1], multiplies
 * the homogeneous color (rgb, 1.0) by the color_matrix uniform and outputs
 * the resulting triple with alpha forced to 1.0.
 * NOTE(review): the local is named "yuv", so color_matrix is presumably an
 * RGB -> YUV matrix -- confirm against the caller that sets the uniform.
 */
float4 PSDrawLowresBilinearMatrix(VertData v_in) : TARGET
{
float3 rgb = DrawLowresBilinear(v_in).rgb;
/* saturate() clamps before the multiply; w = 1.0 picks up the matrix's constant term */
float3 yuv = mul(float4(saturate(rgb), 1.0), color_matrix).xyz;
return float4(yuv, 1.0);
}
technique Draw
{
pass
......@@ -91,12 +83,3 @@ technique DrawAlphaDivide
}
}
/* Single-pass technique pairing VSDefault with PSDrawLowresBilinearMatrix. */
technique DrawMatrix
{
pass
{
vertex_shader = VSDefault(v_in);
pixel_shader = PSDrawLowresBilinearMatrix(v_in);
}
}
uniform float4x4 ViewProj;
uniform float4x4 color_matrix;
uniform texture2d image;
sampler_state def_sampler {
......@@ -34,13 +33,6 @@ float4 PSDrawAlphaDivide(VertInOut vert_in) : TARGET
return float4(rgba.rgb * multiplier, alpha);
}
/*
 * Pixel shader: plain texture fetch followed by a color-matrix transform.
 *
 * Samples `image` through def_sampler at the interpolated uv, multiplies the
 * homogeneous color (rgb, 1.0) by the color_matrix uniform and outputs the
 * resulting triple with alpha forced to 1.0. Unlike the scaling variants,
 * the sampled color is not saturated before the multiply.
 * NOTE(review): the local is named "yuv", so color_matrix is presumably an
 * RGB -> YUV matrix -- confirm against the caller that sets the uniform.
 */
float4 PSDrawMatrix(VertInOut vert_in) : TARGET
{
float3 rgb = image.Sample(def_sampler, vert_in.uv).rgb;
/* w = 1.0 picks up the matrix's constant term */
float3 yuv = mul(float4(rgb, 1.0), color_matrix).xyz;
return float4(yuv, 1.0);
}
technique Draw
{
pass
......@@ -58,12 +50,3 @@ technique DrawAlphaDivide
pixel_shader = PSDrawAlphaDivide(vert_in);
}
}
/* Single-pass technique pairing VSDefault with PSDrawMatrix. */
technique DrawMatrix
{
pass
{
vertex_shader = VSDefault(vert_in);
pixel_shader = PSDrawMatrix(vert_in);
}
}
......@@ -44,6 +44,10 @@ uniform float4x4 color_matrix;
uniform float3 color_range_min = {0.0, 0.0, 0.0};
uniform float3 color_range_max = {1.0, 1.0, 1.0};
uniform float4 color_vec_y;
uniform float4 color_vec_u;
uniform float4 color_vec_v;
uniform texture2d image;
sampler_state def_sampler {
......@@ -52,12 +56,33 @@ sampler_state def_sampler {
AddressV = Clamp;
};
struct VertInOut {
/* Position-only stage interface: returned by VSPos and consumed by PSNV12_Y. */
struct FragPos {
float4 pos : POSITION;
};
/* Vertex-stage output carrying a texture coordinate and a position; returned by VSPosTex. */
struct VertTexPos {
float2 uv : TEXCOORD0;
float4 pos : POSITION;
};
VertInOut VSDefault(uint id : VERTEXID)
/* Texture-coordinate-only pixel-stage input, used by the pixel shaders that only read uv. */
struct FragTex {
float2 uv : TEXCOORD0;
};
/*
 * Vertex shader that derives a position purely from the vertex index; no
 * vertex buffer data and no texture coordinate are involved.
 *
 * For ids 0, 1 and 2 the generated positions are (-1, -1), (-1, 3) and
 * (3, -1), with z = 0 and w = 1.
 * NOTE(review): presumably drawn as one oversized triangle that covers the
 * whole [-1, 1] viewport -- confirm against the draw call.
 */
FragPos VSPos(uint id : VERTEXID)
{
/* bit 1 of the id drives x, bit 0 drives y */
float idHigh = float(id >> 1);
float idLow = float(id & uint(1));
/* each coordinate is either -1.0 or 3.0 */
float x = idHigh * 4.0 - 1.0;
float y = idLow * 4.0 - 1.0;
FragPos vert_out;
vert_out.pos = float4(x, y, 0.0, 1.0);
return vert_out;
}
VertTexPos VSPosTex(uint id : VERTEXID)
{
float idHigh = float(id >> 1);
float idLow = float(id & uint(1));
......@@ -68,24 +93,22 @@ VertInOut VSDefault(uint id : VERTEXID)
float u = idHigh * 2.0;
float v = obs_glsl_compile ? (idLow * 2.0) : (1.0 - idLow * 2.0);
VertInOut vert_out;
vert_out.pos = float4(x, y, 0.0, 1.0);
VertTexPos vert_out;
vert_out.uv = float2(u, v);
vert_out.pos = float4(x, y, 0.0, 1.0);
return vert_out;
}
/* used to prevent internal GPU precision issues, with fmod in particular */
#define PRECISION_OFFSET 0.2
float4 PSNV12(VertInOut vert_in) : TARGET
float4 PSNV12(FragTex frag_in) : TARGET
{
float v_mul = floor(vert_in.uv.y * input_height);
float v_mul = floor(frag_in.uv.y * input_height);
float byte_offset = floor((v_mul + vert_in.uv.x) * width) * 4.0;
float byte_offset = floor((v_mul + frag_in.uv.x) * width) * 4.0;
byte_offset += PRECISION_OFFSET;
float2 sample_pos[4];
if (byte_offset < u_plane_offset) {
#ifdef DEBUGGING
return float4(1.0, 1.0, 1.0, 1.0);
......@@ -98,19 +121,24 @@ float4 PSNV12(VertInOut vert_in) : TARGET
lum_u += width_i * 0.5;
lum_v += height_i * 0.5;
sample_pos[0] = float2(lum_u, lum_v);
sample_pos[1] = float2(lum_u += width_i, lum_v);
sample_pos[2] = float2(lum_u += width_i, lum_v);
sample_pos[3] = float2(lum_u + width_i, lum_v);
float4x4 out_val = float4x4(
image.Sample(def_sampler, sample_pos[0]),
image.Sample(def_sampler, sample_pos[1]),
image.Sample(def_sampler, sample_pos[2]),
image.Sample(def_sampler, sample_pos[3])
float2 sample_pos0 = float2(lum_u, lum_v);
float2 sample_pos1 = float2(lum_u += width_i, lum_v);
float2 sample_pos2 = float2(lum_u += width_i, lum_v);
float2 sample_pos3 = float2(lum_u + width_i, lum_v);
float3 rgb0 = image.Sample(def_sampler, sample_pos0).rgb;
float3 rgb1 = image.Sample(def_sampler, sample_pos1).rgb;
float3 rgb2 = image.Sample(def_sampler, sample_pos2).rgb;
float3 rgb3 = image.Sample(def_sampler, sample_pos3).rgb;
float4 out_val = float4(
dot(color_vec_y.xyz, rgb0) + color_vec_y.w,
dot(color_vec_y.xyz, rgb1) + color_vec_y.w,
dot(color_vec_y.xyz, rgb2) + color_vec_y.w,
dot(color_vec_y.xyz, rgb3) + color_vec_y.w
);
return transpose(out_val)[1];
return out_val;
} else {
#ifdef DEBUGGING
return float4(0.5, 0.2, 0.5, 0.2);
......@@ -127,34 +155,44 @@ float4 PSNV12(VertInOut vert_in) : TARGET
ch_u += width_i;
ch_v += height_i;
sample_pos[0] = float2(ch_u, ch_v);
sample_pos[1] = float2(ch_u + width_i2, ch_v);
float2 sample_pos0 = float2(ch_u, ch_v);
float2 sample_pos1 = float2(ch_u + width_i2, ch_v);
float3 rgb0 = image.Sample(def_sampler, sample_pos0).rgb;
float3 rgb1 = image.Sample(def_sampler, sample_pos1).rgb;
return float4(
image.Sample(def_sampler, sample_pos[0]).rb,
image.Sample(def_sampler, sample_pos[1]).rb
);
dot(color_vec_u.xyz, rgb0) + color_vec_u.w,
dot(color_vec_v.xyz, rgb0) + color_vec_v.w,
dot(color_vec_u.xyz, rgb1) + color_vec_u.w,
dot(color_vec_v.xyz, rgb1) + color_vec_v.w
);
}
}
float PSNV12_Y(VertInOut vert_in) : TARGET
float PSNV12_Y(FragPos frag_in) : TARGET
{
return image.Sample(def_sampler, vert_in.uv.xy).y;
float3 rgb = image.Load(int3(frag_in.pos.xy, 0)).rgb;
float y = dot(color_vec_y.xyz, rgb) + color_vec_y.w;
return y;
}
float2 PSNV12_UV(VertInOut vert_in) : TARGET
float2 PSNV12_UV(FragTex frag_in) : TARGET
{
return image.Sample(def_sampler, vert_in.uv.xy).xz;
float3 rgb = image.Sample(def_sampler, frag_in.uv).rgb;
float u = dot(color_vec_u.xyz, rgb) + color_vec_u.w;
float v = dot(color_vec_v.xyz, rgb) + color_vec_v.w;
return float2(u, v);
}
float4 PSPlanar420(VertInOut vert_in) : TARGET
float4 PSPlanar420(FragTex frag_in) : TARGET
{
float v_mul = floor(vert_in.uv.y * input_height);
float v_mul = floor(frag_in.uv.y * input_height);
float byte_offset = floor((v_mul + vert_in.uv.x) * width) * 4.0;
float byte_offset = floor((v_mul + frag_in.uv.x) * width) * 4.0;
byte_offset += PRECISION_OFFSET;
float2 sample_pos[4];
float2 sample_pos0, sample_pos1, sample_pos2, sample_pos3;
if (byte_offset < u_plane_offset) {
#ifdef DEBUGGING
......@@ -168,10 +206,10 @@ float4 PSPlanar420(VertInOut vert_in) : TARGET
lum_u += width_i * 0.5;
lum_v += height_i * 0.5;
sample_pos[0] = float2(lum_u, lum_v);
sample_pos[1] = float2(lum_u += width_i, lum_v);
sample_pos[2] = float2(lum_u += width_i, lum_v);
sample_pos[3] = float2(lum_u + width_i, lum_v);
sample_pos0 = float2(lum_u, lum_v);
sample_pos1 = float2(lum_u += width_i, lum_v);
sample_pos2 = float2(lum_u += width_i, lum_v);
sample_pos3 = float2(lum_u + width_i, lum_v);
} else {
#ifdef DEBUGGING
......@@ -200,42 +238,46 @@ float4 PSPlanar420(VertInOut vert_in) : TARGET
float ch_u_n = 0. + width_i;
float ch_v_n = ch_v + height_i * 3;
sample_pos[0] = float2(ch_u, ch_v);
sample_pos[1] = float2(ch_u += width_i2, ch_v);
sample_pos0 = float2(ch_u, ch_v);
sample_pos1 = float2(ch_u += width_i2, ch_v);
ch_u += width_i2;
// check if ch_u overflowed the current source and chroma line
if (ch_u > 1.0) {
sample_pos[2] = float2(ch_u_n, ch_v_n);
sample_pos[2] = float2(ch_u_n + width_i2, ch_v_n);
sample_pos2 = float2(ch_u_n, ch_v_n);
sample_pos2 = float2(ch_u_n + width_i2, ch_v_n);
} else {
sample_pos[2] = float2(ch_u, ch_v);
sample_pos[3] = float2(ch_u + width_i2, ch_v);
sample_pos2 = float2(ch_u, ch_v);
sample_pos3 = float2(ch_u + width_i2, ch_v);
}
}
float4x4 out_val = float4x4(
image.Sample(def_sampler, sample_pos[0]),
image.Sample(def_sampler, sample_pos[1]),
image.Sample(def_sampler, sample_pos[2]),
image.Sample(def_sampler, sample_pos[3])
);
out_val = transpose(out_val);
float3 rgb0 = image.Sample(def_sampler, sample_pos0).rgb;
float3 rgb1 = image.Sample(def_sampler, sample_pos1).rgb;
float3 rgb2 = image.Sample(def_sampler, sample_pos2).rgb;
float3 rgb3 = image.Sample(def_sampler, sample_pos3).rgb;
float4 color_vec;
if (byte_offset < u_plane_offset)
return out_val[1];
color_vec = color_vec_y;
else if (byte_offset < v_plane_offset)
return out_val[0];
color_vec = color_vec_u;
else
return out_val[2];
color_vec = color_vec_v;
return float4(
dot(color_vec.xyz, rgb0) + color_vec.w,
dot(color_vec.xyz, rgb1) + color_vec.w,
dot(color_vec.xyz, rgb2) + color_vec.w,
dot(color_vec.xyz, rgb3) + color_vec.w
);
}
float4 PSPlanar444(VertInOut vert_in) : TARGET
float4 PSPlanar444(FragTex frag_in) : TARGET
{
float v_mul = floor(vert_in.uv.y * input_height);
float v_mul = floor(frag_in.uv.y * input_height);
float byte_offset = floor((v_mul + vert_in.uv.x) * width) * 4.0;
float byte_offset = floor((v_mul + frag_in.uv.x) * width) * 4.0;
byte_offset += PRECISION_OFFSET;
float new_byte_offset = byte_offset;
......@@ -245,8 +287,6 @@ float4 PSPlanar444(VertInOut vert_in) : TARGET
else if (byte_offset >= u_plane_offset)
new_byte_offset -= u_plane_offset;
float2 sample_pos[4];
float u_val = floor(fmod(new_byte_offset, width)) * width_i;
float v_val = floor(new_byte_offset * width_i) * height_i;
......@@ -254,26 +294,30 @@ float4 PSPlanar444(VertInOut vert_in) : TARGET
u_val += width_i * 0.5;
v_val += height_i * 0.5;
sample_pos[0] = float2(u_val, v_val);
sample_pos[1] = float2(u_val += width_i, v_val);
sample_pos[2] = float2(u_val += width_i, v_val);
sample_pos[3] = float2(u_val + width_i, v_val);
float2 sample_pos0 = float2(u_val, v_val);
float2 sample_pos1 = float2(u_val += width_i, v_val);
float2 sample_pos2 = float2(u_val += width_i, v_val);
float2 sample_pos3 = float2(u_val + width_i, v_val);
float4x4 out_val = float4x4(
image.Sample(def_sampler, sample_pos[0]),
image.Sample(def_sampler, sample_pos[1]),
image.Sample(def_sampler, sample_pos[2]),
image.Sample(def_sampler, sample_pos[3])
);
out_val = transpose(out_val);
float3 rgb0 = image.Sample(def_sampler, sample_pos0).rgb;
float3 rgb1 = image.Sample(def_sampler, sample_pos1).rgb;
float3 rgb2 = image.Sample(def_sampler, sample_pos2).rgb;
float3 rgb3 = image.Sample(def_sampler, sample_pos3).rgb;
float4 color_vec;
if (byte_offset < u_plane_offset)
return out_val[1];
color_vec = color_vec_y;
else if (byte_offset < v_plane_offset)
return out_val[0];
color_vec = color_vec_u;
else
return out_val[2];
color_vec = color_vec_v;
return float4(
dot(color_vec.xyz, rgb0) + color_vec.w,
dot(color_vec.xyz, rgb1) + color_vec.w,
dot(color_vec.xyz, rgb2) + color_vec.w,
dot(color_vec.xyz, rgb3) + color_vec.w
);
}
float GetIntOffsetColor(int offset)
......@@ -283,12 +327,12 @@ float GetIntOffsetColor(int offset)
0)).r;
}
float4 PSPacked422_Reverse(VertInOut vert_in, int u_pos, int v_pos,
float4 PSPacked422_Reverse(FragTex frag_in, int u_pos, int v_pos,
int y0_pos, int y1_pos) : TARGET
{
float y = vert_in.uv.y;
float odd = floor(fmod(width * vert_in.uv.x + PRECISION_OFFSET, 2.0));
float x = floor(width_d2 * vert_in.uv.x + PRECISION_OFFSET) *
float y = frag_in.uv.y;
float odd = floor(fmod(width * frag_in.uv.x + PRECISION_OFFSET, 2.0));
float x = floor(width_d2 * frag_in.uv.x + PRECISION_OFFSET) *
width_d2_i;
x += input_width_i_d2;
......@@ -300,10 +344,10 @@ float4 PSPacked422_Reverse(VertInOut vert_in, int u_pos, int v_pos,
return saturate(mul(float4(yuv, 1.0), color_matrix));
}
float4 PSPlanar420_Reverse(VertInOut vert_in) : TARGET
float4 PSPlanar420_Reverse(FragTex frag_in) : TARGET
{
int x = int(vert_in.uv.x * width + PRECISION_OFFSET);
int y = int(vert_in.uv.y * height + PRECISION_OFFSET);
int x = int(frag_in.uv.x * width + PRECISION_OFFSET);
int y = int(frag_in.uv.y * height + PRECISION_OFFSET);
int lum_offset = y * int_width + x;
int chroma_offset = (y / 2) * (int_width / 2) + x / 2;
......@@ -319,10 +363,10 @@ float4 PSPlanar420_Reverse(VertInOut vert_in) : TARGET
return saturate(mul(float4(yuv, 1.0), color_matrix));
}
float4 PSPlanar444_Reverse(VertInOut vert_in) : TARGET
float4 PSPlanar444_Reverse(FragTex frag_in) : TARGET
{
int x = int(vert_in.uv.x * width + PRECISION_OFFSET);
int y = int(vert_in.uv.y * height + PRECISION_OFFSET);
int x = int(frag_in.uv.x * width + PRECISION_OFFSET);
int y = int(frag_in.uv.y * height + PRECISION_OFFSET);
int lum_offset = y * int_width + x;
int chroma_offset = y * int_width + x;
......@@ -338,10 +382,10 @@ float4 PSPlanar444_Reverse(VertInOut vert_in) : TARGET
return saturate(mul(float4(yuv, 1.0), color_matrix));
}
float4 PSNV12_Reverse(VertInOut vert_in) : TARGET
float4 PSNV12_Reverse(FragTex frag_in) : TARGET
{
int x = int(vert_in.uv.x * width + PRECISION_OFFSET);
int y = int(vert_in.uv.y * height + PRECISION_OFFSET);
int x = int(frag_in.uv.x * width + PRECISION_OFFSET);
int y = int(frag_in.uv.y * height + PRECISION_OFFSET);
int lum_offset = y * int_width + x;
int chroma_offset = (y / 2) * (int_width / 2) + x / 2;
......@@ -356,39 +400,39 @@ float4 PSNV12_Reverse(VertInOut vert_in) : TARGET
return saturate(mul(float4(yuv, 1.0), color_matrix));
}
float4 PSY800_Limited(VertInOut vert_in) : TARGET
float4 PSY800_Limited(FragTex frag_in) : TARGET
{
int x = int(vert_in.uv.x * width + PRECISION_OFFSET);
int y = int(vert_in.uv.y * height + PRECISION_OFFSET);
int x = int(frag_in.uv.x * width + PRECISION_OFFSET);
int y = int(frag_in.uv.y * height + PRECISION_OFFSET);
float limited = image.Load(int3(x, y, 0)).x;
float full = saturate((limited - (16.0 / 255.0)) * (255.0 / 219.0));
return float4(full, full, full, 1.0);
}
float4 PSY800_Full(VertInOut vert_in) : TARGET
float4 PSY800_Full(FragTex frag_in) : TARGET
{
int x = int(vert_in.uv.x * width + PRECISION_OFFSET);
int y = int(vert_in.uv.y * height + PRECISION_OFFSET);
int x = int(frag_in.uv.x * width + PRECISION_OFFSET);
int y = int(frag_in.uv.y * height + PRECISION_OFFSET);
float3 full = image.Load(int3(x, y, 0)).xxx;
return float4(full, 1.0);
}
float4 PSRGB_Limited(VertInOut vert_in) : TARGET
float4 PSRGB_Limited(FragTex frag_in) : TARGET
{
int x = int(vert_in.uv.x * width + PRECISION_OFFSET);
int y = int(vert_in.uv.y * height + PRECISION_OFFSET);
int x = int(frag_in.uv.x * width + PRECISION_OFFSET);
int y = int(frag_in.uv.y * height + PRECISION_OFFSET);
float4 rgba = image.Load(int3(x, y, 0));
rgba.rgb = saturate((rgba.rgb - (16.0 / 255.0)) * (255.0 / 219.0));
return rgba;
}
float4 PSBGR3_Limited(VertInOut vert_in) : TARGET
float4 PSBGR3_Limited(FragTex frag_in) : TARGET
{
int x = int(vert_in.uv.x * width * 3.0 + PRECISION_OFFSET);
int y = int(vert_in.uv.y * height + PRECISION_OFFSET);
int x = int(frag_in.uv.x * width * 3.0 + PRECISION_OFFSET);
int y = int(frag_in.uv.y * height + PRECISION_OFFSET);
float b = image.Load(int3(x - 1, y, 0)).x;
float g = image.Load(int3(x, y, 0)).x;
......@@ -398,10 +442,10 @@ float4 PSBGR3_Limited(VertInOut vert_in) : TARGET
return float4(rgb, 1.0);
}
float4 PSBGR3_Full(VertInOut vert_in) : TARGET
float4 PSBGR3_Full(FragTex frag_in) : TARGET
{
int x = int(vert_in.uv.x * width * 3.0 + PRECISION_OFFSET);
int y = int(vert_in.uv.y * height + PRECISION_OFFSET);
int x = int(frag_in.uv.x * width * 3.0 + PRECISION_OFFSET);
int y = int(frag_in.uv.y * height + PRECISION_OFFSET);
float b = image.Load(int3(x - 1, y, 0)).x;
float g = image.Load(int3(x, y, 0)).x;
......@@ -414,8 +458,8 @@ technique Planar420
{
pass
{
vertex_shader = VSDefault(id);
pixel_shader = PSPlanar420(vert_in);
vertex_shader = VSPosTex(id);
pixel_shader = PSPlanar420(frag_in);
}
}
......@@ -423,8 +467,8 @@ technique Planar444
{
pass
{
vertex_shader = VSDefault(id);
pixel_shader = PSPlanar444(vert_in);
vertex_shader = VSPosTex(id);
pixel_shader = PSPlanar444(frag_in);
}
}
......@@ -432,8 +476,8 @@ technique NV12
{
pass
{
vertex_shader = VSDefault(id);
pixel_shader = PSNV12(vert_in);
vertex_shader = VSPosTex(id);
pixel_shader = PSNV12(frag_in);
}
}
......@@ -441,8 +485,8 @@ technique NV12_Y
{
pass
{
vertex_shader = VSDefault(id);
pixel_shader = PSNV12_Y(vert_in);
vertex_shader = VSPos(id);
pixel_shader = PSNV12_Y(frag_in);
}
}
......@@ -450,8 +494,8 @@ technique NV12_UV
{
pass
{
vertex_shader = VSDefault(id);
pixel_shader = PSNV12_UV(vert_in);
vertex_shader = VSPosTex(id);
pixel_shader = PSNV12_UV(frag_in);
}
}
......@@ -459,8 +503,8 @@ technique UYVY_Reverse
{
pass
{
vertex_shader = VSDefault(id);
pixel_shader = PSPacked422_Reverse(vert_in, 2, 0, 1, 3);
vertex_shader = VSPosTex(id);
pixel_shader = PSPacked422_Reverse(frag_in, 2, 0, 1, 3);
}
}
......@@ -468,8 +512,8 @@ technique YUY2_Reverse
{
pass
{
vertex_shader = VSDefault(id);
pixel_shader = PSPacked422_Reverse(vert_in, 1, 3, 2, 0);
vertex_shader = VSPosTex(id);
pixel_shader = PSPacked422_Reverse(frag_in, 1, 3, 2, 0);
}
}
......@@ -477,8 +521,8 @@ technique YVYU_Reverse
{
pass
{
vertex_shader = VSDefault(id);
pixel_shader = PSPacked422_Reverse(vert_in, 3, 1, 2, 0);
vertex_shader = VSPosTex(id);
pixel_shader = PSPacked422_Reverse(frag_in, 3, 1, 2, 0);
}
}
......@@ -486,8 +530,8 @@ technique I420_Reverse
{
pass
{
vertex_shader = VSDefault(id);
pixel_shader = PSPlanar420_Reverse(vert_in);
vertex_shader = VSPosTex(id);
pixel_shader = PSPlanar420_Reverse(frag_in);
}
}
......@@ -495,8 +539,8 @@ technique I444_Reverse
{
pass
{
vertex_shader = VSDefault(id);
pixel_shader = PSPlanar444_Reverse(vert_in);
vertex_shader = VSPosTex(id);
pixel_shader = PSPlanar444_Reverse(frag_in);
}
}
......@@ -504,8 +548,8 @@ technique NV12_Reverse
{
pass
{
vertex_shader = VSDefault(id);
pixel_shader = PSNV12_Reverse(vert_in);
vertex_shader = VSPosTex(id);
pixel_shader = PSNV12_Reverse(frag_in);
}
}
......@@ -513,8 +557,8 @@ technique Y800_Limited
{
pass
{
vertex_shader = VSDefault(id);
pixel_shader = PSY800_Limited(vert_in);
vertex_shader = VSPosTex(id);
pixel_shader = PSY800_Limited(frag_in);
}
}
......@@ -522,8 +566,8 @@ technique Y800_Full
{
pass
{
vertex_shader = VSDefault(id);
pixel_shader = PSY800_Full(vert_in);
vertex_shader = VSPosTex(id);
pixel_shader = PSY800_Full(frag_in);
}
}
......@@ -531,8 +575,8 @@ technique RGB_Limited
{
pass
{
vertex_shader = VSDefault(id);
pixel_shader = PSRGB_Limited(vert_in);
vertex_shader = VSPosTex(id);
pixel_shader = PSRGB_Limited(frag_in);
}
}
......@@ -540,8 +584,8 @@ technique BGR3_Limited
{
pass
{
vertex_shader = VSDefault(id);
pixel_shader = PSBGR3_Limited(vert_in);
vertex_shader = VSPosTex(id);
pixel_shader = PSBGR3_Limited(frag_in);
}
}
......@@ -549,7 +593,7 @@ technique BGR3_Full
{
pass
{
vertex_shader = VSDefault(id);
pixel_shader = PSBGR3_Full(vert_in);
vertex_shader = VSPosTex(id);
pixel_shader = PSBGR3_Full(frag_in);
}
}
......@@ -6,7 +6,6 @@
uniform float4x4 ViewProj;
uniform texture2d image;
uniform float4x4 color_matrix;
uniform float2 base_dimension_i;
uniform float undistort_factor = 1.0;
......@@ -146,13 +145,6 @@ float4 PSDrawLanczosRGBADivide(FragData v_in) : TARGET
return float4(rgba.rgb * multiplier, alpha);
}
/*
 * Pixel shader: Lanczos draw followed by a color-matrix transform.
 *
 * Takes the RGB result of DrawLanczos, clamps it to [0, 1], multiplies the
 * homogeneous color (rgb, 1.0) by the color_matrix uniform and outputs the
 * resulting triple with alpha forced to 1.0.
 * NOTE(review): the local is named "yuv", so color_matrix is presumably an
 * RGB -> YUV matrix -- confirm against the caller that sets the uniform.
 */
float4 PSDrawLanczosMatrix(FragData v_in) : TARGET
{
/* second argument is false here; the DrawUndistort technique passes true */
float3 rgb = DrawLanczos(v_in, false).rgb;
/* saturate() clamps before the multiply; w = 1.0 picks up the matrix's constant term */
float3 yuv = mul(float4(saturate(rgb), 1.0), color_matrix).xyz;
return float4(yuv, 1.0);
}
technique Draw
{
pass
......@@ -179,12 +171,3 @@ technique DrawUndistort
pixel_shader = PSDrawLanczosRGBA(v_in, true);
}
}
/* Single-pass technique pairing VSDefault with PSDrawLanczosMatrix. */
technique DrawMatrix
{
pass
{
vertex_shader = VSDefault(v_in);
pixel_shader = PSDrawLanczosMatrix(v_in);
}
}
......@@ -200,19 +200,12 @@ static inline gs_effect_t *get_scale_effect(struct obs_core_video *video,
}
static const char *render_output_texture_name = "render_output_texture";
static inline void render_output_texture(struct obs_core_video *video)
static inline gs_texture_t *render_output_texture(struct obs_core_video *video)
{
profile_start(render_output_texture_name);
gs_texture_t *texture = video->render_texture;
gs_texture_t *target = video->output_texture;
uint32_t width = gs_texture_get_width(target);
uint32_t height = gs_texture_get_height(target);
struct vec2 base, base_i;
vec2_set(&base, (float)video->base_width, (float)video->base_height);
vec2_set(&base_i, 1.0f / (float)video->base_width,
1.0f / (float)video->base_height);
gs_effect_t *effect = get_scale_effect(video, width, height);
gs_technique_t *tech;
......@@ -220,12 +213,17 @@ static inline void render_output_texture(struct obs_core_video *video)
if (video->ovi.output_format == VIDEO_FORMAT_RGBA) {
tech = gs_effect_get_technique(effect, "DrawAlphaDivide");
} else {
tech = gs_effect_get_technique(effect, "DrawMatrix");
if ((effect == video->default_effect) &&
(width == video->base_width) &&
(height == video->base_height))
return texture;
tech = gs_effect_get_technique(effect, "Draw");
}
profile_start(render_output_texture_name);
gs_eparam_t *image = gs_effect_get_param_by_name(effect, "image");
gs_eparam_t *matrix =
gs_effect_get_param_by_name(effect, "color_matrix");
gs_eparam_t *bres =
gs_effect_get_param_by_name(effect, "base_dimension");
gs_eparam_t *bres_i =
......@@ -235,12 +233,20 @@ static inline void render_output_texture(struct obs_core_video *video)
gs_set_render_target(target, NULL);
set_render_size(width, height);
if (bres)
if (bres) {
struct vec2 base;
vec2_set(&base, (float)video->base_width,
(float)video->base_height);
gs_effect_set_vec2(bres, &base);
if (bres_i)
}
if (bres_i) {
struct vec2 base_i;
vec2_set(&base_i, 1.0f / (float)video->base_width,
1.0f / (float)video->base_height);
gs_effect_set_vec2(bres_i, &base_i);
}
gs_effect_set_val(matrix, video->color_matrix, sizeof(float) * 16);
gs_effect_set_texture(image, texture);
gs_enable_blending(false);
......@@ -254,6 +260,8 @@ static inline void render_output_texture(struct obs_core_video *video)
gs_enable_blending(true);
profile_end(render_output_texture_name);
return target;
}
static inline void set_eparam(gs_effect_t *effect, const char *name, float val)
......@@ -263,17 +271,23 @@ static inline void set_eparam(gs_effect_t *effect, const char *name, float val)
}
static const char *render_convert_texture_name = "render_convert_texture";
static void render_convert_texture(struct obs_core_video *video)
static void render_convert_texture(struct obs_core_video *video,
gs_texture_t *texture)
{
profile_start(render_convert_texture_name);
gs_texture_t *texture = video->output_texture;
gs_texture_t *target = video->convert_texture;
float fwidth = (float)video->output_width;
float fheight = (float)video->output_height;
size_t passes, i;
gs_effect_t *effect = video->conversion_effect;
gs_eparam_t *color_vec_y =
gs_effect_get_param_by_name(effect, "color_vec_y");
gs_eparam_t *color_vec_u =
gs_effect_get_param_by_name(effect, "color_vec_u");
gs_eparam_t *color_vec_v =
gs_effect_get_param_by_name(effect, "color_vec_v");
gs_eparam_t *image = gs_effect_get_param_by_name(effect, "image");
gs_technique_t *tech =
gs_effect_get_technique(effect, video->conversion_tech);
......@@ -290,6 +304,17 @@ static void render_convert_texture(struct obs_core_video *video)
set_eparam(effect, "height_d2_i", 1.0f / (fheight * 0.5f));
set_eparam(effect, "input_height", (float)video->conversion_height);
struct vec4 vec_y, vec_u, vec_v;
vec4_set(&vec_y, video->color_matrix[4], video->color_matrix[5],
video->color_matrix[6], video->color_matrix[7]);
vec4_set(&vec_u, video->color_matrix[0], video->color_matrix[1],
video->color_matrix[2], video->color_matrix[3]);
vec4_set(&vec_v, video->color_matrix[8], video->color_matrix[9],
video->color_matrix[10], video->color_matrix[11]);
gs_effect_set_vec4(color_vec_y, &vec_y);
gs_effect_set_vec4(color_vec_u, &vec_u);
gs_effect_set_vec4(color_vec_v, &vec_v);
gs_effect_set_texture(image, texture);
gs_set_render_target(target, NULL);
......@@ -310,16 +335,32 @@ static void render_convert_texture(struct obs_core_video *video)
profile_end(render_convert_texture_name);
}
static void render_nv12(struct obs_core_video *video, gs_texture_t *target,
const char *tech_name, uint32_t width, uint32_t height)
static void render_nv12(struct obs_core_video *video, gs_texture_t *texture,
gs_texture_t *target, const char *tech_name,
uint32_t width, uint32_t height)
{
gs_texture_t *texture = video->output_texture;
gs_effect_t *effect = video->conversion_effect;
gs_eparam_t *color_vec_y =
gs_effect_get_param_by_name(effect, "color_vec_y");
gs_eparam_t *color_vec_u =
gs_effect_get_param_by_name(effect, "color_vec_u");
gs_eparam_t *color_vec_v =
gs_effect_get_param_by_name(effect, "color_vec_v");
gs_eparam_t *image = gs_effect_get_param_by_name(effect, "image");
gs_technique_t *tech = gs_effect_get_technique(effect, tech_name);
size_t passes, i;
struct vec4 vec_y, vec_u, vec_v;
vec4_set(&vec_y, video->color_matrix[4], video->color_matrix[5],
video->color_matrix[6], video->color_matrix[7]);
vec4_set(&vec_u, video->color_matrix[0], video->color_matrix[1],
video->color_matrix[2], video->color_matrix[3]);
vec4_set(&vec_v, video->color_matrix[8], video->color_matrix[9],
video->color_matrix[10], video->color_matrix[11]);
gs_effect_set_vec4(color_vec_y, &vec_y);
gs_effect_set_vec4(color_vec_u, &vec_u);
gs_effect_set_vec4(color_vec_v, &vec_v);
gs_effect_set_texture(image, texture);
gs_set_render_target(target, NULL);
......@@ -337,13 +378,14 @@ static void render_nv12(struct obs_core_video *video, gs_texture_t *target,
}
static const char *render_convert_nv12_name = "render_convert_texture_nv12";
static void render_convert_texture_nv12(struct obs_core_video *video)
static void render_convert_texture_nv12(struct obs_core_video *video,
gs_texture_t *texture)
{
profile_start(render_convert_nv12_name);
render_nv12(video, video->convert_texture, "NV12_Y",
render_nv12(video, texture, video->convert_texture, "NV12_Y",
video->output_width, video->output_height);
render_nv12(video, video->convert_uv_texture, "NV12_UV",
render_nv12(video, texture, video->convert_uv_texture, "NV12_UV",
video->output_width / 2, video->output_height / 2);
video->texture_converted = true;
......@@ -353,11 +395,10 @@ static void render_convert_texture_nv12(struct obs_core_video *video)
static const char *stage_output_texture_name = "stage_output_texture";
static inline void stage_output_texture(struct obs_core_video *video,
int cur_texture)
gs_texture_t *texture, int cur_texture)
{
profile_start(stage_output_texture_name);
gs_texture_t *texture;
bool texture_ready;
gs_stagesurf_t *copy = video->copy_surfaces[cur_texture];
......@@ -365,7 +406,6 @@ static inline void stage_output_texture(struct obs_core_video *video,
texture = video->convert_texture;
texture_ready = video->texture_converted;
} else {
texture = video->output_texture;
texture_ready = true;
}
......@@ -486,7 +526,7 @@ static inline void render_video(struct obs_core_video *video, bool raw_active,
render_main_texture(video);
if (raw_active || gpu_active) {
render_output_texture(video);
gs_texture_t *texture = render_output_texture(video);
#ifdef _WIN32
if (gpu_active) {
......@@ -496,9 +536,9 @@ static inline void render_video(struct obs_core_video *video, bool raw_active,
if (video->gpu_conversion) {
if (video->using_nv12_tex)
render_convert_texture_nv12(video);
render_convert_texture_nv12(video, texture);
else
render_convert_texture(video);
render_convert_texture(video, texture);
}
#ifdef _WIN32
......@@ -508,7 +548,7 @@ static inline void render_video(struct obs_core_video *video, bool raw_active,
}
#endif
if (raw_active)
stage_output_texture(video, cur_texture);
stage_output_texture(video, texture, cur_texture);
}
gs_set_render_target(NULL, NULL);
......@@ -632,30 +672,6 @@ static void set_gpu_converted_data(struct obs_core_video *video,
}
}
/*
 * Dispatch a staged frame to the converter matching the output format.
 *
 * Only plane 0 of `input` (data[0] / linesize[0]) is read. Each converter is
 * called with 0 and info->height as its third and fourth arguments
 * (presumably the first and last row to process -- confirm against the
 * converter prototypes) and writes into output->data using
 * output->linesize. Formats other than I420, NV12 and I444 are not
 * converted; an error is logged and `output` is left untouched.
 */
static void convert_frame(struct video_frame *output,
			  const struct video_data *input,
			  const struct video_output_info *info)
{
	switch (info->format) {
	case VIDEO_FORMAT_I420:
		compress_uyvx_to_i420(input->data[0], input->linesize[0], 0,
				      info->height, output->data,
				      output->linesize);
		break;

	case VIDEO_FORMAT_NV12:
		compress_uyvx_to_nv12(input->data[0], input->linesize[0], 0,
				      info->height, output->data,
				      output->linesize);
		break;

	case VIDEO_FORMAT_I444:
		convert_uyvx_to_i444(input->data[0], input->linesize[0], 0,
				     info->height, output->data,
				     output->linesize);
		break;

	default:
		blog(LOG_ERROR, "convert_frame: unsupported texture format");
		break;
	}
}
static inline void copy_rgbx_frame(struct video_frame *output,
const struct video_data *input,
const struct video_output_info *info)
......@@ -690,9 +706,6 @@ static inline void output_video_data(struct obs_core_video *video,
if (video->gpu_conversion) {
set_gpu_converted_data(video, &output_frame,
input_frame, info);
} else if (format_is_yuv(info->format)) {
convert_frame(&output_frame, input_frame, info);
} else {
copy_rgbx_frame(&output_frame, input_frame, info);
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册