diff --git a/libobs/data/bicubic_scale.effect b/libobs/data/bicubic_scale.effect
index 0f55292d3e988eb1cc331884337503f558eef9be..5ae2dfc2bd4f3a3feb706091d8ce2487aba91d9c 100644
--- a/libobs/data/bicubic_scale.effect
+++ b/libobs/data/bicubic_scale.effect
@@ -6,7 +6,6 @@
 
 uniform float4x4 ViewProj;
 uniform texture2d image;
-uniform float4x4 color_matrix;
 uniform float2 base_dimension_i;
 uniform float undistort_factor = 1.0;
 
@@ -138,13 +137,6 @@ float4 PSDrawBicubicRGBADivide(VertData v_in) : TARGET
 	return float4(rgba.rgb * multiplier, alpha);
 }
 
-float4 PSDrawBicubicMatrix(VertData v_in) : TARGET
-{
-	float3 rgb = DrawBicubic(v_in, false).rgb;
-	float3 yuv = mul(float4(saturate(rgb), 1.0), color_matrix).xyz;
-	return float4(yuv, 1.0);
-}
-
 technique Draw
 {
 	pass
@@ -171,12 +163,3 @@ technique DrawUndistort
 		pixel_shader  = PSDrawBicubicRGBA(v_in, true);
 	}
 }
-
-technique DrawMatrix
-{
-	pass
-	{
-		vertex_shader = VSDefault(v_in);
-		pixel_shader  = PSDrawBicubicMatrix(v_in);
-	}
-}
diff --git a/libobs/data/bilinear_lowres_scale.effect b/libobs/data/bilinear_lowres_scale.effect
index 1ee03698d208a871609be770e7d0f5c7322d6620..72762399a7e66e650136c22b481cbb64531bad9b 100644
--- a/libobs/data/bilinear_lowres_scale.effect
+++ b/libobs/data/bilinear_lowres_scale.effect
@@ -5,7 +5,6 @@
 
 uniform float4x4 ViewProj;
 uniform texture2d image;
-uniform float4x4 color_matrix;
 
 sampler_state textureSampler {
 	Filter    = Linear;
@@ -66,13 +65,6 @@ float4 PSDrawLowresBilinearRGBADivide(VertData v_in) : TARGET
 	return float4(rgba.rgb * multiplier, alpha);
 }
 
-float4 PSDrawLowresBilinearMatrix(VertData v_in) : TARGET
-{
-	float3 rgb = DrawLowresBilinear(v_in).rgb;
-	float3 yuv = mul(float4(saturate(rgb), 1.0), color_matrix).xyz;
-	return float4(yuv, 1.0);
-}
-
 technique Draw
 {
 	pass
@@ -91,12 +83,3 @@ technique DrawAlphaDivide
 	}
 }
 
-technique DrawMatrix
-{
-	pass
-	{
-		vertex_shader = VSDefault(v_in);
-		pixel_shader  = PSDrawLowresBilinearMatrix(v_in);
-	}
-}
-
diff --git a/libobs/data/default.effect b/libobs/data/default.effect
index 54e07fa1a30805e47f19fd59ad44d1c6ffe44d27..ebc6a7c93c8f561f762c986255557048ef5d5739 100644
--- a/libobs/data/default.effect
+++ b/libobs/data/default.effect
@@ -1,5 +1,4 @@
 uniform float4x4 ViewProj;
-uniform float4x4 color_matrix;
 uniform texture2d image;
 
 sampler_state def_sampler {
@@ -34,13 +33,6 @@ float4 PSDrawAlphaDivide(VertInOut vert_in) : TARGET
 	return float4(rgba.rgb * multiplier, alpha);
 }
 
-float4 PSDrawMatrix(VertInOut vert_in) : TARGET
-{
-	float3 rgb = image.Sample(def_sampler, vert_in.uv).rgb;
-	float3 yuv = mul(float4(rgb, 1.0), color_matrix).xyz;
-	return float4(yuv, 1.0);
-}
-
 technique Draw
 {
 	pass
@@ -58,12 +50,3 @@ technique DrawAlphaDivide
 		pixel_shader  = PSDrawAlphaDivide(vert_in);
 	}
 }
-
-technique DrawMatrix
-{
-	pass
-	{
-		vertex_shader = VSDefault(vert_in);
-		pixel_shader  = PSDrawMatrix(vert_in);
-	}
-}
diff --git a/libobs/data/format_conversion.effect b/libobs/data/format_conversion.effect
index 93e4163b26067877475cc183bb89e299fefccf98..c3aa18f5276bcb00adf66f60fa8b73e1d66ff188 100644
--- a/libobs/data/format_conversion.effect
+++ b/libobs/data/format_conversion.effect
@@ -44,6 +44,10 @@ uniform float4x4  color_matrix;
 uniform float3    color_range_min = {0.0, 0.0, 0.0};
 uniform float3    color_range_max = {1.0, 1.0, 1.0};
 
+uniform float4    color_vec_y;
+uniform float4    color_vec_u;
+uniform float4    color_vec_v;
+
 uniform texture2d image;
 
 sampler_state def_sampler {
@@ -52,12 +56,33 @@ sampler_state def_sampler {
 	AddressV = Clamp;
 };
 
-struct VertInOut {
+struct FragPos {
 	float4 pos : POSITION;
+};
+
+struct VertTexPos {
 	float2 uv  : TEXCOORD0;
+	float4 pos : POSITION;
 };
 
-VertInOut VSDefault(uint id : VERTEXID)
+struct FragTex {
+	float2 uv  : TEXCOORD0;
+};
+
+FragPos VSPos(uint id : VERTEXID) /* position-only vertex shader: writes no UV */
+{
+	float idHigh = float(id >> 1); /* id / 2 */
+	float idLow = float(id & uint(1)); /* id % 2 */
+
+	float x = idHigh * 4.0 - 1.0; /* -1 when idHigh is 0, 3 when it is 1 */
+	float y = idLow * 4.0 - 1.0; /* -1 when idLow is 0, 3 when it is 1 */
+
+	FragPos vert_out;
+	vert_out.pos = float4(x, y, 0.0, 1.0); /* ids 0,1,2 -> (-1,-1), (-1,3), (3,-1); presumably one oversized triangle, confirm vertex count at the draw call */
+	return vert_out;
+}
+
+VertTexPos VSPosTex(uint id : VERTEXID)
 {
 	float idHigh = float(id >> 1);
 	float idLow = float(id & uint(1));
@@ -68,24 +93,22 @@ VertInOut VSDefault(uint id : VERTEXID)
 	float u = idHigh * 2.0;
 	float v = obs_glsl_compile ? (idLow * 2.0) : (1.0 - idLow * 2.0);
 
-	VertInOut vert_out;
-	vert_out.pos = float4(x, y, 0.0, 1.0);
+	VertTexPos vert_out;
 	vert_out.uv = float2(u, v);
+	vert_out.pos = float4(x, y, 0.0, 1.0);
 	return vert_out;
 }
 
 /* used to prevent internal GPU precision issues width fmod in particular */
 #define PRECISION_OFFSET 0.2
 
-float4 PSNV12(VertInOut vert_in) : TARGET
+float4 PSNV12(FragTex frag_in) : TARGET
 {
-	float v_mul = floor(vert_in.uv.y * input_height);
+	float v_mul = floor(frag_in.uv.y * input_height);
 
-	float byte_offset = floor((v_mul + vert_in.uv.x) * width) * 4.0;
+	float byte_offset = floor((v_mul + frag_in.uv.x) * width) * 4.0;
 	byte_offset += PRECISION_OFFSET;
 
-	float2 sample_pos[4];
-
 	if (byte_offset < u_plane_offset) {
 #ifdef DEBUGGING
 		return float4(1.0, 1.0, 1.0, 1.0);
@@ -98,19 +121,24 @@ float4 PSNV12(VertInOut vert_in) : TARGET
 		lum_u += width_i  * 0.5;
 		lum_v += height_i * 0.5;
 
-		sample_pos[0] = float2(lum_u,            lum_v);
-		sample_pos[1] = float2(lum_u += width_i, lum_v);
-		sample_pos[2] = float2(lum_u += width_i, lum_v);
-		sample_pos[3] = float2(lum_u +  width_i, lum_v);
-
-		float4x4 out_val = float4x4(
-			image.Sample(def_sampler, sample_pos[0]),
-			image.Sample(def_sampler, sample_pos[1]),
-			image.Sample(def_sampler, sample_pos[2]),
-			image.Sample(def_sampler, sample_pos[3])
+		float2 sample_pos0 = float2(lum_u,            lum_v);
+		float2 sample_pos1 = float2(lum_u += width_i, lum_v);
+		float2 sample_pos2 = float2(lum_u += width_i, lum_v);
+		float2 sample_pos3 = float2(lum_u +  width_i, lum_v);
+
+		float3 rgb0 = image.Sample(def_sampler, sample_pos0).rgb;
+		float3 rgb1 = image.Sample(def_sampler, sample_pos1).rgb;
+		float3 rgb2 = image.Sample(def_sampler, sample_pos2).rgb;
+		float3 rgb3 = image.Sample(def_sampler, sample_pos3).rgb;
+
+		float4 out_val = float4(
+			dot(color_vec_y.xyz, rgb0) + color_vec_y.w,
+			dot(color_vec_y.xyz, rgb1) + color_vec_y.w,
+			dot(color_vec_y.xyz, rgb2) + color_vec_y.w,
+			dot(color_vec_y.xyz, rgb3) + color_vec_y.w
 		);
 
-		return transpose(out_val)[1];
+		return out_val;
 	} else {
 #ifdef DEBUGGING
 		return float4(0.5, 0.2, 0.5, 0.2);
@@ -127,34 +155,44 @@ float4 PSNV12(VertInOut vert_in) : TARGET
 		ch_u += width_i;
 		ch_v += height_i;
 
-		sample_pos[0] = float2(ch_u,             ch_v);
-		sample_pos[1] = float2(ch_u + width_i2,  ch_v);
-		
+		float2 sample_pos0 = float2(ch_u,             ch_v);
+		float2 sample_pos1 = float2(ch_u + width_i2,  ch_v);
+
+		float3 rgb0 = image.Sample(def_sampler, sample_pos0).rgb;
+		float3 rgb1 = image.Sample(def_sampler, sample_pos1).rgb;
+
 		return float4(
-				image.Sample(def_sampler, sample_pos[0]).rb,
-				image.Sample(def_sampler, sample_pos[1]).rb
-				);
+			dot(color_vec_u.xyz, rgb0) + color_vec_u.w,
+			dot(color_vec_v.xyz, rgb0) + color_vec_v.w,
+			dot(color_vec_u.xyz, rgb1) + color_vec_u.w,
+			dot(color_vec_v.xyz, rgb1) + color_vec_v.w
+		);
 	}
 }
 
-float PSNV12_Y(VertInOut vert_in) : TARGET
+float PSNV12_Y(FragPos frag_in) : TARGET
 {
-	return image.Sample(def_sampler, vert_in.uv.xy).y;
+	float3 rgb = image.Load(int3(frag_in.pos.xy, 0)).rgb; /* direct texel fetch at the fragment position, no sampler */
+	float y = dot(color_vec_y.xyz, rgb) + color_vec_y.w; /* luma: RGB weights in xyz, offset in w */
+	return y;
 }
 
-float2 PSNV12_UV(VertInOut vert_in) : TARGET
+float2 PSNV12_UV(FragTex frag_in) : TARGET
 {
-	return image.Sample(def_sampler, vert_in.uv.xy).xz;
+	float3 rgb = image.Sample(def_sampler, frag_in.uv).rgb; /* RGB input read through def_sampler */
+	float u = dot(color_vec_u.xyz, rgb) + color_vec_u.w; /* U: RGB weights in xyz, offset in w */
+	float v = dot(color_vec_v.xyz, rgb) + color_vec_v.w; /* V: same form using color_vec_v */
+	return float2(u, v); /* U first, then V */
 }
 
-float4 PSPlanar420(VertInOut vert_in) : TARGET
+float4 PSPlanar420(FragTex frag_in) : TARGET
 {
-	float v_mul = floor(vert_in.uv.y * input_height);
+	float v_mul = floor(frag_in.uv.y * input_height);
 
-	float byte_offset = floor((v_mul + vert_in.uv.x) * width) * 4.0;
+	float byte_offset = floor((v_mul + frag_in.uv.x) * width) * 4.0;
 	byte_offset += PRECISION_OFFSET;
 
-	float2 sample_pos[4];
+	float2 sample_pos0, sample_pos1, sample_pos2, sample_pos3;
 
 	if (byte_offset < u_plane_offset) {
 #ifdef DEBUGGING
@@ -168,10 +206,10 @@ float4 PSPlanar420(VertInOut vert_in) : TARGET
 		lum_u += width_i  * 0.5;
 		lum_v += height_i * 0.5;
 
-		sample_pos[0] = float2(lum_u,            lum_v);
-		sample_pos[1] = float2(lum_u += width_i, lum_v);
-		sample_pos[2] = float2(lum_u += width_i, lum_v);
-		sample_pos[3] = float2(lum_u +  width_i, lum_v);
+		sample_pos0 = float2(lum_u,            lum_v);
+		sample_pos1 = float2(lum_u += width_i, lum_v);
+		sample_pos2 = float2(lum_u += width_i, lum_v);
+		sample_pos3 = float2(lum_u +  width_i, lum_v);
 
 	} else {
 #ifdef DEBUGGING
@@ -200,42 +238,46 @@ float4 PSPlanar420(VertInOut vert_in) : TARGET
 		float ch_u_n = 0.   + width_i;
 		float ch_v_n = ch_v + height_i * 3;
 
-		sample_pos[0] = float2(ch_u,             ch_v);
-		sample_pos[1] = float2(ch_u += width_i2, ch_v);
+		sample_pos0 = float2(ch_u,             ch_v);
+		sample_pos1 = float2(ch_u += width_i2, ch_v);
 
 		ch_u += width_i2;
 		// check if ch_u overflowed the current source and chroma line
 		if (ch_u > 1.0) {
-			sample_pos[2] = float2(ch_u_n,            ch_v_n);
-			sample_pos[2] = float2(ch_u_n + width_i2, ch_v_n);
+			sample_pos2 = float2(ch_u_n,            ch_v_n);
+			sample_pos3 = float2(ch_u_n + width_i2, ch_v_n);
 		} else {
-			sample_pos[2] = float2(ch_u,             ch_v);
-			sample_pos[3] = float2(ch_u +  width_i2, ch_v);
+			sample_pos2 = float2(ch_u,            ch_v);
+			sample_pos3 = float2(ch_u + width_i2, ch_v);
 		}
 	}
 
-	float4x4 out_val = float4x4(
-		image.Sample(def_sampler, sample_pos[0]),
-		image.Sample(def_sampler, sample_pos[1]),
-		image.Sample(def_sampler, sample_pos[2]),
-		image.Sample(def_sampler, sample_pos[3])
-	);
-
-	out_val = transpose(out_val);
+	float3 rgb0 = image.Sample(def_sampler, sample_pos0).rgb;
+	float3 rgb1 = image.Sample(def_sampler, sample_pos1).rgb;
+	float3 rgb2 = image.Sample(def_sampler, sample_pos2).rgb;
+	float3 rgb3 = image.Sample(def_sampler, sample_pos3).rgb;
 
+	float4 color_vec;
 	if (byte_offset < u_plane_offset)
-		return out_val[1];
+		color_vec = color_vec_y;
 	else if (byte_offset < v_plane_offset)
-		return out_val[0];
+		color_vec = color_vec_u;
 	else
-		return out_val[2];
+		color_vec = color_vec_v;
+
+	return float4(
+		dot(color_vec.xyz, rgb0) + color_vec.w,
+		dot(color_vec.xyz, rgb1) + color_vec.w,
+		dot(color_vec.xyz, rgb2) + color_vec.w,
+		dot(color_vec.xyz, rgb3) + color_vec.w
+	);
 }
 
-float4 PSPlanar444(VertInOut vert_in) : TARGET
+float4 PSPlanar444(FragTex frag_in) : TARGET
 {
-	float v_mul = floor(vert_in.uv.y * input_height);
+	float v_mul = floor(frag_in.uv.y * input_height);
 
-	float byte_offset = floor((v_mul + vert_in.uv.x) * width) * 4.0;
+	float byte_offset = floor((v_mul + frag_in.uv.x) * width) * 4.0;
 	byte_offset += PRECISION_OFFSET;
 
 	float new_byte_offset = byte_offset;
@@ -245,8 +287,6 @@ float4 PSPlanar444(VertInOut vert_in) : TARGET
 	else if (byte_offset >= u_plane_offset)
 		new_byte_offset -= u_plane_offset;
 
-	float2 sample_pos[4];
-
 	float u_val = floor(fmod(new_byte_offset, width)) * width_i;
 	float v_val = floor(new_byte_offset * width_i)    * height_i;
 
@@ -254,26 +294,30 @@ float4 PSPlanar444(VertInOut vert_in) : TARGET
 	u_val += width_i  * 0.5;
 	v_val += height_i * 0.5;
 
-	sample_pos[0] = float2(u_val,            v_val);
-	sample_pos[1] = float2(u_val += width_i, v_val);
-	sample_pos[2] = float2(u_val += width_i, v_val);
-	sample_pos[3] = float2(u_val +  width_i, v_val);
+	float2 sample_pos0 = float2(u_val,            v_val);
+	float2 sample_pos1 = float2(u_val += width_i, v_val);
+	float2 sample_pos2 = float2(u_val += width_i, v_val);
+	float2 sample_pos3 = float2(u_val +  width_i, v_val);
 
-	float4x4 out_val = float4x4(
-		image.Sample(def_sampler, sample_pos[0]),
-		image.Sample(def_sampler, sample_pos[1]),
-		image.Sample(def_sampler, sample_pos[2]),
-		image.Sample(def_sampler, sample_pos[3])
-	);
-
-	out_val = transpose(out_val);
+	float3 rgb0 = image.Sample(def_sampler, sample_pos0).rgb;
+	float3 rgb1 = image.Sample(def_sampler, sample_pos1).rgb;
+	float3 rgb2 = image.Sample(def_sampler, sample_pos2).rgb;
+	float3 rgb3 = image.Sample(def_sampler, sample_pos3).rgb;
 
+	float4 color_vec;
 	if (byte_offset < u_plane_offset)
-		return out_val[1];
+		color_vec = color_vec_y;
 	else if (byte_offset < v_plane_offset)
-		return out_val[0];
+		color_vec = color_vec_u;
 	else
-		return out_val[2];
+		color_vec = color_vec_v;
+
+	return float4(
+		dot(color_vec.xyz, rgb0) + color_vec.w,
+		dot(color_vec.xyz, rgb1) + color_vec.w,
+		dot(color_vec.xyz, rgb2) + color_vec.w,
+		dot(color_vec.xyz, rgb3) + color_vec.w
+	);
 }
 
 float GetIntOffsetColor(int offset)
@@ -283,12 +327,12 @@ float GetIntOffsetColor(int offset)
 	                       0)).r;
 }
 
-float4 PSPacked422_Reverse(VertInOut vert_in, int u_pos, int v_pos,
+float4 PSPacked422_Reverse(FragTex frag_in, int u_pos, int v_pos,
 		int y0_pos, int y1_pos) : TARGET
 {
-	float y = vert_in.uv.y;
-	float odd = floor(fmod(width * vert_in.uv.x + PRECISION_OFFSET, 2.0));
-	float x = floor(width_d2 * vert_in.uv.x + PRECISION_OFFSET) *
+	float y = frag_in.uv.y;
+	float odd = floor(fmod(width * frag_in.uv.x + PRECISION_OFFSET, 2.0));
+	float x = floor(width_d2 * frag_in.uv.x + PRECISION_OFFSET) *
 			width_d2_i;
 
 	x += input_width_i_d2;
@@ -300,10 +344,10 @@ float4 PSPacked422_Reverse(VertInOut vert_in, int u_pos, int v_pos,
 	return saturate(mul(float4(yuv, 1.0), color_matrix));
 }
 
-float4 PSPlanar420_Reverse(VertInOut vert_in) : TARGET
+float4 PSPlanar420_Reverse(FragTex frag_in) : TARGET
 {
-	int x = int(vert_in.uv.x * width  + PRECISION_OFFSET);
-	int y = int(vert_in.uv.y * height + PRECISION_OFFSET);
+	int x = int(frag_in.uv.x * width  + PRECISION_OFFSET);
+	int y = int(frag_in.uv.y * height + PRECISION_OFFSET);
 
 	int lum_offset = y * int_width + x;
 	int chroma_offset = (y / 2) * (int_width / 2) + x / 2;
@@ -319,10 +363,10 @@ float4 PSPlanar420_Reverse(VertInOut vert_in) : TARGET
 	return saturate(mul(float4(yuv, 1.0), color_matrix));
 }
 
-float4 PSPlanar444_Reverse(VertInOut vert_in) : TARGET
+float4 PSPlanar444_Reverse(FragTex frag_in) : TARGET
 {
-	int x = int(vert_in.uv.x * width  + PRECISION_OFFSET);
-	int y = int(vert_in.uv.y * height + PRECISION_OFFSET);
+	int x = int(frag_in.uv.x * width  + PRECISION_OFFSET);
+	int y = int(frag_in.uv.y * height + PRECISION_OFFSET);
 
 	int lum_offset = y * int_width + x;
 	int chroma_offset = y * int_width + x;
@@ -338,10 +382,10 @@ float4 PSPlanar444_Reverse(VertInOut vert_in) : TARGET
 	return saturate(mul(float4(yuv, 1.0), color_matrix));
 }
 
-float4 PSNV12_Reverse(VertInOut vert_in) : TARGET
+float4 PSNV12_Reverse(FragTex frag_in) : TARGET
 {
-	int x = int(vert_in.uv.x * width  + PRECISION_OFFSET);
-	int y = int(vert_in.uv.y * height + PRECISION_OFFSET);
+	int x = int(frag_in.uv.x * width  + PRECISION_OFFSET);
+	int y = int(frag_in.uv.y * height + PRECISION_OFFSET);
 
 	int lum_offset    = y * int_width + x;
 	int chroma_offset = (y / 2) * (int_width / 2) + x / 2;
@@ -356,39 +400,39 @@ float4 PSNV12_Reverse(VertInOut vert_in) : TARGET
 	return saturate(mul(float4(yuv, 1.0), color_matrix));
 }
 
-float4 PSY800_Limited(VertInOut vert_in) : TARGET
+float4 PSY800_Limited(FragTex frag_in) : TARGET
 {
-	int x = int(vert_in.uv.x * width  + PRECISION_OFFSET);
-	int y = int(vert_in.uv.y * height + PRECISION_OFFSET);
+	int x = int(frag_in.uv.x * width  + PRECISION_OFFSET);
+	int y = int(frag_in.uv.y * height + PRECISION_OFFSET);
 
 	float limited = image.Load(int3(x, y, 0)).x;
 	float full = saturate((limited - (16.0 / 255.0)) * (255.0 / 219.0));
 	return float4(full, full, full, 1.0);
 }
 
-float4 PSY800_Full(VertInOut vert_in) : TARGET
+float4 PSY800_Full(FragTex frag_in) : TARGET
 {
-	int x = int(vert_in.uv.x * width  + PRECISION_OFFSET);
-	int y = int(vert_in.uv.y * height + PRECISION_OFFSET);
+	int x = int(frag_in.uv.x * width  + PRECISION_OFFSET);
+	int y = int(frag_in.uv.y * height + PRECISION_OFFSET);
 
 	float3 full = image.Load(int3(x, y, 0)).xxx;
 	return float4(full, 1.0);
 }
 
-float4 PSRGB_Limited(VertInOut vert_in) : TARGET
+float4 PSRGB_Limited(FragTex frag_in) : TARGET
 {
-	int x = int(vert_in.uv.x * width  + PRECISION_OFFSET);
-	int y = int(vert_in.uv.y * height + PRECISION_OFFSET);
+	int x = int(frag_in.uv.x * width  + PRECISION_OFFSET);
+	int y = int(frag_in.uv.y * height + PRECISION_OFFSET);
 
 	float4 rgba = image.Load(int3(x, y, 0));
 	rgba.rgb = saturate((rgba.rgb - (16.0 / 255.0)) * (255.0 / 219.0));
 	return rgba;
 }
 
-float4 PSBGR3_Limited(VertInOut vert_in) : TARGET
+float4 PSBGR3_Limited(FragTex frag_in) : TARGET
 {
-	int x = int(vert_in.uv.x * width * 3.0 + PRECISION_OFFSET);
-	int y = int(vert_in.uv.y * height      + PRECISION_OFFSET);
+	int x = int(frag_in.uv.x * width * 3.0 + PRECISION_OFFSET);
+	int y = int(frag_in.uv.y * height      + PRECISION_OFFSET);
 
 	float b = image.Load(int3(x - 1, y, 0)).x;
 	float g = image.Load(int3(x, y, 0)).x;
@@ -398,10 +442,10 @@ float4 PSBGR3_Limited(VertInOut vert_in) : TARGET
 	return float4(rgb, 1.0);
 }
 
-float4 PSBGR3_Full(VertInOut vert_in) : TARGET
+float4 PSBGR3_Full(FragTex frag_in) : TARGET
 {
-	int x = int(vert_in.uv.x * width * 3.0 + PRECISION_OFFSET);
-	int y = int(vert_in.uv.y * height      + PRECISION_OFFSET);
+	int x = int(frag_in.uv.x * width * 3.0 + PRECISION_OFFSET);
+	int y = int(frag_in.uv.y * height      + PRECISION_OFFSET);
 
 	float b = image.Load(int3(x - 1, y, 0)).x;
 	float g = image.Load(int3(x, y, 0)).x;
@@ -414,8 +458,8 @@ technique Planar420
 {
 	pass
 	{
-		vertex_shader = VSDefault(id);
-		pixel_shader  = PSPlanar420(vert_in);
+		vertex_shader = VSPosTex(id);
+		pixel_shader  = PSPlanar420(frag_in);
 	}
 }
 
@@ -423,8 +467,8 @@ technique Planar444
 {
 	pass
 	{
-		vertex_shader = VSDefault(id);
-		pixel_shader  = PSPlanar444(vert_in);
+		vertex_shader = VSPosTex(id);
+		pixel_shader  = PSPlanar444(frag_in);
 	}
 }
 
@@ -432,8 +476,8 @@ technique NV12
 {
 	pass
 	{
-		vertex_shader = VSDefault(id);
-		pixel_shader  = PSNV12(vert_in);
+		vertex_shader = VSPosTex(id);
+		pixel_shader  = PSNV12(frag_in);
 	}
 }
 
@@ -441,8 +485,8 @@ technique NV12_Y
 {
 	pass
 	{
-		vertex_shader = VSDefault(id);
-		pixel_shader  = PSNV12_Y(vert_in);
+		vertex_shader = VSPos(id);
+		pixel_shader  = PSNV12_Y(frag_in);
 	}
 }
 
@@ -450,8 +494,8 @@ technique NV12_UV
 {
 	pass
 	{
-		vertex_shader = VSDefault(id);
-		pixel_shader  = PSNV12_UV(vert_in);
+		vertex_shader = VSPosTex(id);
+		pixel_shader  = PSNV12_UV(frag_in);
 	}
 }
 
@@ -459,8 +503,8 @@ technique UYVY_Reverse
 {
 	pass
 	{
-		vertex_shader = VSDefault(id);
-		pixel_shader  = PSPacked422_Reverse(vert_in, 2, 0, 1, 3);
+		vertex_shader = VSPosTex(id);
+		pixel_shader  = PSPacked422_Reverse(frag_in, 2, 0, 1, 3);
 	}
 }
 
@@ -468,8 +512,8 @@ technique YUY2_Reverse
 {
 	pass
 	{
-		vertex_shader = VSDefault(id);
-		pixel_shader  = PSPacked422_Reverse(vert_in, 1, 3, 2, 0);
+		vertex_shader = VSPosTex(id);
+		pixel_shader  = PSPacked422_Reverse(frag_in, 1, 3, 2, 0);
 	}
 }
 
@@ -477,8 +521,8 @@ technique YVYU_Reverse
 {
 	pass
 	{
-		vertex_shader = VSDefault(id);
-		pixel_shader  = PSPacked422_Reverse(vert_in, 3, 1, 2, 0);
+		vertex_shader = VSPosTex(id);
+		pixel_shader  = PSPacked422_Reverse(frag_in, 3, 1, 2, 0);
 	}
 }
 
@@ -486,8 +530,8 @@ technique I420_Reverse
 {
 	pass
 	{
-		vertex_shader = VSDefault(id);
-		pixel_shader  = PSPlanar420_Reverse(vert_in);
+		vertex_shader = VSPosTex(id);
+		pixel_shader  = PSPlanar420_Reverse(frag_in);
 	}
 }
 
@@ -495,8 +539,8 @@ technique I444_Reverse
 {
 	pass
 	{
-		vertex_shader = VSDefault(id);
-		pixel_shader  = PSPlanar444_Reverse(vert_in);
+		vertex_shader = VSPosTex(id);
+		pixel_shader  = PSPlanar444_Reverse(frag_in);
 	}
 }
 
@@ -504,8 +548,8 @@ technique NV12_Reverse
 {
 	pass
 	{
-		vertex_shader = VSDefault(id);
-		pixel_shader  = PSNV12_Reverse(vert_in);
+		vertex_shader = VSPosTex(id);
+		pixel_shader  = PSNV12_Reverse(frag_in);
 	}
 }
 
@@ -513,8 +557,8 @@ technique Y800_Limited
 {
 	pass
 	{
-		vertex_shader = VSDefault(id);
-		pixel_shader  = PSY800_Limited(vert_in);
+		vertex_shader = VSPosTex(id);
+		pixel_shader  = PSY800_Limited(frag_in);
 	}
 }
 
@@ -522,8 +566,8 @@ technique Y800_Full
 {
 	pass
 	{
-		vertex_shader = VSDefault(id);
-		pixel_shader  = PSY800_Full(vert_in);
+		vertex_shader = VSPosTex(id);
+		pixel_shader  = PSY800_Full(frag_in);
 	}
 }
 
@@ -531,8 +575,8 @@ technique RGB_Limited
 {
 	pass
 	{
-		vertex_shader = VSDefault(id);
-		pixel_shader  = PSRGB_Limited(vert_in);
+		vertex_shader = VSPosTex(id);
+		pixel_shader  = PSRGB_Limited(frag_in);
 	}
 }
 
@@ -540,8 +584,8 @@ technique BGR3_Limited
 {
 	pass
 	{
-		vertex_shader = VSDefault(id);
-		pixel_shader  = PSBGR3_Limited(vert_in);
+		vertex_shader = VSPosTex(id);
+		pixel_shader  = PSBGR3_Limited(frag_in);
 	}
 }
 
@@ -549,7 +593,7 @@ technique BGR3_Full
 {
 	pass
 	{
-		vertex_shader = VSDefault(id);
-		pixel_shader  = PSBGR3_Full(vert_in);
+		vertex_shader = VSPosTex(id);
+		pixel_shader  = PSBGR3_Full(frag_in);
 	}
 }
diff --git a/libobs/data/lanczos_scale.effect b/libobs/data/lanczos_scale.effect
index 061acc4e14058a91e5479512228c21a2ae96cec6..534b3c538c45819bddff51d129583c961940415e 100644
--- a/libobs/data/lanczos_scale.effect
+++ b/libobs/data/lanczos_scale.effect
@@ -6,7 +6,6 @@
 
 uniform float4x4 ViewProj;
 uniform texture2d image;
-uniform float4x4 color_matrix;
 uniform float2 base_dimension_i;
 uniform float undistort_factor = 1.0;
 
@@ -146,13 +145,6 @@ float4 PSDrawLanczosRGBADivide(FragData v_in) : TARGET
 	return float4(rgba.rgb * multiplier, alpha);
 }
 
-float4 PSDrawLanczosMatrix(FragData v_in) : TARGET
-{
-	float3 rgb = DrawLanczos(v_in, false).rgb;
-	float3 yuv = mul(float4(saturate(rgb), 1.0), color_matrix).xyz;
-	return float4(yuv, 1.0);
-}
-
 technique Draw
 {
 	pass
@@ -179,12 +171,3 @@ technique DrawUndistort
 		pixel_shader  = PSDrawLanczosRGBA(v_in, true);
 	}
 }
-
-technique DrawMatrix
-{
-	pass
-	{
-		vertex_shader = VSDefault(v_in);
-		pixel_shader  = PSDrawLanczosMatrix(v_in);
-	}
-}
diff --git a/libobs/obs-video.c b/libobs/obs-video.c
index 55ee81cae6ed5347b8cd13ac97e7401abe6ff127..b402c3be89ad472ecd7a4f37a7ea979ba3404310 100644
--- a/libobs/obs-video.c
+++ b/libobs/obs-video.c
@@ -200,19 +200,12 @@ static inline gs_effect_t *get_scale_effect(struct obs_core_video *video,
 }
 
 static const char *render_output_texture_name = "render_output_texture";
-static inline void render_output_texture(struct obs_core_video *video)
+static inline gs_texture_t *render_output_texture(struct obs_core_video *video)
 {
-	profile_start(render_output_texture_name);
-
 	gs_texture_t *texture = video->render_texture;
 	gs_texture_t *target = video->output_texture;
 	uint32_t width = gs_texture_get_width(target);
 	uint32_t height = gs_texture_get_height(target);
-	struct vec2 base, base_i;
-
-	vec2_set(&base, (float)video->base_width, (float)video->base_height);
-	vec2_set(&base_i, 1.0f / (float)video->base_width,
-		 1.0f / (float)video->base_height);
 
 	gs_effect_t *effect = get_scale_effect(video, width, height);
 	gs_technique_t *tech;
@@ -220,12 +213,17 @@ static inline void render_output_texture(struct obs_core_video *video)
 	if (video->ovi.output_format == VIDEO_FORMAT_RGBA) {
 		tech = gs_effect_get_technique(effect, "DrawAlphaDivide");
 	} else {
-		tech = gs_effect_get_technique(effect, "DrawMatrix");
+		if ((effect == video->default_effect) &&
+		    (width == video->base_width) &&
+		    (height == video->base_height))
+			return texture;
+
+		tech = gs_effect_get_technique(effect, "Draw");
 	}
 
+	profile_start(render_output_texture_name);
+
 	gs_eparam_t *image = gs_effect_get_param_by_name(effect, "image");
-	gs_eparam_t *matrix =
-		gs_effect_get_param_by_name(effect, "color_matrix");
 	gs_eparam_t *bres =
 		gs_effect_get_param_by_name(effect, "base_dimension");
 	gs_eparam_t *bres_i =
@@ -235,12 +233,20 @@ static inline void render_output_texture(struct obs_core_video *video)
 	gs_set_render_target(target, NULL);
 	set_render_size(width, height);
 
-	if (bres)
+	if (bres) {
+		struct vec2 base;
+		vec2_set(&base, (float)video->base_width,
+			 (float)video->base_height);
 		gs_effect_set_vec2(bres, &base);
-	if (bres_i)
+	}
+
+	if (bres_i) {
+		struct vec2 base_i;
+		vec2_set(&base_i, 1.0f / (float)video->base_width,
+			 1.0f / (float)video->base_height);
 		gs_effect_set_vec2(bres_i, &base_i);
+	}
 
-	gs_effect_set_val(matrix, video->color_matrix, sizeof(float) * 16);
 	gs_effect_set_texture(image, texture);
 
 	gs_enable_blending(false);
@@ -254,6 +260,8 @@ static inline void render_output_texture(struct obs_core_video *video)
 	gs_enable_blending(true);
 
 	profile_end(render_output_texture_name);
+
+	return target;
 }
 
 static inline void set_eparam(gs_effect_t *effect, const char *name, float val)
@@ -263,17 +271,23 @@ static inline void set_eparam(gs_effect_t *effect, const char *name, float val)
 }
 
 static const char *render_convert_texture_name = "render_convert_texture";
-static void render_convert_texture(struct obs_core_video *video)
+static void render_convert_texture(struct obs_core_video *video,
+				   gs_texture_t *texture)
 {
 	profile_start(render_convert_texture_name);
 
-	gs_texture_t *texture = video->output_texture;
 	gs_texture_t *target = video->convert_texture;
 	float fwidth = (float)video->output_width;
 	float fheight = (float)video->output_height;
 	size_t passes, i;
 
 	gs_effect_t *effect = video->conversion_effect;
+	gs_eparam_t *color_vec_y =
+		gs_effect_get_param_by_name(effect, "color_vec_y");
+	gs_eparam_t *color_vec_u =
+		gs_effect_get_param_by_name(effect, "color_vec_u");
+	gs_eparam_t *color_vec_v =
+		gs_effect_get_param_by_name(effect, "color_vec_v");
 	gs_eparam_t *image = gs_effect_get_param_by_name(effect, "image");
 	gs_technique_t *tech =
 		gs_effect_get_technique(effect, video->conversion_tech);
@@ -290,6 +304,17 @@ static void render_convert_texture(struct obs_core_video *video)
 	set_eparam(effect, "height_d2_i", 1.0f / (fheight * 0.5f));
 	set_eparam(effect, "input_height", (float)video->conversion_height);
 
+	struct vec4 vec_y, vec_u, vec_v;
+	vec4_set(&vec_y, video->color_matrix[4], video->color_matrix[5],
+		 video->color_matrix[6], video->color_matrix[7]);
+	vec4_set(&vec_u, video->color_matrix[0], video->color_matrix[1],
+		 video->color_matrix[2], video->color_matrix[3]);
+	vec4_set(&vec_v, video->color_matrix[8], video->color_matrix[9],
+		 video->color_matrix[10], video->color_matrix[11]);
+	gs_effect_set_vec4(color_vec_y, &vec_y);
+	gs_effect_set_vec4(color_vec_u, &vec_u);
+	gs_effect_set_vec4(color_vec_v, &vec_v);
+
 	gs_effect_set_texture(image, texture);
 
 	gs_set_render_target(target, NULL);
@@ -310,16 +335,32 @@ static void render_convert_texture(struct obs_core_video *video)
 	profile_end(render_convert_texture_name);
 }
 
-static void render_nv12(struct obs_core_video *video, gs_texture_t *target,
-			const char *tech_name, uint32_t width, uint32_t height)
+static void render_nv12(struct obs_core_video *video, gs_texture_t *texture,
+			gs_texture_t *target, const char *tech_name,
+			uint32_t width, uint32_t height)
 {
-	gs_texture_t *texture = video->output_texture;
-
 	gs_effect_t *effect = video->conversion_effect;
+	gs_eparam_t *color_vec_y =
+		gs_effect_get_param_by_name(effect, "color_vec_y");
+	gs_eparam_t *color_vec_u =
+		gs_effect_get_param_by_name(effect, "color_vec_u");
+	gs_eparam_t *color_vec_v =
+		gs_effect_get_param_by_name(effect, "color_vec_v");
 	gs_eparam_t *image = gs_effect_get_param_by_name(effect, "image");
 	gs_technique_t *tech = gs_effect_get_technique(effect, tech_name);
 	size_t passes, i;
 
+	struct vec4 vec_y, vec_u, vec_v;
+	vec4_set(&vec_y, video->color_matrix[4], video->color_matrix[5],
+		 video->color_matrix[6], video->color_matrix[7]);
+	vec4_set(&vec_u, video->color_matrix[0], video->color_matrix[1],
+		 video->color_matrix[2], video->color_matrix[3]);
+	vec4_set(&vec_v, video->color_matrix[8], video->color_matrix[9],
+		 video->color_matrix[10], video->color_matrix[11]);
+	gs_effect_set_vec4(color_vec_y, &vec_y);
+	gs_effect_set_vec4(color_vec_u, &vec_u);
+	gs_effect_set_vec4(color_vec_v, &vec_v);
+
 	gs_effect_set_texture(image, texture);
 
 	gs_set_render_target(target, NULL);
@@ -337,13 +378,14 @@ static void render_nv12(struct obs_core_video *video, gs_texture_t *target,
 }
 
 static const char *render_convert_nv12_name = "render_convert_texture_nv12";
-static void render_convert_texture_nv12(struct obs_core_video *video)
+static void render_convert_texture_nv12(struct obs_core_video *video,
+					gs_texture_t *texture)
 {
 	profile_start(render_convert_nv12_name);
 
-	render_nv12(video, video->convert_texture, "NV12_Y",
+	render_nv12(video, texture, video->convert_texture, "NV12_Y",
 		    video->output_width, video->output_height);
-	render_nv12(video, video->convert_uv_texture, "NV12_UV",
+	render_nv12(video, texture, video->convert_uv_texture, "NV12_UV",
 		    video->output_width / 2, video->output_height / 2);
 
 	video->texture_converted = true;
@@ -353,11 +395,10 @@ static void render_convert_texture_nv12(struct obs_core_video *video)
 
 static const char *stage_output_texture_name = "stage_output_texture";
 static inline void stage_output_texture(struct obs_core_video *video,
-					int cur_texture)
+					gs_texture_t *texture, int cur_texture)
 {
 	profile_start(stage_output_texture_name);
 
-	gs_texture_t *texture;
 	bool texture_ready;
 	gs_stagesurf_t *copy = video->copy_surfaces[cur_texture];
 
@@ -365,7 +406,6 @@ static inline void stage_output_texture(struct obs_core_video *video,
 		texture = video->convert_texture;
 		texture_ready = video->texture_converted;
 	} else {
-		texture = video->output_texture;
 		texture_ready = true;
 	}
 
@@ -486,7 +526,7 @@ static inline void render_video(struct obs_core_video *video, bool raw_active,
 	render_main_texture(video);
 
 	if (raw_active || gpu_active) {
-		render_output_texture(video);
+		gs_texture_t *texture = render_output_texture(video);
 
 #ifdef _WIN32
 		if (gpu_active) {
@@ -496,9 +536,9 @@ static inline void render_video(struct obs_core_video *video, bool raw_active,
 
 		if (video->gpu_conversion) {
 			if (video->using_nv12_tex)
-				render_convert_texture_nv12(video);
+				render_convert_texture_nv12(video, texture);
 			else
-				render_convert_texture(video);
+				render_convert_texture(video, texture);
 		}
 
 #ifdef _WIN32
@@ -508,7 +548,7 @@ static inline void render_video(struct obs_core_video *video, bool raw_active,
 		}
 #endif
 		if (raw_active)
-			stage_output_texture(video, cur_texture);
+			stage_output_texture(video, texture, cur_texture);
 	}
 
 	gs_set_render_target(NULL, NULL);
@@ -632,30 +672,6 @@ static void set_gpu_converted_data(struct obs_core_video *video,
 	}
 }
 
-static void convert_frame(struct video_frame *output,
-			  const struct video_data *input,
-			  const struct video_output_info *info)
-{
-	if (info->format == VIDEO_FORMAT_I420) {
-		compress_uyvx_to_i420(input->data[0], input->linesize[0], 0,
-				      info->height, output->data,
-				      output->linesize);
-
-	} else if (info->format == VIDEO_FORMAT_NV12) {
-		compress_uyvx_to_nv12(input->data[0], input->linesize[0], 0,
-				      info->height, output->data,
-				      output->linesize);
-
-	} else if (info->format == VIDEO_FORMAT_I444) {
-		convert_uyvx_to_i444(input->data[0], input->linesize[0], 0,
-				     info->height, output->data,
-				     output->linesize);
-
-	} else {
-		blog(LOG_ERROR, "convert_frame: unsupported texture format");
-	}
-}
-
 static inline void copy_rgbx_frame(struct video_frame *output,
 				   const struct video_data *input,
 				   const struct video_output_info *info)
@@ -690,9 +706,6 @@ static inline void output_video_data(struct obs_core_video *video,
 		if (video->gpu_conversion) {
 			set_gpu_converted_data(video, &output_frame,
 					       input_frame, info);
-
-		} else if (format_is_yuv(info->format)) {
-			convert_frame(&output_frame, input_frame, info);
 		} else {
 			copy_rgbx_frame(&output_frame, input_frame, info);
 		}