From 7d4fe0c5cb9501efc4a434053cec85a70cae156e Mon Sep 17 00:00:00 2001 From: Richard Ling Date: Tue, 21 Nov 2017 21:32:06 +1100 Subject: [PATCH] avfilter: add normalize filter --- Changelog | 1 + doc/filters.texi | 80 ++++++++ libavfilter/Makefile | 1 + libavfilter/allfilters.c | 1 + libavfilter/version.h | 2 +- libavfilter/vf_normalize.c | 386 +++++++++++++++++++++++++++++++++++++ 6 files changed, 470 insertions(+), 1 deletion(-) create mode 100644 libavfilter/vf_normalize.c diff --git a/Changelog b/Changelog index 6ef144c315..0b7e02392d 100644 --- a/Changelog +++ b/Changelog @@ -19,6 +19,7 @@ version : - acontrast audio filter - OpenCL overlay filter - video mix filter +- video normalize filter version 3.4: diff --git a/doc/filters.texi b/doc/filters.texi index 76929e4db5..fda789630b 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -10867,6 +10867,86 @@ Add temporal and uniform noise to input video: noise=alls=20:allf=t+u @end example +@section normalize + +Normalize RGB video (aka histogram stretching, contrast stretching). +See: https://en.wikipedia.org/wiki/Normalization_(image_processing) + +For each channel of each frame, the filter computes the input range and maps +it linearly to the user-specified output range. The output range defaults +to the full dynamic range from pure black to pure white. + +Temporal smoothing can be used on the input range to reduce flickering (rapid +changes in brightness) caused when small dark or bright objects enter or leave +the scene. This is similar to the auto-exposure (automatic gain control) on a +video camera, and, like a video camera, it may cause a period of over- or +under-exposure of the video. + +The R,G,B channels can be normalized independently, which may cause some +color shifting, or linked together as a single channel, which prevents +color shifting. Linked normalization preserves hue. Independent normalization +does not, so it can be used to remove some color casts. Independent and linked +normalization can be combined in any ratio. + +The normalize filter accepts the following options: + +@table @option +@item blackpt +@item whitept +Colors which define the output range. The minimum input value is mapped to +the @var{blackpt}. The maximum input value is mapped to the @var{whitept}. +The defaults are black and white respectively. Specifying white for +@var{blackpt} and black for @var{whitept} will give color-inverted, +normalized video. Shades of grey can be used to reduce the dynamic range +(contrast). Specifying saturated colors here can create some interesting +effects. + +@item smoothing +The number of previous frames to use for temporal smoothing. The input range +of each channel is smoothed using a rolling average over the current frame +and the @var{smoothing} previous frames. The default is 0 (no temporal +smoothing). + +@item independence +Controls the ratio of independent (color shifting) channel normalization to +linked (color preserving) normalization. 0.0 is fully linked, 1.0 is fully +independent. Defaults to 1.0 (fully independent). + +@item strength +Overall strength of the filter. 1.0 is full strength. 0.0 is a rather +expensive no-op. Defaults to 1.0 (full strength). 
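+Values in between interpolate the output range towards the original input
+range; for example, @code{strength=0.5} maps the input extremes halfway
+towards @var{blackpt} and @var{whitept}.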
+ +@end table + +@subsection Examples + +Stretch video contrast to use the full dynamic range, with no temporal +smoothing; may flicker depending on the source content: +@example +normalize=blackpt=black:whitept=white:smoothing=0 +@end example + +As above, but with 50 frames of temporal smoothing; flicker should be +reduced, depending on the source content: +@example +normalize=blackpt=black:whitept=white:smoothing=50 +@end example + +As above, but with hue-preserving linked channel normalization: +@example +normalize=blackpt=black:whitept=white:smoothing=50:independence=0 +@end example + +As above, but with half strength: +@example +normalize=blackpt=black:whitept=white:smoothing=50:independence=0:strength=0.5 +@end example + +Map the darkest input color to red, the brightest input color to cyan: +@example +normalize=blackpt=red:whitept=cyan +@end example + @section null Pass the video source unchanged to the output. diff --git a/libavfilter/Makefile b/libavfilter/Makefile index c47373a65b..dd195d2538 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -247,6 +247,7 @@ OBJS-$(CONFIG_NLMEANS_FILTER) += vf_nlmeans.o OBJS-$(CONFIG_NNEDI_FILTER) += vf_nnedi.o OBJS-$(CONFIG_NOFORMAT_FILTER) += vf_format.o OBJS-$(CONFIG_NOISE_FILTER) += vf_noise.o +OBJS-$(CONFIG_NORMALIZE_FILTER) += vf_normalize.o OBJS-$(CONFIG_NULL_FILTER) += vf_null.o OBJS-$(CONFIG_OCR_FILTER) += vf_ocr.o OBJS-$(CONFIG_OCV_FILTER) += vf_libopencv.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index 3a9cd00fdf..e09d841387 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -257,6 +257,7 @@ static void register_all(void) REGISTER_FILTER(NNEDI, nnedi, vf); REGISTER_FILTER(NOFORMAT, noformat, vf); REGISTER_FILTER(NOISE, noise, vf); + REGISTER_FILTER(NORMALIZE, normalize, vf); REGISTER_FILTER(NULL, null, vf); REGISTER_FILTER(OCR, ocr, vf); REGISTER_FILTER(OCV, ocv, vf); diff --git a/libavfilter/version.h b/libavfilter/version.h index 3242c44618..fdcf76befe 100644 --- a/libavfilter/version.h +++ b/libavfilter/version.h @@ -30,7 +30,7 @@ #include "libavutil/version.h" #define LIBAVFILTER_VERSION_MAJOR 7 -#define LIBAVFILTER_VERSION_MINOR 3 +#define LIBAVFILTER_VERSION_MINOR 4 #define LIBAVFILTER_VERSION_MICRO 100 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \ diff --git a/libavfilter/vf_normalize.c b/libavfilter/vf_normalize.c new file mode 100644 index 0000000000..5c1fe98c60 --- /dev/null +++ b/libavfilter/vf_normalize.c @@ -0,0 +1,386 @@ +/* + * Copyright (c) 2017 Richard Ling + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* + * Normalize RGB video (aka histogram stretching, contrast stretching). 
+ * See: https://en.wikipedia.org/wiki/Normalization_(image_processing)
+ *
+ * For each channel of each frame, the filter computes the input range and maps
+ * it linearly to the user-specified output range. The output range defaults
+ * to the full dynamic range from pure black to pure white.
+ *
+ * Naively maximising the dynamic range of each frame of video in isolation
+ * may cause flickering (rapid changes in brightness of static objects in the
+ * scene) when small dark or bright objects enter or leave the scene. This
+ * filter can apply temporal smoothing to the input range to reduce flickering.
+ * Temporal smoothing is similar to the auto-exposure (automatic gain control)
+ * on a video camera, and, like a video camera, it may cause a period of over-
+ * or under-exposure of the video.
+ *
+ * The filter can normalize the R,G,B channels independently, which may cause
+ * color shifting, or link them together as a single channel, which prevents
+ * color shifting. More precisely, linked normalization preserves hue (as it's
+ * defined in HSV/HSL color spaces) while independent normalization does not.
+ * Independent normalization can be used to remove color casts, such as the
+ * blue cast from underwater video, restoring more natural colors. The filter
+ * can also combine independent and linked normalization in any ratio.
+ *
+ * Finally, the overall strength of the filter can be adjusted, from no effect
+ * to full normalization.
+ *
+ * The 5 AVOptions are:
+ *   blackpt,   Colors which define the output range. The minimum input value
+ *   whitept    is mapped to the blackpt. The maximum input value is mapped to
+ *              the whitept. The defaults are black and white respectively.
+ *              Specifying white for blackpt and black for whitept will give
+ *              color-inverted, normalized video. Shades of grey can be used
+ *              to reduce the dynamic range (contrast). Specifying saturated
+ *              colors here can create some interesting effects.
+ *
+ *   smoothing  The amount of temporal smoothing, expressed in frames (>=0).
+ *              The minimum and maximum input values of each channel are
+ *              smoothed using a rolling average over the current frame and
+ *              that many previous frames of video. Defaults to 0 (no temporal
+ *              smoothing).
+ *
+ *   independence
+ *              Controls the ratio of independent (color shifting) channel
+ *              normalization to linked (color preserving) normalization. 0.0
+ *              is fully linked, 1.0 is fully independent. Defaults to fully
+ *              independent.
+ *
+ *   strength   Overall strength of the filter. 1.0 is full strength. 0.0 is
+ *              a rather expensive no-op. Values in between can give a gentle
+ *              boost to low-contrast video without creating an artificial
+ *              over-processed look. The default is full strength.
+ */
+
+#include "libavutil/imgutils.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+typedef struct NormalizeContext {
+    const AVClass *class;
+
+    // Storage for the corresponding AVOptions
+    uint8_t blackpt[4];
+    uint8_t whitept[4];
+    int smoothing;
+    float independence;
+    float strength;
+
+    int co[4];          // Offsets to R,G,B,A bytes respectively in each pixel
+    int num_components; // Number of components in the pixel format
+    int history_len;    // Number of frames to average; based on smoothing factor
+    int frame_num;      // Increments on each frame, starting from 0.
+
+    // Per-extremum, per-channel history, for temporal smoothing.
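+    // Each history is a circular buffer indexed by (frame_num % history_len);
+    // history_sum is updated incrementally, so the rolling average costs O(1)
+    // per frame regardless of the smoothing value.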
+    struct {
+        uint8_t *history;     // History entries.
+        uint32_t history_sum; // Sum of history entries.
+    } min[3], max[3];         // Min and max for each channel in {R,G,B}.
+    uint8_t *history_mem;     // Single allocation for above history entries.
+
+} NormalizeContext;
+
+#define OFFSET(x) offsetof(NormalizeContext, x)
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+static const AVOption normalize_options[] = {
+    { "blackpt",  "output color to which darkest input color is mapped",   OFFSET(blackpt), AV_OPT_TYPE_COLOR, { .str = "black" }, CHAR_MIN, CHAR_MAX, FLAGS },
+    { "whitept",  "output color to which brightest input color is mapped", OFFSET(whitept), AV_OPT_TYPE_COLOR, { .str = "white" }, CHAR_MIN, CHAR_MAX, FLAGS },
+    { "smoothing", "amount of temporal smoothing of the input range, to reduce flicker", OFFSET(smoothing), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX/8, FLAGS },
+    { "independence", "proportion of independent to linked channel normalization", OFFSET(independence), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, 1.0, FLAGS },
+    { "strength", "strength of filter, from no effect to full normalization", OFFSET(strength), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, 1.0, FLAGS },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(normalize);
+
+// This function is the main guts of the filter. Normalizes the input frame
+// into the output frame. The frames are known to have the same dimensions
+// and pixel format.
+static void normalize(NormalizeContext *s, AVFrame *in, AVFrame *out)
+{
+    // Per-extremum, per-channel local variables.
+    struct {
+        uint8_t in;     // Original input byte value for this frame.
+        float smoothed; // Smoothed input value [0,255].
+        float out;      // Output value [0,255].
+    } min[3], max[3];   // Min and max for each channel in {R,G,B}.
+
+    float rgb_min_smoothed; // Min input range for linked normalization
+    float rgb_max_smoothed; // Max input range for linked normalization
+    uint8_t lut[3][256];    // Lookup table
+    int x, y, c;
+
+    // First, scan the input frame to find, for each channel, the minimum
+    // (min.in) and maximum (max.in) values present in the channel.
+    for (c = 0; c < 3; c++)
+        min[c].in = max[c].in = in->data[0][s->co[c]];
+    for (y = 0; y < in->height; y++) {
+        uint8_t *inp = in->data[0] + y * in->linesize[0];
+        uint8_t *outp = out->data[0] + y * out->linesize[0];
+        for (x = 0; x < in->width; x++) {
+            for (c = 0; c < 3; c++) {
+                min[c].in = FFMIN(min[c].in, inp[s->co[c]]);
+                max[c].in = FFMAX(max[c].in, inp[s->co[c]]);
+            }
+            inp += s->num_components;
+            outp += s->num_components;
+        }
+    }
+
+    // Next, for each channel, push min.in and max.in into their respective
+    // histories, to determine the min.smoothed and max.smoothed for this frame.
+    {
+        int history_idx = s->frame_num % s->history_len;
+        // Assume the history is not yet full; num_history_vals is the number
+        // of frames received so far including the current frame.
+        int num_history_vals = s->frame_num + 1;
+        if (s->frame_num >= s->history_len) {
+            // The history is full; drop the oldest value and cap num_history_vals.
+            for (c = 0; c < 3; c++) {
+                s->min[c].history_sum -= s->min[c].history[history_idx];
+                s->max[c].history_sum -= s->max[c].history[history_idx];
+            }
+            num_history_vals = s->history_len;
+        }
+        // For each extremum, update history_sum and calculate smoothed value
+        // as the rolling average of the history entries.
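+        // (For example, with history_len=3 and per-frame minima of 10, 12
+        // and 8 for some channel, the smoothed minimum is (10+12+8)/3 = 10.0.)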
+        for (c = 0; c < 3; c++) {
+            s->min[c].history_sum += (s->min[c].history[history_idx] = min[c].in);
+            min[c].smoothed = s->min[c].history_sum / (float)num_history_vals;
+            s->max[c].history_sum += (s->max[c].history[history_idx] = max[c].in);
+            max[c].smoothed = s->max[c].history_sum / (float)num_history_vals;
+        }
+    }
+
+    // Determine the input range for linked normalization. This is simply the
+    // minimum of the per-channel minimums, and the maximum of the per-channel
+    // maximums.
+    rgb_min_smoothed = FFMIN3(min[0].smoothed, min[1].smoothed, min[2].smoothed);
+    rgb_max_smoothed = FFMAX3(max[0].smoothed, max[1].smoothed, max[2].smoothed);
+
+    // Now, process each channel to determine the input and output range and
+    // build the lookup tables.
+    for (c = 0; c < 3; c++) {
+        int in_val;
+        // Adjust the input range for this channel [min.smoothed,max.smoothed]
+        // by mixing in the correct proportion of the linked normalization
+        // input range [rgb_min_smoothed,rgb_max_smoothed].
+        min[c].smoothed = (min[c].smoothed  *         s->independence)
+                        + (rgb_min_smoothed * (1.0f - s->independence));
+        max[c].smoothed = (max[c].smoothed  *         s->independence)
+                        + (rgb_max_smoothed * (1.0f - s->independence));
+
+        // Calculate the output range [min.out,max.out] as a ratio of the full-
+        // strength output range [blackpt,whitept] and the original input range
+        // [min.in,max.in], based on the user-specified filter strength.
+        min[c].out = (s->blackpt[c] *         s->strength)
+                   + (min[c].in    * (1.0f - s->strength));
+        max[c].out = (s->whitept[c] *         s->strength)
+                   + (max[c].in    * (1.0f - s->strength));
+
+        // Now, build a lookup table which linearly maps the adjusted input range
+        // [min.smoothed,max.smoothed] to the output range [min.out,max.out].
+        // Perform the linear interpolation for each x:
+        //     lut[x] = (int)((x - min.smoothed) * scale + min.out + 0.5)
+        //     where scale = (max.out - min.out) / (max.smoothed - min.smoothed)
+        if (min[c].smoothed == max[c].smoothed) {
+            // There is no dynamic range to expand; map every input value in
+            // this channel to min.out.
+            for (in_val = min[c].in; in_val <= max[c].in; in_val++)
+                lut[c][in_val] = min[c].out;
+        } else {
+            // We must set lookup values for all values in the original input
+            // range [min.in,max.in]. Since the original input range may be
+            // larger than [min.smoothed,max.smoothed], some output values may
+            // fall outside the [0,255] dynamic range. We need to clamp them.
+            float scale = (max[c].out - min[c].out) / (max[c].smoothed - min[c].smoothed);
+            for (in_val = min[c].in; in_val <= max[c].in; in_val++) {
+                int out_val = (in_val - min[c].smoothed) * scale + min[c].out + 0.5f;
+                out_val = FFMAX(out_val, 0);
+                out_val = FFMIN(out_val, 255);
+                lut[c][in_val] = out_val;
+            }
+        }
+    }
+
+    // Finally, process the pixels of the input frame using the lookup tables.
+    for (y = 0; y < in->height; y++) {
+        uint8_t *inp = in->data[0] + y * in->linesize[0];
+        uint8_t *outp = out->data[0] + y * out->linesize[0];
+        for (x = 0; x < in->width; x++) {
+            for (c = 0; c < 3; c++)
+                outp[s->co[c]] = lut[c][inp[s->co[c]]];
+            if (s->num_components == 4)
+                // Copy alpha as-is.
+                outp[s->co[3]] = inp[s->co[3]];
+            inp += s->num_components;
+            outp += s->num_components;
+        }
+    }
+
+    s->frame_num++;
+}
+
+// Now we define all the functions accessible from the ff_vf_normalize class,
+// which is ffmpeg's interface to our filter. See doc/filter_design.txt and
+// doc/writing_filters.txt for descriptions of what these interface functions
+// are expected to do.
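+
+// A worked example of the lut construction in normalize() above, with the
+// default options (blackpt=0, whitept=255, strength=1.0): given a smoothed
+// input range of [32,224], scale = (255 - 0) / (224 - 32) ~= 1.328, so the
+// lut maps 32 -> 0, 128 -> 128 and 224 -> 255.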
+
+// Set the pixel formats that our filter supports. We should be able to process
+// any 8-bit RGB formats. 16-bit support might be useful one day.
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat pixel_fmts[] = {
+        AV_PIX_FMT_RGB24,
+        AV_PIX_FMT_BGR24,
+        AV_PIX_FMT_ARGB,
+        AV_PIX_FMT_RGBA,
+        AV_PIX_FMT_ABGR,
+        AV_PIX_FMT_BGRA,
+        AV_PIX_FMT_0RGB,
+        AV_PIX_FMT_RGB0,
+        AV_PIX_FMT_0BGR,
+        AV_PIX_FMT_BGR0,
+        AV_PIX_FMT_NONE
+    };
+    // According to filter_design.txt, using ff_set_common_formats() this way
+    // ensures the pixel formats of the input and output will be the same. That
+    // saves us the effort of handling format conversions.
+    AVFilterFormats *formats = ff_make_format_list(pixel_fmts);
+    if (!formats)
+        return AVERROR(ENOMEM);
+    return ff_set_common_formats(ctx, formats);
+}
+
+// At this point we know the pixel format used for both input and output. We
+// can also access the frame rate of the input video and allocate some memory
+// appropriately.
+static int config_input(AVFilterLink *inlink)
+{
+    NormalizeContext *s = inlink->dst->priv;
+    // Store offsets to R,G,B,A bytes respectively in each pixel
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    int c;
+
+    for (c = 0; c < 4; ++c)
+        s->co[c] = desc->comp[c].offset;
+    s->num_components = desc->nb_components;
+    // Convert smoothing value to history_len (a count of frames to average,
+    // must be at least 1). Currently this is a direct assignment, but the
+    // smoothing value was originally envisaged as a number of seconds. In
+    // future it would be nice to set history_len using a number of seconds,
+    // but VFR video is currently an obstacle to doing so.
+    s->history_len = s->smoothing + 1;
+    // Allocate the history buffers -- there are 6: one per extremum (min and
+    // max) of each of the R,G,B channels. s->smoothing is limited to
+    // INT_MAX/8, so that (s->history_len * 6) can't overflow on 32bit
+    // causing a too-small allocation.
+    s->history_mem = av_malloc(s->history_len * 6);
+    if (s->history_mem == NULL)
+        return AVERROR(ENOMEM);
+
+    for (c = 0; c < 3; c++) {
+        s->min[c].history = s->history_mem + (c*2)   * s->history_len;
+        s->max[c].history = s->history_mem + (c*2+1) * s->history_len;
+    }
+    return 0;
+}
+
+// Free any memory allocations here.
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    NormalizeContext *s = ctx->priv;
+
+    av_freep(&s->history_mem);
+}
+
+// This function is pretty much standard from doc/writing_filters.txt. It
+// tries to do in-place filtering where possible, only allocating a new output
+// frame when absolutely necessary.
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    NormalizeContext *s = ctx->priv;
+    AVFrame *out;
+    // Set 'direct' if we can modify the input frame in-place. Otherwise we
+    // need to retrieve a new frame from the output link.
+    int direct = av_frame_is_writable(in) && !ctx->is_disabled;
+
+    if (direct) {
+        out = in;
+    } else {
+        out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+        if (!out) {
+            av_frame_free(&in);
+            return AVERROR(ENOMEM);
+        }
+        av_frame_copy_props(out, in);
+    }
+
+    // Now we've got the input and output frames (which may be the same frame),
+    // perform the filtering with our custom function.
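+    // Note that normalize() still runs when the filter is disabled: the
+    // normalized frame is discarded below, but frame_num and the smoothing
+    // history stay up to date.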
+ normalize(s, in, out); + + if (ctx->is_disabled) { + av_frame_free(&out); + return ff_filter_frame(outlink, in); + } + + if (!direct) + av_frame_free(&in); + + return ff_filter_frame(outlink, out); +} + +static const AVFilterPad inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .filter_frame = filter_frame, + .config_props = config_input, + }, + { NULL } +}; + +static const AVFilterPad outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + }, + { NULL } +}; + +AVFilter ff_vf_normalize = { + .name = "normalize", + .description = NULL_IF_CONFIG_SMALL("Normalize RGB video."), + .priv_size = sizeof(NormalizeContext), + .priv_class = &normalize_class, + .uninit = uninit, + .query_formats = query_formats, + .inputs = inputs, + .outputs = outputs, +}; -- GitLab