Add VDPAU hardware accelerated decoding for H264 which can be used by video players.

Original patch by NVIDIA corporation. Originally committed as revision 16431 to svn://svn.ffmpeg.org/ffmpeg/trunk

Add VDPAU hardware accelerated decoding for H264 which can be used by
video players. Original patch by NVIDIA corporation. Originally committed as revision 16431 to svn://svn.ffmpeg.org/ffmpeg/trunk
369122dd · NVIDIA Corporation · Carl Eugen Hoyos · bc1d2afb · 369122dd · 369122dd
12 changed files
--- a/configure
+++ b/configure
@@ -975,6 +975,7 @@ h261_encoder_select="aandct"
 h263_encoder_select="aandct"
 h263p_encoder_select="aandct"
 h264_decoder_select="golomb"
+h264_vdpau_decoder_deps="vdpau"
 imc_decoder_select="fft mdct"
 jpegls_decoder_select="golomb"
 jpegls_encoder_select="golomb"

--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -100,6 +100,7 @@ OBJS-$(CONFIG_H263_ENCODER)            += mpegvideo_enc.o motion_est.o ratecontr
 OBJS-$(CONFIG_H263P_ENCODER)           += mpegvideo_enc.o motion_est.o ratecontrol.o h263.o mpeg12data.o mpegvideo.o error_resilience.o
 OBJS-$(CONFIG_H264_DECODER)            += h264.o h264idct.o h264pred.o h264_parser.o cabac.o mpegvideo.o error_resilience.o
 OBJS-$(CONFIG_H264_ENCODER)            += h264enc.o h264dspenc.o
+OBJS-$(CONFIG_H264_VDPAU_DECODER)      += vdpauvideo.o
 OBJS-$(CONFIG_HUFFYUV_DECODER)         += huffyuv.o
 OBJS-$(CONFIG_HUFFYUV_ENCODER)         += huffyuv.o
 OBJS-$(CONFIG_IDCIN_DECODER)           += idcinvideo.o

--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -88,6 +88,7 @@ void avcodec_register_all(void)
    REGISTER_DECODER (H263I, h263i);
    REGISTER_ENCODER (H263P, h263p);
    REGISTER_DECODER (H264, h264);
+    REGISTER_DECODER (H264_VDPAU, h264_vdpau);
    REGISTER_ENCDEC  (HUFFYUV, huffyuv);
    REGISTER_DECODER (IDCIN, idcin);
    REGISTER_DECODER (INDEO2, indeo2);

--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -191,6 +191,9 @@ enum CodecID {
    CODEC_ID_TGV,
    CODEC_ID_TGQ,

+    /* "codecs" for HW decoding with VDPAU */
+    CODEC_ID_H264_VDPAU= 0x9000,
+
    /* various PCM "codecs" */
    CODEC_ID_PCM_S16LE= 0x10000,
    CODEC_ID_PCM_S16BE,
@@ -527,6 +530,10 @@ typedef struct RcOverride{
 * This can be used to prevent truncation of the last audio samples.
 */
 #define CODEC_CAP_SMALL_LAST_FRAME 0x0040
+/**
+ * Codec can export data for HW decoding (VDPAU).
+ */
+#define CODEC_CAP_HWACCEL_VDPAU    0x0080

 //The following defines may change, don't expect compatibility if you use them.
 #define MB_TYPE_INTRA4x4   0x0001

--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -33,6 +33,7 @@
 #include "h264_parser.h"
 #include "golomb.h"
 #include "rectangle.h"
+#include "vdpau_internal.h"

 #include "cabac.h"
 #ifdef ARCH_X86
@@ -2188,6 +2189,8 @@ static av_cold int decode_init(AVCodecContext *avctx){

    if(avctx->codec_id == CODEC_ID_SVQ3)
        avctx->pix_fmt= PIX_FMT_YUVJ420P;
+    else if(avctx->codec_id == CODEC_ID_H264_VDPAU)
+        avctx->pix_fmt= PIX_FMT_VDPAU_H264;
    else
        avctx->pix_fmt= PIX_FMT_YUV420P;

@@ -7289,6 +7292,8 @@ static void execute_decode_slices(H264Context *h, int context_count){
    H264Context *hx;
    int i;

+    if(avctx->codec_id == CODEC_ID_H264_VDPAU)
+        return;
    if(context_count == 1) {
        decode_slice(avctx, &h);
    } else {
@@ -7416,8 +7421,14 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
               && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
               && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=FF_B_TYPE)
               && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
-               && avctx->skip_frame < AVDISCARD_ALL)
+               && avctx->skip_frame < AVDISCARD_ALL){
+                if(ENABLE_H264_VDPAU_DECODER && avctx->codec_id == CODEC_ID_H264_VDPAU){
+                    static const uint8_t start_code[] = {0x00, 0x00, 0x01};
+                    ff_VDPAU_h264_add_data_chunk(h, start_code, sizeof(start_code));
+                    ff_VDPAU_h264_add_data_chunk(h, &buf[buf_index - consumed], consumed );
+                }else
                context_count++;
+            }
            break;
        case NAL_DPA:
            init_get_bits(&hx->s.gb, ptr, bit_length);
@@ -7620,6 +7631,9 @@ static int decode_frame(AVCodecContext *avctx,
        h->prev_frame_num_offset= h->frame_num_offset;
        h->prev_frame_num= h->frame_num;

+        if (ENABLE_H264_VDPAU_DECODER && avctx->codec_id == CODEC_ID_H264_VDPAU)
+            ff_VDPAU_h264_picture_complete(h);
+
        /*
         * FIXME: Error handling code does not seem to support interlaced
         * when slices span multiple rows
@@ -7632,7 +7646,7 @@ static int decode_frame(AVCodecContext *avctx,
         * past end by one (callers fault) and resync_mb_y != 0
         * causes problems for the first MB line, too.
         */
-        if (!FIELD_PICTURE)
+        if (!(ENABLE_H264_VDPAU_DECODER && avctx->codec_id == CODEC_ID_H264_VDPAU) && !FIELD_PICTURE) /* no SW error concealment for VDPAU */
            ff_er_frame_end(s);

        MPV_frame_end(s);
@@ -8005,4 +8019,20 @@ AVCodec h264_decoder = {
    .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
 };

+#ifdef CONFIG_H264_VDPAU_DECODER
+AVCodec h264_vdpau_decoder = { /* registered through REGISTER_DECODER (H264_VDPAU, h264_vdpau) */
+    "h264_vdpau",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_H264_VDPAU,
+    sizeof(H264Context),
+    decode_init, /* selects PIX_FMT_VDPAU_H264 when codec_id is CODEC_ID_H264_VDPAU */
+    NULL,
+    decode_end,
+    decode_frame, /* calls ff_VDPAU_h264_picture_complete() for this codec id */
+    CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU, /* HWACCEL_VDPAU: codec can export data for HW decoding */
+    .flush= flush_dpb,
+    .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
+};
+#endif
+
 #include "svq3.c"
--- a/libavcodec/h264_parser.c
+++ b/libavcodec/h264_parser.c
@@ -161,7 +161,8 @@ static void close(AVCodecParserContext *s)


 AVCodecParser h264_parser = {
-    { CODEC_ID_H264 },
+    { CODEC_ID_H264,
+      CODEC_ID_H264_VDPAU },
    sizeof(H264Context),
    NULL,
    h264_parse,

--- a/libavcodec/imgconvert.c
+++ b/libavcodec/imgconvert.c
@@ -267,6 +267,9 @@ static const PixFmtInfo pix_fmt_info[PIX_FMT_NB] = {
    [PIX_FMT_XVMC_MPEG2_IDCT] = {
        .name = "xvmcidct",
    },
+    [PIX_FMT_VDPAU_H264] = {
+        .name = "vdpau_h264",
+    },
    [PIX_FMT_UYYVYY411] = {
        .name = "uyyvyy411",
        .nb_channels = 1,

--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -957,7 +957,11 @@ void MPV_frame_end(MpegEncContext *s)
        XVMC_field_end(s);
    }else
 #endif
-    if(s->unrestricted_mv && s->current_picture.reference && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
+    if(!(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
+       && s->unrestricted_mv
+       && s->current_picture.reference
+       && !s->intra_only
+       && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
            s->dsp.draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
            s->dsp.draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
            s->dsp.draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);

--- a/libavcodec/vdpau_internal.h
+++ b/libavcodec/vdpau_internal.h
+/*
+ * Video Decode and Presentation API for UNIX (VDPAU) is used for
+ * HW decode acceleration for MPEG-1/2, H.264 and VC-1.
+ *
+ * Copyright (C) 2008 NVIDIA.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VDPAU_INTERNAL_H
+#define AVCODEC_VDPAU_INTERNAL_H
+
+/* Pull in the types used by the prototypes below so that this header
+ * can be included on its own. */
+#include <stdint.h>
+#include "h264.h"
+
+/**
+ * Append one chunk of compressed data to the bitstream buffer list of
+ * the current picture's VDPAU render state.
+ *
+ * Only the pointer and the size are recorded; the data is not copied.
+ *
+ * @param h        H.264 decoder context
+ * @param buf      start of the chunk
+ * @param buf_size size of the chunk in bytes
+ */
+void ff_VDPAU_h264_add_data_chunk(H264Context *h, const uint8_t *buf,
+                                  int buf_size);
+
+/**
+ * Fill in the H.264 picture information of the current picture's VDPAU
+ * render state once all of its slices have been collected.
+ *
+ * @param h H.264 decoder context
+ */
+void ff_VDPAU_h264_picture_complete(H264Context *h);
+
+#endif /* AVCODEC_VDPAU_INTERNAL_H */
--- a/libavcodec/vdpau_render.h
+++ b/libavcodec/vdpau_render.h
+/*
+ * Video Decode and Presentation API for UNIX (VDPAU) is used for
+ * HW decode acceleration for MPEG-1/2, H.264 and VC-1.
+ *
+ * Copyright (C) 2008 NVIDIA.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VDPAU_RENDER_H
+#define AVCODEC_VDPAU_RENDER_H
+
+/**
+ * \defgroup Decoder VDPAU Decoder and Renderer
+ *
+ * VDPAU HW acceleration has two modules
+ * - VDPAU Decoding
+ * - VDPAU Presentation
+ *
+ * VDPAU decoding module parses all headers using FFmpeg
+ * parsing mechanism and uses VDPAU for the actual decoding.
+ *
+ * As per the current implementation, the actual decoding
+ * and rendering (API calls) are done as part of VDPAU
+ * presentation (vo_vdpau.c) module.
+ *
+ * @{
+ * \defgroup  VDPAU_Decoding VDPAU Decoding
+ * \ingroup Decoder
+ * @{
+ */
+
+#include "vdpau/vdpau.h"
+#include "vdpau/vdpau_x11.h"
+
+/**
+ * \brief The videoSurface is used for render.
+ */
+#define FF_VDPAU_STATE_USED_FOR_RENDER 1
+
+/**
+ * \brief The videoSurface is needed for reference/prediction,
+ * codec manipulates this.
+ */
+#define FF_VDPAU_STATE_USED_FOR_REFERENCE 2
+
+/**
+ * \brief This structure is used as a callback between the FFmpeg
+ * decoder (vd_) and presentation (vo_) module.
+ * It describes a video frame: the surface holding it, the picture
+ * parameters, the bitstream information, etc. which are passed
+ * between the FFmpeg decoder and its clients.
+ */
+struct vdpau_render_state {
+    VdpVideoSurface surface; ///< used as rendered surface, never changed.
+
+    int state; ///< Holds FF_VDPAU_STATE_* values
+
+    /** Picture Parameter information for all supported codecs */
+    union _VdpPictureInfo {
+        VdpPictureInfoH264     h264; ///< filled in by the H.264 VDPAU decoder
+    } info;
+
+    /** Describe size/location of the compressed video data */
+    int bitstreamBuffersAlloced; ///< allocation size of bitstreamBuffers, maintained through av_fast_realloc()
+    int bitstreamBuffersUsed;    ///< number of valid entries in bitstreamBuffers, reset to 0 once a picture is complete
+    VdpBitstreamBuffer *bitstreamBuffers; ///< one entry per data chunk added by the decoder
+};
+
+/* @}*/
+
+#endif /* AVCODEC_VDPAU_RENDER_H */
--- a/libavcodec/vdpauvideo.c
+++ b/libavcodec/vdpauvideo.c
+/*
+ * Video Decode and Presentation API for UNIX (VDPAU) is used for
+ * HW decode acceleration for MPEG-1/2, H.264 and VC-1.
+ *
+ * Copyright (c) 2008 NVIDIA.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <limits.h>
+#include "avcodec.h"
+#include "h264.h"
+
+#undef NDEBUG
+#include <assert.h>
+
+#include "vdpau_render.h"
+#include "vdpau_internal.h"
+
+/**
+ * \addtogroup VDPAU_Decoding
+ *
+ * @{
+ */
+
+/**
+ * Build the reference frame table of the current picture's render state.
+ *
+ * Walks the short-term and then the long-term reference lists of the
+ * decoder and stores one VdpReferenceFrameH264 entry per distinct
+ * (surface, long-term flag, frame index) combination. Table entries that
+ * remain unused are reset and marked with VDP_INVALID_HANDLE.
+ *
+ * @param h H.264 decoder context; data[0] of its current picture and of
+ *          every reference picture is interpreted as a
+ *          struct vdpau_render_state pointer
+ */
+static void VDPAU_h264_set_reference_frames(H264Context *h)
+{
+    MpegEncContext * s = &h->s;
+    struct vdpau_render_state * render, * render_ref;
+    VdpReferenceFrameH264 * rf, * rf2;
+    Picture * pic;
+    int i, list, pic_frame_idx;
+
+    render = (struct vdpau_render_state*)s->current_picture_ptr->data[0];
+    assert(render);
+
+    /* rf always points at the next free slot of the table. */
+    rf = &render->info.h264.referenceFrames[0];
+#define H264_RF_COUNT FF_ARRAY_ELEMS(render->info.h264.referenceFrames)
+
+    /* list 0 walks the short-term references, list 1 the long-term ones. */
+    for (list = 0; list < 2; ++list) {
+        Picture **lp = list ? h->long_ref : h->short_ref;
+        int ls = list ? h->long_ref_count : h->short_ref_count;
+
+        for (i = 0; i < ls; ++i) {
+            pic = lp[i];
+            /* Skip empty slots and pictures no longer used as reference. */
+            if (!pic || !pic->reference)
+                continue;
+            /* Long-term pictures are identified by pic_id, others by frame_num. */
+            pic_frame_idx = pic->long_ref ? pic->pic_id : pic->frame_num;
+
+            render_ref = (struct vdpau_render_state*)pic->data[0];
+            assert(render_ref);
+
+            /* Look for an entry already written for the same picture;
+             * the scan stops at rf, i.e. it only covers filled slots. */
+            rf2 = &render->info.h264.referenceFrames[0];
+            while (rf2 != rf) {
+                if (
+                    (rf2->surface == render_ref->surface)
+                    && (rf2->is_long_term == pic->long_ref)
+                    && (rf2->frame_idx == pic_frame_idx)
+                )
+                    break;
+                ++rf2;
+            }
+            /* Found: only merge the field reference flags into it. */
+            if (rf2 != rf) {
+                rf2->top_is_reference    |= (pic->reference & PICT_TOP_FIELD)    ? VDP_TRUE : VDP_FALSE;
+                rf2->bottom_is_reference |= (pic->reference & PICT_BOTTOM_FIELD) ? VDP_TRUE : VDP_FALSE;
+                continue;
+            }
+
+            /* Table is full: further new pictures are silently dropped. */
+            if (rf >= &render->info.h264.referenceFrames[H264_RF_COUNT])
+                continue;
+
+            rf->surface             = render_ref->surface;
+            rf->is_long_term        = pic->long_ref;
+            rf->top_is_reference    = (pic->reference & PICT_TOP_FIELD)    ? VDP_TRUE : VDP_FALSE;
+            rf->bottom_is_reference = (pic->reference & PICT_BOTTOM_FIELD) ? VDP_TRUE : VDP_FALSE;
+            rf->field_order_cnt[0]  = pic->field_poc[0];
+            rf->field_order_cnt[1]  = pic->field_poc[1];
+            rf->frame_idx           = pic_frame_idx;
+
+            ++rf;
+        }
+    }
+
+    /* Reset every slot that was not filled above. */
+    for (; rf < &render->info.h264.referenceFrames[H264_RF_COUNT]; ++rf) {
+        rf->surface             = VDP_INVALID_HANDLE;
+        rf->is_long_term        = 0;
+        rf->top_is_reference    = 0;
+        rf->bottom_is_reference = 0;
+        rf->field_order_cnt[0]  = 0;
+        rf->field_order_cnt[1]  = 0;
+        rf->frame_idx           = 0;
+    }
+}
+
+/**
+ * Append one chunk of compressed data to the bitstream buffer list of
+ * the current picture's render state.
+ *
+ * Only the pointer and the size are stored, the data itself is not
+ * copied, so the caller has to keep buf valid while the entry is in use.
+ * When the list is still empty, the reference frame table of the picture
+ * is filled in before the chunk is added.
+ *
+ * If the list cannot be enlarged, an error is logged and the chunk is
+ * dropped; the entries already stored are kept.
+ *
+ * @param h        H.264 decoder context; the render state is taken from
+ *                 data[0] of its current picture
+ * @param buf      start of the chunk
+ * @param buf_size size of the chunk in bytes
+ */
+void ff_VDPAU_h264_add_data_chunk(H264Context *h, const uint8_t *buf, int buf_size)
+{
+    MpegEncContext * s = &h->s;
+    struct vdpau_render_state * render;
+    VdpBitstreamBuffer * buffers, * chunk;
+
+    render = (struct vdpau_render_state*)s->current_picture_ptr->data[0];
+    assert(render);
+
+    if (!render->bitstreamBuffersUsed)
+        VDPAU_h264_set_reference_frames(h);
+
+    /* Reallocate into a temporary: on failure the old array is still
+     * allocated and must stay reachable through render->bitstreamBuffers. */
+    buffers = av_fast_realloc(
+        render->bitstreamBuffers,
+        &render->bitstreamBuffersAlloced,
+        sizeof(*render->bitstreamBuffers)*(render->bitstreamBuffersUsed + 1)
+    );
+    if (!buffers) {
+        av_log(s->avctx, AV_LOG_ERROR, "Cannot allocate VDPAU bitstream buffer\n");
+        return;
+    }
+    render->bitstreamBuffers = buffers;
+
+    chunk = &render->bitstreamBuffers[render->bitstreamBuffersUsed];
+    chunk->struct_version  = VDP_BITSTREAM_BUFFER_VERSION;
+    chunk->bitstream       = buf;
+    chunk->bitstream_bytes = buf_size;
+    render->bitstreamBuffersUsed++;
+}
+
+/**
+ * Export the header-derived H.264 parameters of the current picture.
+ *
+ * Copies the slice count, the field order counts and the sps/pps derived
+ * fields into the VdpPictureInfoH264 of the render state stored in
+ * data[0] of the current picture, then calls ff_draw_horiz_band() over
+ * the full picture height and resets the number of used bitstream
+ * buffers to 0. When fewer than one slice was counted, only the slice
+ * count is written and the function returns early.
+ *
+ * @param h H.264 decoder context
+ */
+void ff_VDPAU_h264_picture_complete(H264Context *h)
+{
+    MpegEncContext *s = &h->s;
+    Picture *cur;
+    struct vdpau_render_state *render;
+    VdpPictureInfoH264 *info;
+    int field;
+
+    cur    = s->current_picture_ptr;
+    render = (struct vdpau_render_state*)cur->data[0];
+    assert(render);
+    info   = &render->info.h264;
+
+    info->slice_count = h->slice_num;
+    if (info->slice_count < 1)
+        return;
+
+    /* A field order count of INT_MAX is exported as 0. */
+    for (field = 0; field < 2; ++field) {
+        int poc = cur->field_poc[field];
+        info->field_order_cnt[field] = poc == INT_MAX ? 0 : poc;
+    }
+
+    /* State of the picture being decoded. */
+    info->is_reference                           = cur->reference ? VDP_TRUE : VDP_FALSE;
+    info->frame_num                              = h->frame_num;
+    info->field_pic_flag                         = s->picture_structure != PICT_FRAME;
+    info->bottom_field_flag                      = s->picture_structure == PICT_BOTTOM_FIELD;
+
+    /* Values taken from h->sps and h->pps. */
+    info->num_ref_frames                         = h->sps.ref_frame_count;
+    info->mb_adaptive_frame_field_flag           = h->sps.mb_aff;
+    info->constrained_intra_pred_flag            = h->pps.constrained_intra_pred;
+    info->weighted_pred_flag                     = h->pps.weighted_pred;
+    info->weighted_bipred_idc                    = h->pps.weighted_bipred_idc;
+    info->frame_mbs_only_flag                    = h->sps.frame_mbs_only_flag;
+    info->transform_8x8_mode_flag                = h->pps.transform_8x8_mode;
+    info->chroma_qp_index_offset                 = h->pps.chroma_qp_index_offset[0];
+    info->second_chroma_qp_index_offset          = h->pps.chroma_qp_index_offset[1];
+    info->pic_init_qp_minus26                    = h->pps.init_qp - 26;
+    info->num_ref_idx_l0_active_minus1           = h->pps.ref_count[0] - 1;
+    info->num_ref_idx_l1_active_minus1           = h->pps.ref_count[1] - 1;
+    info->log2_max_frame_num_minus4              = h->sps.log2_max_frame_num - 4;
+    info->pic_order_cnt_type                     = h->sps.poc_type;
+    info->log2_max_pic_order_cnt_lsb_minus4      = h->sps.log2_max_poc_lsb - 4;
+    info->delta_pic_order_always_zero_flag       = h->sps.delta_pic_order_always_zero_flag;
+    info->direct_8x8_inference_flag              = h->sps.direct_8x8_inference_flag;
+    info->entropy_coding_mode_flag               = h->pps.cabac;
+    info->pic_order_present_flag                 = h->pps.pic_order_present;
+    info->deblocking_filter_control_present_flag = h->pps.deblocking_filter_parameters_present;
+    info->redundant_pic_cnt_present_flag         = h->pps.redundant_pic_cnt_present;
+    memcpy(info->scaling_lists_4x4, h->pps.scaling_matrix4, sizeof(info->scaling_lists_4x4));
+    memcpy(info->scaling_lists_8x8, h->pps.scaling_matrix8, sizeof(info->scaling_lists_8x8));
+
+    ff_draw_horiz_band(s, 0, s->avctx->height);
+    /* Start the next picture with an empty bitstream buffer list. */
+    render->bitstreamBuffersUsed = 0;
+}
+
+/* @}*/
--- a/libavutil/avutil.h
+++ b/libavutil/avutil.h
@@ -121,6 +121,7 @@ enum PixelFormat {
    PIX_FMT_YUV440P,   ///< Planar YUV 4:4:0 (1 Cr & Cb sample per 1x2 Y samples)
    PIX_FMT_YUVJ440P,  ///< Planar YUV 4:4:0 full scale (jpeg)
    PIX_FMT_YUVA420P,  ///< Planar YUV 4:2:0, 20bpp, (1 Cr & Cb sample per 2x2 Y & A samples)
+    PIX_FMT_VDPAU_H264,///< H264 HW decoding with VDPAU, data[0] contains a vdpau_render_state struct which contains the bitstream of the slices as well as various fields extracted from headers
    PIX_FMT_NB,        ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions
 };