diff --git a/libavcodec/cuviddec.c b/libavcodec/cuviddec.c index 03589367cea683d47961f09c48fc632e4c535a15..291bb93dbc682ebcf385374aed92e0b838d32116 100644 --- a/libavcodec/cuviddec.c +++ b/libavcodec/cuviddec.c @@ -34,8 +34,14 @@ #include "avcodec.h" #include "decode.h" #include "hwaccel.h" +#include "nvdec.h" #include "internal.h" +#if !NVDECAPI_CHECK_VERSION(9, 0) +#define cudaVideoSurfaceFormat_YUV444 2 +#define cudaVideoSurfaceFormat_YUV444_16Bit 3 +#endif + typedef struct CuvidContext { AVClass *avclass; @@ -106,6 +112,7 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form CUVIDDECODECAPS *caps = NULL; CUVIDDECODECREATEINFO cuinfo; int surface_fmt; + int chroma_444; int old_width = avctx->width; int old_height = avctx->height; @@ -148,17 +155,19 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form cuinfo.target_rect.right = cuinfo.ulTargetWidth; cuinfo.target_rect.bottom = cuinfo.ulTargetHeight; + chroma_444 = format->chroma_format == cudaVideoChromaFormat_444; + switch (format->bit_depth_luma_minus8) { case 0: // 8-bit - pix_fmts[1] = AV_PIX_FMT_NV12; + pix_fmts[1] = chroma_444 ? AV_PIX_FMT_YUV444P : AV_PIX_FMT_NV12; caps = &ctx->caps8; break; case 2: // 10-bit - pix_fmts[1] = AV_PIX_FMT_P010; + pix_fmts[1] = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P010; caps = &ctx->caps10; break; case 4: // 12-bit - pix_fmts[1] = AV_PIX_FMT_P016; + pix_fmts[1] = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P016; caps = &ctx->caps12; break; default: @@ -261,12 +270,6 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form return 0; } - if (format->chroma_format != cudaVideoChromaFormat_420) { - av_log(avctx, AV_LOG_ERROR, "Chroma formats other than 420 are not supported\n"); - ctx->internal_error = AVERROR(EINVAL); - return 0; - } - ctx->chroma_format = format->chroma_format; cuinfo.CodecType = ctx->codec_type = format->codec; @@ -280,8 +283,15 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form case AV_PIX_FMT_P016: cuinfo.OutputFormat = cudaVideoSurfaceFormat_P016; break; + case AV_PIX_FMT_YUV444P: + cuinfo.OutputFormat = cudaVideoSurfaceFormat_YUV444; + break; + case AV_PIX_FMT_YUV444P16: + cuinfo.OutputFormat = cudaVideoSurfaceFormat_YUV444_16Bit; + break; default: - av_log(avctx, AV_LOG_ERROR, "Output formats other than NV12, P010 or P016 are not supported\n"); + av_log(avctx, AV_LOG_ERROR, "Unsupported output format: %s\n", + av_get_pix_fmt_name(avctx->sw_pix_fmt)); ctx->internal_error = AVERROR(EINVAL); return 0; } @@ -490,6 +500,7 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame) return ret; if (av_fifo_size(ctx->frame_queue)) { + const AVPixFmtDescriptor *pixdesc; CuvidParsedFrame parsed_frame; CUVIDPROCPARAMS params; unsigned int pitch = 0; @@ -520,7 +531,10 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame) goto error; } - for (i = 0; i < 2; i++) { + pixdesc = av_pix_fmt_desc_get(avctx->sw_pix_fmt); + + for (i = 0; i < pixdesc->nb_components; i++) { + int height = avctx->height >> (i ? pixdesc->log2_chroma_h : 0); CUDA_MEMCPY2D cpy = { .srcMemoryType = CU_MEMORYTYPE_DEVICE, .dstMemoryType = CU_MEMORYTYPE_DEVICE, @@ -530,22 +544,25 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame) .dstPitch = frame->linesize[i], .srcY = offset, .WidthInBytes = FFMIN(pitch, frame->linesize[i]), - .Height = avctx->height >> (i ? 1 : 0), + .Height = height, }; ret = CHECK_CU(ctx->cudl->cuMemcpy2DAsync(&cpy, device_hwctx->stream)); if (ret < 0) goto error; - offset += avctx->height; + offset += height; } ret = CHECK_CU(ctx->cudl->cuStreamSynchronize(device_hwctx->stream)); if (ret < 0) goto error; - } else if (avctx->pix_fmt == AV_PIX_FMT_NV12 || - avctx->pix_fmt == AV_PIX_FMT_P010 || - avctx->pix_fmt == AV_PIX_FMT_P016) { + } else if (avctx->pix_fmt == AV_PIX_FMT_NV12 || + avctx->pix_fmt == AV_PIX_FMT_P010 || + avctx->pix_fmt == AV_PIX_FMT_P016 || + avctx->pix_fmt == AV_PIX_FMT_YUV444P || + avctx->pix_fmt == AV_PIX_FMT_YUV444P16) { + unsigned int offset = 0; AVFrame *tmp_frame = av_frame_alloc(); if (!tmp_frame) { av_log(avctx, AV_LOG_ERROR, "av_frame_alloc failed\n"); @@ -553,15 +570,24 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame) goto error; } + pixdesc = av_pix_fmt_desc_get(avctx->sw_pix_fmt); + tmp_frame->format = AV_PIX_FMT_CUDA; tmp_frame->hw_frames_ctx = av_buffer_ref(ctx->hwframe); - tmp_frame->data[0] = (uint8_t*)mapped_frame; - tmp_frame->linesize[0] = pitch; - tmp_frame->data[1] = (uint8_t*)(mapped_frame + avctx->height * pitch); - tmp_frame->linesize[1] = pitch; tmp_frame->width = avctx->width; tmp_frame->height = avctx->height; + /* + * Note that the following logic would not work for three plane + * YUV420 because the pitch value is different for the chroma + * planes. + */ + for (i = 0; i < pixdesc->nb_components; i++) { + tmp_frame->data[i] = (uint8_t*)mapped_frame + offset; + tmp_frame->linesize[i] = pitch; + offset += pitch * (avctx->height >> (i ? pixdesc->log2_chroma_h : 0)); + } + ret = ff_get_buffer(avctx, frame, 0); if (ret < 0) { av_log(avctx, AV_LOG_ERROR, "ff_get_buffer failed\n");