diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 8c4c670630e7af9e165371713a7117312ba4dc45..ec73bbab4e7bec08c4f19ab110356ee537c97e29 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -921,34 +921,6 @@ WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c) WRAPPER8_16_SQ(rd8x8_c, rd16_c) WRAPPER8_16_SQ(bit8x8_c, bit16_c) -/* draw the edges of width 'w' of an image of size width, height */ -// FIXME: Check that this is OK for MPEG-4 interlaced. -static void draw_edges_8_c(uint8_t *buf, int wrap, int width, int height, - int w, int h, int sides) -{ - uint8_t *ptr = buf, *last_line; - int i; - - /* left and right */ - for (i = 0; i < height; i++) { - memset(ptr - w, ptr[0], w); - memset(ptr + width, ptr[width - 1], w); - ptr += wrap; - } - - /* top and bottom + corners */ - buf -= w; - last_line = buf + (height - 1) * wrap; - if (sides & EDGE_TOP) - for (i = 0; i < h; i++) - // top - memcpy(buf - (i + 1) * wrap, buf, width + w + w); - if (sides & EDGE_BOTTOM) - for (i = 0; i < h; i++) - // bottom - memcpy(last_line + (i + 1) * wrap, last_line, width + w + w); -} - /* init static data */ av_cold void ff_dsputil_static_init(void) { @@ -1023,8 +995,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) c->nsse[0] = nsse16_c; c->nsse[1] = nsse8_c; - c->draw_edges = draw_edges_8_c; - switch (avctx->bits_per_raw_sample) { case 9: case 10: diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index 284973e1386c1a7e70a61d5dabedad2a83848b55..5a83fe0b6493b3614426eb6b2a49b8299d627b51 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -83,12 +83,6 @@ typedef struct DSPContext { /* (I)DCT */ void (*fdct)(int16_t *block /* align 16 */); void (*fdct248)(int16_t *block /* align 16 */); - - void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, - int w, int h, int sides); -#define EDGE_WIDTH 16 -#define EDGE_TOP 1 -#define EDGE_BOTTOM 2 } DSPContext; void ff_dsputil_static_init(void); diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h index ba39f902c2a9d0067ad57dfab31506a7051dd3a6..ccdc0d851ce222afae4b77e2e5826057e76b24fe 100644 --- a/libavcodec/mpegvideo.h +++ b/libavcodec/mpegvideo.h @@ -80,6 +80,8 @@ enum OutputFormat { #define INPLACE_OFFSET 16 +#define EDGE_WIDTH 16 + /* Start codes. */ #define SEQ_END_CODE 0x000001b7 #define SEQ_START_CODE 0x000001b3 diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c index 12440bb67a3bbe284a4dc019981c1433df670231..5ea240e84ec0eb317c5ab6f3f35c2b0311b8e43f 100644 --- a/libavcodec/mpegvideo_enc.c +++ b/libavcodec/mpegvideo_enc.c @@ -1391,18 +1391,22 @@ static void frame_end(MpegEncContext *s) const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt); int hshift = desc->log2_chroma_w; int vshift = desc->log2_chroma_h; - s->dsp.draw_edges(s->current_picture.f->data[0], s->linesize, - s->h_edge_pos, s->v_edge_pos, - EDGE_WIDTH, EDGE_WIDTH, - EDGE_TOP | EDGE_BOTTOM); - s->dsp.draw_edges(s->current_picture.f->data[1], s->uvlinesize, - s->h_edge_pos >> hshift, s->v_edge_pos >> vshift, - EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift, - EDGE_TOP | EDGE_BOTTOM); - s->dsp.draw_edges(s->current_picture.f->data[2], s->uvlinesize, - s->h_edge_pos >> hshift, s->v_edge_pos >> vshift, - EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift, - EDGE_TOP | EDGE_BOTTOM); + s->mpvencdsp.draw_edges(s->current_picture.f->data[0], s->linesize, + s->h_edge_pos, s->v_edge_pos, + EDGE_WIDTH, EDGE_WIDTH, + EDGE_TOP | EDGE_BOTTOM); + s->mpvencdsp.draw_edges(s->current_picture.f->data[1], s->uvlinesize, + s->h_edge_pos >> hshift, + s->v_edge_pos >> vshift, + EDGE_WIDTH >> hshift, + EDGE_WIDTH >> vshift, + EDGE_TOP | EDGE_BOTTOM); + s->mpvencdsp.draw_edges(s->current_picture.f->data[2], s->uvlinesize, + s->h_edge_pos >> hshift, + s->v_edge_pos >> vshift, + EDGE_WIDTH >> hshift, + EDGE_WIDTH >> vshift, + EDGE_TOP | EDGE_BOTTOM); } emms_c(); diff --git a/libavcodec/mpegvideoencdsp.c b/libavcodec/mpegvideoencdsp.c index ee6327ded9aa216232973c210d4fceb920547d07..8202034643c56df3c2092d6c3acbfe176f2a7e42 100644 --- a/libavcodec/mpegvideoencdsp.c +++ b/libavcodec/mpegvideoencdsp.c @@ -18,6 +18,7 @@ #include #include +#include #include "config.h" #include "libavutil/attributes.h" @@ -124,6 +125,34 @@ static int pix_norm1_c(uint8_t *pix, int line_size) return s; } +/* draw the edges of width 'w' of an image of size width, height */ +// FIXME: Check that this is OK for MPEG-4 interlaced. +static void draw_edges_8_c(uint8_t *buf, int wrap, int width, int height, + int w, int h, int sides) +{ + uint8_t *ptr = buf, *last_line; + int i; + + /* left and right */ + for (i = 0; i < height; i++) { + memset(ptr - w, ptr[0], w); + memset(ptr + width, ptr[width - 1], w); + ptr += wrap; + } + + /* top and bottom + corners */ + buf -= w; + last_line = buf + (height - 1) * wrap; + if (sides & EDGE_TOP) + for (i = 0; i < h; i++) + // top + memcpy(buf - (i + 1) * wrap, buf, width + w + w); + if (sides & EDGE_BOTTOM) + for (i = 0; i < h; i++) + // bottom + memcpy(last_line + (i + 1) * wrap, last_line, width + w + w); +} + av_cold void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c, AVCodecContext *avctx) { @@ -138,6 +167,8 @@ av_cold void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c, c->pix_sum = pix_sum_c; c->pix_norm1 = pix_norm1_c; + c->draw_edges = draw_edges_8_c; + if (ARCH_ARM) ff_mpegvideoencdsp_init_arm(c, avctx); if (ARCH_PPC) diff --git a/libavcodec/mpegvideoencdsp.h b/libavcodec/mpegvideoencdsp.h index dcbeb3cbaef99070db1dd15bf62127b72516f505..91a292a296476277cc2c906d963dd8daf8f3b07d 100644 --- a/libavcodec/mpegvideoencdsp.h +++ b/libavcodec/mpegvideoencdsp.h @@ -26,6 +26,9 @@ #define BASIS_SHIFT 16 #define RECON_SHIFT 6 +#define EDGE_TOP 1 +#define EDGE_BOTTOM 2 + typedef struct MpegvideoEncDSPContext { int (*try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale); @@ -36,6 +39,9 @@ typedef struct MpegvideoEncDSPContext { void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); + + void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, + int w, int h, int sides); } MpegvideoEncDSPContext; void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c, diff --git a/libavcodec/utils.c b/libavcodec/utils.c index d6019d957f7d8b4af0ff2c168c3eac4a0c900169..3af56e9f4685eafd5e967f7334666b468e33c150 100644 --- a/libavcodec/utils.c +++ b/libavcodec/utils.c @@ -41,6 +41,7 @@ #include "avcodec.h" #include "dsputil.h" #include "libavutil/opt.h" +#include "mpegvideo.h" #include "thread.h" #include "internal.h" #include "bytestream.h" diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index 701aad791624adce7c522c676c23f60fed1611da..58b27b5d5b9b3f2a36835524bafb29cd96bc4557 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -51,7 +51,6 @@ OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp_init.o OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o MMX-OBJS-$(CONFIG_AUDIODSP) += x86/audiodsp_mmx.o -MMX-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_mmx.o MMX-OBJS-$(CONFIG_ENCODERS) += x86/fdct.o MMX-OBJS-$(CONFIG_HPELDSP) += x86/fpel_mmx.o \ x86/hpeldsp_mmx.o diff --git a/libavcodec/x86/dsputil_init.c b/libavcodec/x86/dsputil_init.c index adc7aa95d67521a44c812f078379b9a42bea57e4..137f9258ad196799b1b7f0f52232255bb7599556 100644 --- a/libavcodec/x86/dsputil_init.c +++ b/libavcodec/x86/dsputil_init.c @@ -24,24 +24,9 @@ #include "libavcodec/dsputil.h" #include "dsputil_x86.h" -static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, - int cpu_flags, unsigned high_bit_depth) -{ -#if HAVE_MMX_INLINE - if (!high_bit_depth) { - c->draw_edges = ff_draw_edges_mmx; - } -#endif /* HAVE_MMX_INLINE */ -} - av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx, unsigned high_bit_depth) { - int cpu_flags = av_get_cpu_flags(); - - if (X86_MMX(cpu_flags)) - dsputil_init_mmx(c, avctx, cpu_flags, high_bit_depth); - if (CONFIG_ENCODERS) ff_dsputilenc_init_mmx(c, avctx, high_bit_depth); } diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c deleted file mode 100644 index d205a48ea4d0153be3376b4388427a17d9690b47..0000000000000000000000000000000000000000 --- a/libavcodec/x86/dsputil_mmx.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * MMX optimized DSP utils - * Copyright (c) 2000, 2001 Fabrice Bellard - * Copyright (c) 2002-2004 Michael Niedermayer - * - * MMX optimization by Nick Kurshev - * - * This file is part of Libav. - * - * Libav is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * Libav is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "config.h" -#include "libavutil/cpu.h" -#include "libavutil/x86/asm.h" -#include "dsputil_x86.h" -#include "inline_asm.h" - -#if HAVE_INLINE_ASM - -/* Draw the edges of width 'w' of an image of size width, height - * this MMX version can only handle w == 8 || w == 16. */ -void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, - int w, int h, int sides) -{ - uint8_t *ptr, *last_line; - int i; - - last_line = buf + (height - 1) * wrap; - /* left and right */ - ptr = buf; - if (w == 8) { - __asm__ volatile ( - "1: \n\t" - "movd (%0), %%mm0 \n\t" - "punpcklbw %%mm0, %%mm0 \n\t" - "punpcklwd %%mm0, %%mm0 \n\t" - "punpckldq %%mm0, %%mm0 \n\t" - "movq %%mm0, -8(%0) \n\t" - "movq -8(%0, %2), %%mm1 \n\t" - "punpckhbw %%mm1, %%mm1 \n\t" - "punpckhwd %%mm1, %%mm1 \n\t" - "punpckhdq %%mm1, %%mm1 \n\t" - "movq %%mm1, (%0, %2) \n\t" - "add %1, %0 \n\t" - "cmp %3, %0 \n\t" - "jb 1b \n\t" - : "+r" (ptr) - : "r" ((x86_reg) wrap), "r" ((x86_reg) width), - "r" (ptr + wrap * height)); - } else { - __asm__ volatile ( - "1: \n\t" - "movd (%0), %%mm0 \n\t" - "punpcklbw %%mm0, %%mm0 \n\t" - "punpcklwd %%mm0, %%mm0 \n\t" - "punpckldq %%mm0, %%mm0 \n\t" - "movq %%mm0, -8(%0) \n\t" - "movq %%mm0, -16(%0) \n\t" - "movq -8(%0, %2), %%mm1 \n\t" - "punpckhbw %%mm1, %%mm1 \n\t" - "punpckhwd %%mm1, %%mm1 \n\t" - "punpckhdq %%mm1, %%mm1 \n\t" - "movq %%mm1, (%0, %2) \n\t" - "movq %%mm1, 8(%0, %2) \n\t" - "add %1, %0 \n\t" - "cmp %3, %0 \n\t" - "jb 1b \n\t" - : "+r" (ptr) - : "r" ((x86_reg) wrap), "r" ((x86_reg) width), - "r" (ptr + wrap * height)); - } - - /* top and bottom (and hopefully also the corners) */ - if (sides & EDGE_TOP) { - for (i = 0; i < h; i += 4) { - ptr = buf - (i + 1) * wrap - w; - __asm__ volatile ( - "1: \n\t" - "movq (%1, %0), %%mm0 \n\t" - "movq %%mm0, (%0) \n\t" - "movq %%mm0, (%0, %2) \n\t" - "movq %%mm0, (%0, %2, 2) \n\t" - "movq %%mm0, (%0, %3) \n\t" - "add $8, %0 \n\t" - "cmp %4, %0 \n\t" - "jb 1b \n\t" - : "+r" (ptr) - : "r" ((x86_reg) buf - (x86_reg) ptr - w), - "r" ((x86_reg) - wrap), "r" ((x86_reg) - wrap * 3), - "r" (ptr + width + 2 * w)); - } - } - - if (sides & EDGE_BOTTOM) { - for (i = 0; i < h; i += 4) { - ptr = last_line + (i + 1) * wrap - w; - __asm__ volatile ( - "1: \n\t" - "movq (%1, %0), %%mm0 \n\t" - "movq %%mm0, (%0) \n\t" - "movq %%mm0, (%0, %2) \n\t" - "movq %%mm0, (%0, %2, 2) \n\t" - "movq %%mm0, (%0, %3) \n\t" - "add $8, %0 \n\t" - "cmp %4, %0 \n\t" - "jb 1b \n\t" - : "+r" (ptr) - : "r" ((x86_reg) last_line - (x86_reg) ptr - w), - "r" ((x86_reg) wrap), "r" ((x86_reg) wrap * 3), - "r" (ptr + width + 2 * w)); - } - } -} - -#endif /* HAVE_INLINE_ASM */ diff --git a/libavcodec/x86/dsputil_x86.h b/libavcodec/x86/dsputil_x86.h index 7e1e8af05127fe2f242f973eb2198812358d180b..b6bddf22286c748ebd638c9690e2ad1758a7e3c8 100644 --- a/libavcodec/x86/dsputil_x86.h +++ b/libavcodec/x86/dsputil_x86.h @@ -31,7 +31,4 @@ void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx, unsigned high_bit_depth); void ff_dsputil_init_pix_mmx(DSPContext *c, AVCodecContext *avctx); -void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, - int w, int h, int sides); - #endif /* AVCODEC_X86_DSPUTIL_X86_H */ diff --git a/libavcodec/x86/mpegvideoencdsp_init.c b/libavcodec/x86/mpegvideoencdsp_init.c index 4ef2f3496155d1b9518aed07033154ec51346858..7732e7307f2264277b5988f637d0b701abdcef68 100644 --- a/libavcodec/x86/mpegvideoencdsp_init.c +++ b/libavcodec/x86/mpegvideoencdsp_init.c @@ -93,6 +93,101 @@ int ff_pix_norm1_mmx(uint8_t *pix, int line_size); #undef PHADDD #endif /* HAVE_SSSE3_INLINE */ +/* Draw the edges of width 'w' of an image of size width, height + * this MMX version can only handle w == 8 || w == 16. */ +static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, + int w, int h, int sides) +{ + uint8_t *ptr, *last_line; + int i; + + last_line = buf + (height - 1) * wrap; + /* left and right */ + ptr = buf; + if (w == 8) { + __asm__ volatile ( + "1: \n\t" + "movd (%0), %%mm0 \n\t" + "punpcklbw %%mm0, %%mm0 \n\t" + "punpcklwd %%mm0, %%mm0 \n\t" + "punpckldq %%mm0, %%mm0 \n\t" + "movq %%mm0, -8(%0) \n\t" + "movq -8(%0, %2), %%mm1 \n\t" + "punpckhbw %%mm1, %%mm1 \n\t" + "punpckhwd %%mm1, %%mm1 \n\t" + "punpckhdq %%mm1, %%mm1 \n\t" + "movq %%mm1, (%0, %2) \n\t" + "add %1, %0 \n\t" + "cmp %3, %0 \n\t" + "jb 1b \n\t" + : "+r" (ptr) + : "r" ((x86_reg) wrap), "r" ((x86_reg) width), + "r" (ptr + wrap * height)); + } else { + __asm__ volatile ( + "1: \n\t" + "movd (%0), %%mm0 \n\t" + "punpcklbw %%mm0, %%mm0 \n\t" + "punpcklwd %%mm0, %%mm0 \n\t" + "punpckldq %%mm0, %%mm0 \n\t" + "movq %%mm0, -8(%0) \n\t" + "movq %%mm0, -16(%0) \n\t" + "movq -8(%0, %2), %%mm1 \n\t" + "punpckhbw %%mm1, %%mm1 \n\t" + "punpckhwd %%mm1, %%mm1 \n\t" + "punpckhdq %%mm1, %%mm1 \n\t" + "movq %%mm1, (%0, %2) \n\t" + "movq %%mm1, 8(%0, %2) \n\t" + "add %1, %0 \n\t" + "cmp %3, %0 \n\t" + "jb 1b \n\t" + : "+r" (ptr) + : "r" ((x86_reg) wrap), "r" ((x86_reg) width), + "r" (ptr + wrap * height)); + } + + /* top and bottom (and hopefully also the corners) */ + if (sides & EDGE_TOP) { + for (i = 0; i < h; i += 4) { + ptr = buf - (i + 1) * wrap - w; + __asm__ volatile ( + "1: \n\t" + "movq (%1, %0), %%mm0 \n\t" + "movq %%mm0, (%0) \n\t" + "movq %%mm0, (%0, %2) \n\t" + "movq %%mm0, (%0, %2, 2) \n\t" + "movq %%mm0, (%0, %3) \n\t" + "add $8, %0 \n\t" + "cmp %4, %0 \n\t" + "jb 1b \n\t" + : "+r" (ptr) + : "r" ((x86_reg) buf - (x86_reg) ptr - w), + "r" ((x86_reg) - wrap), "r" ((x86_reg) - wrap * 3), + "r" (ptr + width + 2 * w)); + } + } + + if (sides & EDGE_BOTTOM) { + for (i = 0; i < h; i += 4) { + ptr = last_line + (i + 1) * wrap - w; + __asm__ volatile ( + "1: \n\t" + "movq (%1, %0), %%mm0 \n\t" + "movq %%mm0, (%0) \n\t" + "movq %%mm0, (%0, %2) \n\t" + "movq %%mm0, (%0, %2, 2) \n\t" + "movq %%mm0, (%0, %3) \n\t" + "add $8, %0 \n\t" + "cmp %4, %0 \n\t" + "jb 1b \n\t" + : "+r" (ptr) + : "r" ((x86_reg) last_line - (x86_reg) ptr - w), + "r" ((x86_reg) wrap), "r" ((x86_reg) wrap * 3), + "r" (ptr + width + 2 * w)); + } + } +} + #endif /* HAVE_INLINE_ASM */ av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c, @@ -112,6 +207,10 @@ av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c, c->try_8x8basis = try_8x8basis_mmx; } c->add_8x8basis = add_8x8basis_mmx; + + if (avctx->bits_per_raw_sample <= 8) { + c->draw_edges = draw_edges_mmx; + } } if (INLINE_AMD3DNOW(cpu_flags)) {