diff --git a/libswscale/swscale.c b/libswscale/swscale.c index c9dfc8df2fb4b1a4718fe8596c73c2d0a8ed3839..f24561b8bfe5e18b70cd3f795dc50041a67b2cf0 100644 --- a/libswscale/swscale.c +++ b/libswscale/swscale.c @@ -2487,9 +2487,11 @@ static int swScale(SwsContext *c, const uint8_t* src[], const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<chrDstVSubSample) - 1), dstH-1)]; const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input - int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input - int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input - int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input + + // Last line needed as input + int lastLumSrcY = FFMIN(c->srcH, firstLumSrcY + vLumFilterSize) - 1; + int lastLumSrcY2 = FFMIN(c->srcH, firstLumSrcY2 + vLumFilterSize) - 1; + int lastChrSrcY = FFMIN(c->chrSrcH, firstChrSrcY + vChrFilterSize) - 1; int enough_lines; //handle holes (FAST_BILINEAR & weird filters) @@ -2585,6 +2587,49 @@ static int swScale(SwsContext *c, const uint8_t* src[], const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL; + + if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) { + const int16_t **tmpY = (const int16_t **) lumPixBuf + 2 * vLumBufSize; + int neg = -firstLumSrcY, i, end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize); + for (i = 0; i < neg; i++) + tmpY[i] = lumSrcPtr[neg]; + for ( ; i < end; i++) + tmpY[i] = lumSrcPtr[i]; + for ( ; i < vLumFilterSize; i++) + tmpY[i] = tmpY[i-1]; + lumSrcPtr = tmpY; + + if (alpSrcPtr) { + const int16_t **tmpA = (const int16_t **) alpPixBuf + 2 * vLumBufSize; + for (i = 0; i < neg; i++) + tmpA[i] = alpSrcPtr[neg]; + for ( ; i < end; i++) + tmpA[i] = alpSrcPtr[i]; + for ( ; i < vLumFilterSize; i++) + tmpA[i] = tmpA[i - 1]; + alpSrcPtr = tmpA; + } + } + if (firstChrSrcY < 0 || firstChrSrcY + vChrFilterSize > c->chrSrcH) { + const int16_t **tmpU = (const int16_t **) chrUPixBuf + 2 * vChrBufSize, + **tmpV = (const int16_t **) chrVPixBuf + 2 * vChrBufSize; + int neg = -firstChrSrcY, i, end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize); + for (i = 0; i < neg; i++) { + tmpU[i] = chrUSrcPtr[neg]; + tmpV[i] = chrVSrcPtr[neg]; + } + for ( ; i < end; i++) { + tmpU[i] = chrUSrcPtr[i]; + tmpV[i] = chrVSrcPtr[i]; + } + for ( ; i < vChrFilterSize; i++) { + tmpU[i] = tmpU[i - 1]; + tmpV[i] = tmpV[i - 1]; + } + chrUSrcPtr = tmpU; + chrVSrcPtr = tmpV; + } + if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like const int chrSkipMask= (1<chrDstVSubSample)-1; diff --git a/libswscale/utils.c b/libswscale/utils.c index b644ed9610fa798429e8f185d82d17a47bb1391d..12b3202b16b79b795659c9a283f3e9d1af1c9bca 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -182,7 +182,7 @@ static double getSplineCoeff(double a, double b, double c, double d, double dist static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc, int srcW, int dstW, int filterAlign, int one, int flags, int cpu_flags, - SwsVector *srcFilter, SwsVector *dstFilter, double param[2]) + SwsVector *srcFilter, SwsVector *dstFilter, double param[2], int is_horizontal) { int i; int filterSize; @@ -459,27 +459,29 @@ static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSi //FIXME try to align filterPos if possible //fix borders - for (i=0; i srcW) { - int shift= (*filterPos)[i] + filterSize - srcW; - // move filter coefficients right to compensate for filterPos - for (j=filterSize-2; j>=0; j--) { - int right= FFMIN(j + shift, filterSize-1); - filter[i*filterSize +right] += filter[i*filterSize +j]; - filter[i*filterSize +j]=0; + if ((*filterPos)[i] + filterSize > srcW) { + int shift = (*filterPos)[i] + filterSize - srcW; + // move filter coefficients right to compensate for filterPos + for (j = filterSize - 2; j >= 0; j--) { + int right = FFMIN(j + shift, filterSize - 1); + filter[i * filterSize + right] += filter[i * filterSize + j]; + filter[i * filterSize + j ] = 0; + } + (*filterPos)[i] = srcW - filterSize; } - (*filterPos)[i]= srcW - filterSize; } } @@ -958,12 +960,12 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) if (initFilter(&c->hLumFilter, &c->hLumFilterPos, &c->hLumFilterSize, c->lumXInc, srcW , dstW, filterAlign, 1<<14, (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC) : flags, cpu_flags, - srcFilter->lumH, dstFilter->lumH, c->param) < 0) + srcFilter->lumH, dstFilter->lumH, c->param, 1) < 0) goto fail; if (initFilter(&c->hChrFilter, &c->hChrFilterPos, &c->hChrFilterSize, c->chrXInc, c->chrSrcW, c->chrDstW, filterAlign, 1<<14, (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags, cpu_flags, - srcFilter->chrH, dstFilter->chrH, c->param) < 0) + srcFilter->chrH, dstFilter->chrH, c->param, 1) < 0) goto fail; } } // initialize horizontal stuff @@ -978,12 +980,12 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) if (initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, c->lumYInc, srcH , dstH, filterAlign, (1<<12), (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC) : flags, cpu_flags, - srcFilter->lumV, dstFilter->lumV, c->param) < 0) + srcFilter->lumV, dstFilter->lumV, c->param, 0) < 0) goto fail; if (initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc, c->chrSrcH, c->chrDstH, filterAlign, (1<<12), (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags, cpu_flags, - srcFilter->chrV, dstFilter->chrV, c->param) < 0) + srcFilter->chrV, dstFilter->chrV, c->param, 0) < 0) goto fail; #if HAVE_ALTIVEC @@ -1024,11 +1026,11 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) // allocate pixbufs (we use dynamic allocation because otherwise we would need to // allocate several megabytes to handle all possible cases) - FF_ALLOC_OR_GOTO(c, c->lumPixBuf, c->vLumBufSize*2*sizeof(int16_t*), fail); - FF_ALLOC_OR_GOTO(c, c->chrUPixBuf, c->vChrBufSize*2*sizeof(int16_t*), fail); - FF_ALLOC_OR_GOTO(c, c->chrVPixBuf, c->vChrBufSize*2*sizeof(int16_t*), fail); + FF_ALLOC_OR_GOTO(c, c->lumPixBuf, c->vLumBufSize*3*sizeof(int16_t*), fail); + FF_ALLOC_OR_GOTO(c, c->chrUPixBuf, c->vChrBufSize*3*sizeof(int16_t*), fail); + FF_ALLOC_OR_GOTO(c, c->chrVPixBuf, c->vChrBufSize*3*sizeof(int16_t*), fail); if (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat) && isALPHA(c->dstFormat)) - FF_ALLOCZ_OR_GOTO(c, c->alpPixBuf, c->vLumBufSize*2*sizeof(int16_t*), fail); + FF_ALLOCZ_OR_GOTO(c, c->alpPixBuf, c->vLumBufSize*3*sizeof(int16_t*), fail); //Note we need at least one pixel more at the end because of the MMX code (just in case someone wanna replace the 4000/8000) /* align at 16 bytes for AltiVec */ for (i=0; ivLumBufSize; i++) {