[FFmpeg-trac] #3451(swscale:new): sws_scale crashes in high resolutions when using mmx optimization
FFmpeg
trac at avcodec.org
Thu Mar 13 10:09:14 CET 2014
#3451: sws_scale crashes in high resolutions when using mmx optimization
-------------------------------------+-----------------------------------
Reporter: miro82 | Owner:
Type: defect | Status: new
Priority: normal | Component: swscale
Version: unspecified | Resolution:
Keywords: crash | Blocked By:
Blocking: | Reproduced by developer: 0
Analyzed by developer: 0 |
-------------------------------------+-----------------------------------
Comment (by miro82):
I discovered that --enable-shared must be set in order to trigger the
crash. This time FFmpeg was configured and built with:
{{{
./configure --disable-yasm --disable-iconv --enable-libx265 --enable-libx264 \
--enable-gpl --enable-shared --disable-stripping
}}}
Debug output from the application:
{{{
Miroslavs-MacBook-Pro:bin miran46$ lldb mmx_test2
Current executable set to 'mmx_test2' (x86_64).
(lldb) r
Process 29388 launched:
'/Users/miran46/code/projects/FFMpegCapture/mmx_test2_build/bin/mmx_test2'
(x86_64)
x265 [info]: using cpu capabilities: MMX2 SSE2Fast SSSE3 SSE4.2 AVX AVX2
FMA3 LZCNT BMI2
x265 [info]: Main profile, Level-5.1 (High tier)
x265 [info]: WPP streams / pool / frames : 34 / 4 / 1
x265 [info]: CU size : 64
x265 [info]: Max RQT depth inter / intra : 1 / 1
x265 [info]: ME / range / subpel / merge : hex / 57 / 2 / 2
x265 [info]: Keyframe min / max / scenecut : 25 / 250 / 40
x265 [info]: Lookahead / bframes / badapt : 20 / 4 / 2
x265 [info]: b-pyramid / weightp / refs : 1 / 1 / 3
x265 [info]: Rate Control / AQ-Strength / CUTree : ABR-104857 kbps / 1.0 /
1
x265 [info]: tools: rect amp rd=3 lft sao-lcu sign-hide
Process 29388 stopped
* thread #1: tid = 0x14f790, 0x000000010112e8b8
libswscale.2.dylib`rgb24toyv12_mmxext(src=0x000000010c396300,
ydst=0x000000010a000000, udst=0x000000010a7e9000, vdst=0x000000010abdd080,
width=3840, height=2160, lumStride=<unavailable>,
chromStride=<unavailable>, srcStride=<unavailable>,
rgb2yuv=0x0000000109805340) + 440 at rgb2rgb_template.c:1629, queue =
'com.apple.main-thread', stop reason = EXC_BAD_ACCESS (code=1,
address=0x10c399000)
frame #0: 0x000000010112e8b8
libswscale.2.dylib`rgb24toyv12_mmxext(src=0x000000010c396300,
ydst=0x000000010a000000, udst=0x000000010a7e9000, vdst=0x000000010abdd080,
width=3840, height=2160, lumStride=<unavailable>,
chromStride=<unavailable>, srcStride=<unavailable>,
rgb2yuv=0x0000000109805340) + 440 at rgb2rgb_template.c:1629
1626 for (y=0; y<height-2; y+=2) {
1627 int i;
1628 for (i=0; i<2; i++) {
-> 1629 __asm__ volatile(
1630 "mov %2, %%"REG_a" \n\t"
1631 "movq "BGR2Y_IDX"(%3), %%mm6 \n\t"
1632 "movq "MANGLE(ff_w1111)", %%mm5 \n\t"
(lldb) bt
* thread #1: tid = 0x14f790, 0x000000010112e8b8
libswscale.2.dylib`rgb24toyv12_mmxext(src=0x000000010c396300,
ydst=0x000000010a000000, udst=0x000000010a7e9000, vdst=0x000000010abdd080,
width=3840, height=2160, lumStride=<unavailable>,
chromStride=<unavailable>, srcStride=<unavailable>,
rgb2yuv=0x0000000109805340) + 440 at rgb2rgb_template.c:1629, queue =
'com.apple.main-thread', stop reason = EXC_BAD_ACCESS (code=1,
address=0x10c399000)
* frame #0: 0x000000010112e8b8
libswscale.2.dylib`rgb24toyv12_mmxext(src=0x000000010c396300,
ydst=0x000000010a000000, udst=0x000000010a7e9000, vdst=0x000000010abdd080,
width=3840, height=2160, lumStride=<unavailable>,
chromStride=<unavailable>, srcStride=<unavailable>,
rgb2yuv=0x0000000109805340) + 440 at rgb2rgb_template.c:1629
frame #1: 0x0000000101116b49
libswscale.2.dylib`bgr24ToYv12Wrapper(c=0x0000000109801200,
src=<unavailable>, srcStride=<unavailable>, srcSliceY=0, srcSliceH=2160,
dst=0x00007fff5fbffa00, dstStride=0x00007fff5fbff9e0) + 137 at
swscale_unscaled.c:1314
frame #2: 0x00000001011136b7
libswscale.2.dylib`sws_scale(c=<unavailable>, srcSlice=<unavailable>,
srcStride=<unavailable>, srcSliceY=<unavailable>, srcSliceH=<unavailable>,
dst=<unavailable>, dstStride=<unavailable>) + 2919 at swscale.c:1101
frame #3: 0x0000000100004894 mmx_test2`Encoder::addFrame(unsigned
char*) + 212
frame #4: 0x0000000100003769 mmx_test2`main + 345
(lldb) disassemble --pc
libswscale.2.dylib`rgb24toyv12_mmxext + 440 at rgb2rgb_template.c:1629:
-> 0x10112e8b8: movd 0x15(%rbx,%rdx), %mm3
0x10112e8bd: punpcklbw %mm7, %mm2
0x10112e8c0: punpcklbw %mm7, %mm3
0x10112e8c3: pmaddwd %mm6, %mm4
(lldb) info all-registers
error: 'info' is not a valid command.
(lldb) register read --all
General Purpose Registers:
rax = 0xfffffffffffffff8
rbx = 0x000000010c399000
rcx = 0x0000000000000000
rdx = 0xffffffffffffffe8
rdi = 0x000000000000086c
rsi = 0x0000000000000f00
rbp = 0x0000000000000780
rsp = 0x00007fff5fbff8b0
r8 = 0x0000000000000f00
r9 = 0x0000000000002d00
r10 = 0x0000000000000000
r11 = 0x000000010a000f00
r12 = 0x0000000109805340
r13 = 0x000000010a7e9000
r14 = 0xfffffffffffff100
r15 = 0x000000010c396300
rip = 0x000000010112e8b8 libswscale.2.dylib`rgb24toyv12_mmxext +
440 at rgb2rgb_template.c:1629
rflags = 0x0000000000010282
cs = 0x000000000000002b
fs = 0x00000000ffff0000
gs = 0x00000000ffff0000
eax = 0xfffffff8
ebx = 0x0c399000
ecx = 0x00000000
edx = 0xffffffe8
edi = 0x0000086c
esi = 0x00000f00
ebp = 0x00000780
esp = 0x5fbff8b0
r8d = 0x00000f00
r9d = 0x00002d00
r10d = 0x00000000
r11d = 0x0a000f00
r12d = 0x09805340
r13d = 0x0a7e9000
r14d = 0xfffff100
r15d = 0x0c396300
ax = 0xfff8
bx = 0x9000
cx = 0x0000
dx = 0xffe8
di = 0x086c
si = 0x0f00
bp = 0x0780
sp = 0xf8b0
r8w = 0x0f00
r9w = 0x2d00
r10w = 0x0000
r11w = 0x0f00
r12w = 0x5340
r13w = 0x9000
r14w = 0xf100
r15w = 0x6300
ah = 0xff
bh = 0x90
ch = 0x00
dh = 0xff
al = 0xf8
bl = 0x00
cl = 0x00
dl = 0xe8
dil = 0x6c
sil = 0x00
bpl = 0x80
spl = 0xb0
r8l = 0x00
r9l = 0x00
r10l = 0x00
r11l = 0x00
r12l = 0x40
r13l = 0x00
r14l = 0x00
r15l = 0x00
Floating Point Registers:
fctrl = 0x037f
fstat = 0x0000
ftag = 0xff
fop = 0x0000
fioff = 0x00000000
fiseg = 0x0000
fooff = 0x00000000
foseg = 0x0000
mxcsr = 0x00001fa0
mxcsrmask = 0x0000ffff
stmm0 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0xff 0xff}
stmm1 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0xff 0xff}
stmm2 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0xff 0xff}
stmm3 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0xff 0xff}
stmm4 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0xff 0xff}
stmm5 = {0x01 0x00 0x01 0x00 0x01 0x00 0x01 0x00 0xff 0xff}
stmm6 = {0x88 0x0c 0x87 0x40 0xde 0x20 0x00 0x00 0xff 0xff}
stmm7 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0xff 0xff}
ymm0 = {0x00 0x0f 0x00 0x00 0x80 0x07 0x00 0x00 0x80 0x07 0x00 0x00
0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00}
ymm1 = {0x00 0x2d 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00}
ymm2 = {0x00 0x00 0x00 0x00 0x00 0x00 0xe0 0x43 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00}
ymm3 = {0x66 0xa9 0x49 0x15 0x00 0x00 0x00 0x10 0xdf 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00}
ymm4 = {0x6b 0xae 0x54 0x16 0x00 0x00 0x00 0x10 0x30 0x01 0x00 0x00
0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00}
ymm5 = {0x00 0x00 0x00 0x00 0x00 0x00 0xf0 0x3f 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00}
ymm6 = {0xae 0x72 0x46 0xe8 0x8f 0x1d 0xe4 0x3f 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00}
ymm7 = {0x6b 0xc8 0xb8 0xbe 0xd3 0xb9 0x0b 0x40 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00}
ymm8 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00}
ymm9 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00}
ymm10 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00}
ymm11 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00}
ymm12 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00}
ymm13 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00}
ymm14 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00}
ymm15 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00 0x00}
xmm0 = {0x00 0x0f 0x00 0x00 0x80 0x07 0x00 0x00 0x80 0x07 0x00 0x00
0x00 0x00 0x00 0x00}
xmm1 = {0x00 0x2d 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00}
xmm2 = {0x00 0x00 0x00 0x00 0x00 0x00 0xe0 0x43 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00}
xmm3 = {0x66 0xa9 0x49 0x15 0x00 0x00 0x00 0x10 0xdf 0x00 0x00 0x00
0x00 0x00 0x00 0x00}
xmm4 = {0x6b 0xae 0x54 0x16 0x00 0x00 0x00 0x10 0x30 0x01 0x00 0x00
0x00 0x00 0x00 0x00}
xmm5 = {0x00 0x00 0x00 0x00 0x00 0x00 0xf0 0x3f 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00}
xmm6 = {0xae 0x72 0x46 0xe8 0x8f 0x1d 0xe4 0x3f 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00}
xmm7 = {0x6b 0xc8 0xb8 0xbe 0xd3 0xb9 0x0b 0x40 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00}
xmm8 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00}
xmm9 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00}
xmm10 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00}
xmm11 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00}
xmm12 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00}
xmm13 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00}
xmm14 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00}
xmm15 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x00 0x00 0x00 0x00}
Exception State Registers:
trapno = 0x0000000e
err = 0x00000004
faultvaddr = 0x000000010c399000
(lldb)
}}}
If I simplify my code and remove all FFmpeg encoding and file-writing code,
then the application doesn't crash. The code below works, while the
attached code doesn't.
{{{
extern "C"
{
#ifndef __STDC_CONSTANT_MACROS
#define __STDC_CONSTANT_MACROS
#endif
#include <libavcodec/avcodec.h>
#include <libavutil/imgutils.h>
#include <libavformat/avformat.h>
#include <libswscale/swscale.h>
}
#include <new>
// Minimal test program: converts one vertically flipped 3840x2160 BGR24
// frame (all zero bytes) to YUV420P with a single sws_scale() call, without
// any encoding or file writing.
//
// Exit paths: EXIT_FAILURE if the conversion context cannot be created,
// `false` (i.e. 0) if a frame cannot be allocated, and exit(EXIT_SUCCESS)
// at the end -- including when sws_scale() reports a negative result, which
// is only logged.
int main()
{
fprintf(stderr, "Running mmx test\n");
av_register_all();
int mWidth = 3840;
int mHeight = 2160;
int ret;
// Packed source buffer, 3 bytes per pixel.
// NOTE(review): new (std::nothrow) yields a null pointer on failure and the
// result is not checked before the memset below.
uint8_t * pixels = new (std::nothrow) uint8_t[mWidth * mHeight * 3];
// Fill the buffer with zeros
memset(pixels, 0, mWidth * mHeight * 3);
SwsContext * sContext = NULL;
// Create the context for the frame conversion: BGR24 -> YUV420P with
// identical source and destination dimensions.
sContext = sws_getContext(mWidth, mHeight, AV_PIX_FMT_BGR24,
mWidth, mHeight, AV_PIX_FMT_YUV420P,
SWS_FAST_BILINEAR, NULL, NULL, NULL);
if (!sContext)
{
// NOTE(review): the string literal below is split across two lines,
// presumably by mail-archive line wrapping; it would not compile exactly
// as shown here.
fprintf(stderr, "Could not allocate frame convertion
context!\n");
return EXIT_FAILURE;
}
// Allocate frames
AVFrame * tmpFrame = NULL;
AVFrame * outFrame = NULL;
outFrame = av_frame_alloc();
if (!outFrame)
{
fprintf(stderr, "Could not create output frame\n");
// NOTE(review): `false` converts to 0 here, so this failure path (and the
// two similar ones below) returns the same status as a successful run; the
// resources allocated earlier are also not released on these paths.
return false;
}
outFrame->format = PIX_FMT_YUV420P;
outFrame->width = mWidth;
outFrame->height = mHeight;
// Allocate the destination planes; 32 is passed as the alignment argument.
ret = av_image_alloc(outFrame->data, outFrame->linesize, mWidth,
mHeight, PIX_FMT_YUV420P, 32);
if (ret < 0)
{
fprintf(stderr, "Could not allocate output frame\n");
return false;
}
// tmpFrame only wraps `pixels`; no image buffer is allocated for it here.
tmpFrame = av_frame_alloc();
if (!tmpFrame)
{
fprintf(stderr, "Could not create swap frame\n");
return false;
}
tmpFrame->width = mWidth;
tmpFrame->height = mHeight;
tmpFrame->format = PIX_FMT_BGR24;
// Disabled alternative: fill the buffer with random values instead of zeros
//for(unsigned int i=0; i<mWidth * mHeight * 3;i++)
// pixels[i]= static_cast<uint8_t>( rand()%256 );
// Convert
// Point tmpFrame's data/linesize at the `pixels` buffer
avpicture_fill((AVPicture*)tmpFrame, pixels, PIX_FMT_BGR24, mWidth,
mHeight);
// Flip the frame vertically, step 1: start at the last row of the buffer
tmpFrame->data[0] += tmpFrame->linesize[0]*(mHeight-1);
// Flip the frame vertically, step 2: negate the stride so that advancing
// by one row moves backwards through the buffer
tmpFrame->linesize[0] = -tmpFrame->linesize[0];
fprintf(stderr, "Converting to YUV420\n");
// Convert BGR24 to YUV420 over the full height (slice start 0, height
// mHeight)
ret = sws_scale(sContext, tmpFrame->data, tmpFrame->linesize, 0,
mHeight, outFrame->data, outFrame->linesize);
if (ret < 0)
{
// Only logged; execution continues to the cleanup below.
fprintf(stderr, "Failed to convert frame to YUV420!\n");
}
// Cleanup
if (tmpFrame)
{
av_frame_free(&tmpFrame);
}
if (outFrame)
{
// Release the plane buffer obtained from av_image_alloc() before freeing
// the frame itself.
av_freep(&outFrame->data[0]);
av_frame_free(&outFrame);
}
if (sContext)
sws_freeContext(sContext);
delete [] pixels;
// Exit program
exit( EXIT_SUCCESS );
}
}}}
--
Ticket URL: <https://trac.ffmpeg.org/ticket/3451#comment:5>
FFmpeg <https://ffmpeg.org>
FFmpeg issue tracker
More information about the FFmpeg-trac
mailing list