[FFmpeg-trac] #4877(swscale:new): API: swscale crash with slices

Thu Sep 24 00:02:09 CEST 2015

#4877: API: swscale crash with slices
---------------------------------+--------------------------------------
             Reporter:  rxt      |                     Type:  defect
               Status:  new      |                 Priority:  normal
            Component:  swscale  |                  Version:  git-master
             Keywords:           |               Blocked By:
             Blocking:           |  Reproduced by developer:  0
Analyzed by developer:  0        |
---------------------------------+--------------------------------------
 Summary of the bug:
 The attached code crashes in swscale at the second slice.
 If I undefine FILTER_NEW in swscale_internal.h it works.

 Version
 ffmpeg version N-74819-g3441fef Copyright (c) 2000-2015 the FFmpeg
 developers
   built with gcc 4.9.2 (Debian 4.9.2-10)
   configuration: --enable-gpl --enable-postproc --enable-libopencore-amrnb
 --enable-libopencore-amrwb --enable-nonfree --enable-version3 --enable-
 libmp3lame --samples=fate-suite/
   libavutil      55.  2.100 / 55.  2.100
   libavcodec     57.  3.100 / 57.  3.100
   libavformat    57.  2.100 / 57.  2.100
   libavdevice    57.  0.100 / 57.  0.100
   libavfilter     6.  8.100 /  6.  8.100
   libswscale      4.  0.100 /  4.  0.100
   libswresample   2.  0.100 /  2.  0.100
   libpostproc    54.  0.100 / 54.  0.100


 How to reproduce:
 Compile and run the following code (also attached):

 {{{
 gcc -g scaling_r.c  -L/usr/local/lib -lswscale -lm -lavutil -o scaling_r
 }}}

 {{{
 #include <libavutil/imgutils.h>
 #include <libswscale/swscale.h>

 /*
  * Reproducer for the reported crash: sets up a 720x480 YUV420P -> 720x540
  * RGB24 bicubic scaling context and feeds the source to sws_scale() in
  * slices of 16 rows.
  *
  * The buffers are allocated but never written to by this program, nothing
  * is freed, and no return value (sws_getContext, av_malloc, sws_scale) is
  * checked.
  *
  * NOTE(review): fprintf() is used, but the visible includes do not pull in
  * <stdio.h> directly; presumably it arrives through the libav headers.
  */
 int main()
 {
     /* Only src_data[0..2] and dst_data[0] (and the matching linesize
      * entries) are assigned below; the remaining entries stay uninitialized. */
     uint8_t *src_data[4], *dst_data[4];
     int src_linesize[4], dst_linesize[4];
     int src_w, src_h, dst_w, dst_h;
     struct SwsContext *sws_ctx;
     int i;

     /* Same width, taller destination: 480 rows in, 540 rows out. */
     src_w = 720;
     src_h = 480;
     dst_w = 720;
     dst_h = 540;

     sws_ctx = sws_getContext(src_w, src_h, AV_PIX_FMT_YUV420P,
                              dst_w, dst_h, AV_PIX_FMT_RGB24,
                              SWS_PRINT_INFO|SWS_BICUBIC, NULL, NULL,
 NULL);

     /* Source planes: strides rounded up to a multiple of 16, each buffer
      * sized as stride * src_h plus 16 extra bytes. The two chroma planes use
      * a stride derived from src_w/2 but are also allocated with src_h rows. */
     src_linesize[0] = FFALIGN(src_w,16);
     src_data[0] = av_malloc(src_linesize[0]*src_h+16);
     src_linesize[1] = src_linesize[2]=FFALIGN(src_w/2,16);
     src_data[1] = av_malloc(src_linesize[1]*src_h+16);
     src_data[2] = av_malloc(src_linesize[2]*src_h+16);

     /* Single packed destination plane, 3 bytes per pixel before alignment. */
     dst_linesize[0] = FFALIGN(dst_w*3,16);
     dst_data[0] = av_malloc(dst_linesize[0]*dst_h+16);


     /* One sws_scale() call per 16-row slice, with i passed as the slice's
      * starting row. The same src_data plane base pointers are passed on
      * every iteration.
      * NOTE(review): the sws_scale() documentation describes srcSlice as the
      * pointers to the planes of the source slice -- confirm whether the
      * pointers should be advanced per slice. */
     for (i = 0; i < src_h; i+=16) {
         /* Progress marker; the report shows "pos 16" printed right before
          * the crash. */
         fprintf(stderr, "pos %d\n", i);
         sws_scale(sws_ctx, (const uint8_t * const*)src_data,
                   src_linesize, i, 16, dst_data, dst_linesize);

     }

     return 0;
 }
 }}}

 {{{
 r at blacktower:/usr/local/src/ffmpeg/doc/examples$ ./scaling_r[swscaler @
 0x1409040] bicubic scaler, from yuv420p to rgb24 using MMXEXT
 pos 0
 [swscaler @ 0x1409040] Warning: dstStride is not aligned!
          ->cannot do aligned memory accesses anymore
 [swscaler @ 0x1409040] Warning: data is not aligned! This can lead to a
 speedloss
 pos 16
 Errore di segmentazione
 }}}

 gdb output
 {{{
 r at blacktower:/usr/local/src/ffmpeg/doc/examples$ gdb ./scaling_r
 GNU gdb (Debian 7.7.1+dfsg-5) 7.7.1
 Copyright (C) 2014 Free Software Foundation, Inc.
 License GPLv3+: GNU GPL version 3 or later
 <http://gnu.org/licenses/gpl.html>
 This is free software: you are free to change and redistribute it.
 There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
 and "show warranty" for details.
 This GDB was configured as "x86_64-linux-gnu".
 Type "show configuration" for configuration details.
 For bug reporting instructions, please see:
 <http://www.gnu.org/software/gdb/bugs/>.
 Find the GDB manual and other documentation resources online at:
 <http://www.gnu.org/software/gdb/documentation/>.
 For help, type "help".
 Type "apropos word" to search for commands related to "word"...
 Reading symbols from ./scaling_r...done.
 (gdb) run
 Starting program: /usr/local/src/ffmpeg/doc/examples/scaling_r
 [swscaler @ 0x6b8040] bicubic scaler, from yuv420p to rgb24 using MMXEXT
 pos 0
 [swscaler @ 0x6b8040] Warning: dstStride is not aligned!
          ->cannot do aligned memory accesses anymore
 [swscaler @ 0x6b8040] Warning: data is not aligned! This can lead to a
 speedloss
 pos 16

 Program received signal SIGSEGV, Segmentation fault.
 0x000000000042c1b5 in yuv2rgb_X_c_template (hasAlpha=0,
     target=AV_PIX_FMT_RGB24, y=15, dstW=720, dest=0x7ffff7e27040 "",
     alpSrc=0x0, chrFilterSize=4, chrVSrc=0x6d47a8, chrUSrc=0x6c77e8,
     chrFilter=0x6cced8, lumFilterSize=4, lumSrc=0x6cb640,
 lumFilter=0x6c8e98,
     c=<optimized out>) at libswscale/output.c:1340
 1340                Y1 += lumSrc[j][i * 2]     * lumFilter[j];
 (gbd)
 }}}

 Backtrace
 {{{
 (gdb) bt
 #0  0x000000000042c1b5 in yuv2rgb_X_c_template (hasAlpha=0,
     target=AV_PIX_FMT_RGB24, y=15, dstW=720, dest=0x7ffff7e27040 "",
     alpSrc=0x0, chrFilterSize=4, chrVSrc=0x6d47a8, chrUSrc=0x6c77e8,
     chrFilter=0x6cced8, lumFilterSize=4, lumSrc=0x6cb640,
 lumFilter=0x6c8e98,
     c=<optimized out>) at libswscale/output.c:1340
 #1  yuv2rgb24_X_c (c=0x6e1000, lumFilter=0x6c8e98, lumSrc=0x6cb640,
     lumFilterSize=4, chrFilter=0x6cced8, chrUSrc=0x6c77e8,
 chrVSrc=0x6d47a8,
     chrFilterSize=4, alpSrc=0x0, dest=0x7ffff7e27040 "", dstW=720, y=15)
     at libswscale/output.c:1515
 #2  0x0000000000410dae in packed_vscale (c=<optimized out>,
     desc=<optimized out>, sliceY=15, sliceH=<optimized out>)
     at libswscale/vscale.c:129
 #3  0x000000000040c551 in swscale (c=0x6b8040, src=0x6c8e20,
     srcStride=0xfffffe9c, srcSliceY=7124536, srcSliceH=7124496,
 dst=0x6c77e8,
     dstStride=0x7fffffffe0c0) at libswscale/swscale.c:686
 #4  0x000000000040d8da in sws_scale (c=0x6b8040, srcSlice=0x7fffffffe0d0,
     srcStride=0x6cb640, srcSliceY=4, srcSliceH=7130840,
 dst=0x7fffffffe0f0,
     dstStride=0x7fffffffe160) at libswscale/swscale.c:1267
 #5  0x000000000040b49b in main () at scaling_r.c:33
 }}}

 Disassemble and registers
 {{{
 (gdb) disass $pc-32,$pc+32
 Dump of assembler code from 0x42c195 to 0x42c1d5:
    0x000000000042c195 <yuv2rgb24_X_c+85>:       xor    %edi,%edi
    0x000000000042c197 <yuv2rgb24_X_c+87>:       mov    $0x40000,%ebx
    0x000000000042c19c <yuv2rgb24_X_c+92>:       mov    $0x40000,%r14d
    0x000000000042c1a2 <yuv2rgb24_X_c+98>:       nopw   0x0(%rax,%rax,1)
    0x000000000042c1a8 <yuv2rgb24_X_c+104>:      mov    (%rdx,%rdi,8),%rax
    0x000000000042c1ac <yuv2rgb24_X_c+108>:      movswl (%rsi,%rdi,2),%r11d
    0x000000000042c1b1 <yuv2rgb24_X_c+113>:      add    $0x1,%rdi
 => 0x000000000042c1b5 <yuv2rgb24_X_c+117>:      movswl (%rax,%r12,1),%r10d
    0x000000000042c1ba <yuv2rgb24_X_c+122>:      movswl (%rax,%r15,1),%eax
    0x000000000042c1bf <yuv2rgb24_X_c+127>:      imul   %r11d,%r10d
    0x000000000042c1c3 <yuv2rgb24_X_c+131>:      imul   %r11d,%eax
    0x000000000042c1c7 <yuv2rgb24_X_c+135>:      add    %r10d,%r14d
    0x000000000042c1ca <yuv2rgb24_X_c+138>:      add    %eax,%ebx
    0x000000000042c1cc <yuv2rgb24_X_c+140>:      cmp    %edi,%ecx
    0x000000000042c1ce <yuv2rgb24_X_c+142>:      jg     0x42c1a8
 <yuv2rgb24_X_c+104>
    0x000000000042c1d0 <yuv2rgb24_X_c+144>:      sar    $0x13,%r14d
    0x000000000042c1d4 <yuv2rgb24_X_c+148>:      sar    $0x13,%ebx
 End of assembler dump.
 (gdb) info all-registers
 rax            0x40000000000000 18014398509481984
 rbx            0x40000  262144
 rcx            0x4      4
 rdx            0x6cb640 7124544
 rsi            0x6c8e98 7114392
 rdi            0x1      1
 rbp            0x0      0x0
 rsp            0x7fffffffddd8   0x7fffffffddd8
 r8             0x6cced8 7130840
 r9             0x6c77e8 7108584
 r10            0x6c8e20 7114272
 r11            0xfffffe9c       4294966940
 r12            0x0      0
 r13            0x6d47a8 7161768
 r14            0x40000  262144
 r15            0x2      2
 rip            0x42c1b5 0x42c1b5 <yuv2rgb24_X_c+117>
 eflags         0x10202  [ IF RF ]
 cs             0x33     51
 ss             0x2b     43
 ds             0x0      0
 es             0x0      0
 fs             0x0      0
 gs             0x0      0
 st0            0        (raw 0x00000000000000000000)
 st1            0        (raw 0x00000000000000000000)
 st2            0        (raw 0x00000000000000000000)
 st3            0        (raw 0x00000000000000000000)
 st4            0        (raw 0x00000000000000000000)
 st5            0        (raw 0x00000000000000000000)
 st6            0        (raw 0x00000000000000000000)
 st7            0        (raw 0x00000000000000000000)
 fctrl          0x37f    895
 fstat          0x0      0
 ftag           0xffff   65535
 fiseg          0x0      0
 fioff          0x0      0
 foseg          0x0      0
 fooff          0x0      0
 fop            0x0      0
 mxcsr          0x1fa0   [ PE IM DM ZM OM UM PM ]
 ymm0           {v8_float = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
   v4_double = {0x0, 0x0, 0x0, 0x0}, v32_int8 = {0x0 <repeats 32 times>},
   v16_int16 = {0x0 <repeats 16 times>}, v8_int32 = {0x0, 0x0, 0x0, 0x0,
 0x0,
     0x0, 0x0, 0x0}, v4_int64 = {0x0, 0x0, 0x0, 0x0}, v2_int128 = {
     0x00000000000000000000000000000000,
 0x00000000000000000000000000000000}}
 ymm1           {v8_float = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
   v4_double = {0x0, 0x0, 0x0, 0x0}, v32_int8 = {0x0 <repeats 32 times>},
   v16_int16 = {0x0 <repeats 16 times>}, v8_int32 = {0x0, 0x0, 0x0, 0x0,
 0x0,
     0x0, 0x0, 0x0}, v4_int64 = {0x0, 0x0, 0x0, 0x0}, v2_int128 = {
     0x00000000000000000000000000000000,
 0x00000000000000000000000000000000}}
 ymm2           {v8_float = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
   v4_double = {0x8000000000000000, 0x8000000000000000, 0x0, 0x0}, v32_int8
 = {
     0x61, 0x74, 0x61, 0x20, 0x69, 0x73, 0x20, 0x6e, 0x6f, 0x74, 0x20,
 0x61,
     0x6c, 0x69, 0x67, 0x6e, 0x0 <repeats 16 times>}, v16_int16 = {0x7461,
     0x2061, 0x7369, 0x6e20, 0x746f, 0x6120, 0x696c, 0x6e67, 0x0, 0x0, 0x0,
     0x0, 0x0, 0x0, 0x0, 0x0}, v8_int32 = {0x20617461, 0x6e207369,
 0x6120746f,
     0x6e67696c, 0x0, 0x0, 0x0, 0x0}, v4_int64 = {0x6e20736920617461,
     0x6e67696c6120746f, 0x0, 0x0}, v2_int128 = {
     0x6e67696c6120746f6e20736920617461,
 0x00000000000000000000000000000000}}
 ymm3           {v8_float = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
   v4_double = {0x0, 0x0, 0x0, 0x0}, v32_int8 = {0x0 <repeats 32 times>},
   v16_int16 = {0x0 <repeats 16 times>}, v8_int32 = {0x0, 0x0, 0x0, 0x0,
 0x0,
     0x0, 0x0, 0x0}, v4_int64 = {0x0, 0x0, 0x0, 0x0}, v2_int128 = {
     0x00000000000000000000000000000000,
 0x00000000000000000000000000000000}}
 ymm4           {v8_float = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
   v4_double = {0x0, 0x0, 0x0, 0x0}, v32_int8 = {0x0 <repeats 32 times>},
   v16_int16 = {0x0 <repeats 16 times>}, v8_int32 = {0x0, 0x0, 0x0, 0x0,
 0x0,
     0x0, 0x0, 0x0}, v4_int64 = {0x0, 0x0, 0x0, 0x0}, v2_int128 = {
     0x00000000000000000000000000000000,
 0x00000000000000000000000000000000}}
 ymm5           {v8_float = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
   v4_double = {0x0, 0x0, 0x0, 0x0}, v32_int8 = {0x0 <repeats 32 times>},
   v16_int16 = {0x0 <repeats 16 times>}, v8_int32 = {0x0, 0x0, 0x0, 0x0,
 0x0,
     0x0, 0x0, 0x0}, v4_int64 = {0x0, 0x0, 0x0, 0x0}, v2_int128 = {
     0x00000000000000000000000000000000,
 0x00000000000000000000000000000000}}
 ymm6           {v8_float = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
   v4_double = {0x0, 0x0, 0x0, 0x0}, v32_int8 = {0x0 <repeats 32 times>},
   v16_int16 = {0x0 <repeats 16 times>}, v8_int32 = {0x0, 0x0, 0x0, 0x0,
 0x0,
     0x0, 0x0, 0x0}, v4_int64 = {0x0, 0x0, 0x0, 0x0}, v2_int128 = {
     0x00000000000000000000000000000000,
 0x00000000000000000000000000000000}}
 ymm7           {v8_float = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
   v4_double = {0x0, 0x0, 0x0, 0x0}, v32_int8 = {0x0 <repeats 32 times>},
   v16_int16 = {0x0 <repeats 16 times>}, v8_int32 = {0x0, 0x0, 0x0, 0x0,
 0x0,
     0x0, 0x0, 0x0}, v4_int64 = {0x0, 0x0, 0x0, 0x0}, v2_int128 = {
     0x00000000000000000000000000000000,
 0x00000000000000000000000000000000}}
 ymm8           {v8_float = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
   v4_double = {0x0, 0x0, 0x0, 0x0}, v32_int8 = {0x0 <repeats 32 times>},
   v16_int16 = {0x0 <repeats 16 times>}, v8_int32 = {0x0, 0x0, 0x0, 0x0,
 0x0,
     0x0, 0x0, 0x0}, v4_int64 = {0x0, 0x0, 0x0, 0x0}, v2_int128 = {
     0x00000000000000000000000000000000,
 0x00000000000000000000000000000000}}
 ymm9           {v8_float = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
   v4_double = {0x0, 0x0, 0x0, 0x0}, v32_int8 = {0x0 <repeats 32 times>},
   v16_int16 = {0x0 <repeats 16 times>}, v8_int32 = {0x0, 0x0, 0x0, 0x0,
 0x0,
     0x0, 0x0, 0x0}, v4_int64 = {0x0, 0x0, 0x0, 0x0}, v2_int128 = {
     0x00000000000000000000000000000000,
 0x00000000000000000000000000000000}}
 ymm10          {v8_float = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
   v4_double = {0x0, 0x0, 0x0, 0x0}, v32_int8 = {0x0 <repeats 32 times>},
   v16_int16 = {0x0 <repeats 16 times>}, v8_int32 = {0x0, 0x0, 0x0, 0x0,
 0x0,
     0x0, 0x0, 0x0}, v4_int64 = {0x0, 0x0, 0x0, 0x0}, v2_int128 = {
     0x00000000000000000000000000000000,
 0x00000000000000000000000000000000}}
 ymm11          {v8_float = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
   v4_double = {0x0, 0x0, 0x0, 0x0}, v32_int8 = {0x0 <repeats 32 times>},
   v16_int16 = {0x0 <repeats 16 times>}, v8_int32 = {0x0, 0x0, 0x0, 0x0,
 0x0,
     0x0, 0x0, 0x0}, v4_int64 = {0x0, 0x0, 0x0, 0x0}, v2_int128 = {
     0x00000000000000000000000000000000,
 0x00000000000000000000000000000000}}
 ymm12          {v8_float = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
   v4_double = {0x0, 0x0, 0x0, 0x0}, v32_int8 = {0x0 <repeats 13 times>,
 0xff,
     0x0 <repeats 18 times>}, v16_int16 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
     0xff00, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v8_int32 = {0x0,
     0x0, 0x0, 0xff00, 0x0, 0x0, 0x0, 0x0}, v4_int64 = {0x0,
 0xff0000000000,
     0x0, 0x0}, v2_int128 = {0x0000ff00000000000000000000000000,
     0x00000000000000000000000000000000}}
 ymm13          {v8_float = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
   v4_double = {0x0, 0x0, 0x0, 0x0}, v32_int8 = {0x0 <repeats 32 times>},
   v16_int16 = {0x0 <repeats 16 times>}, v8_int32 = {0x0, 0x0, 0x0, 0x0,
 0x0,
     0x0, 0x0, 0x0}, v4_int64 = {0x0, 0x0, 0x0, 0x0}, v2_int128 = {
     0x00000000000000000000000000000000,
 0x00000000000000000000000000000000}}
 ymm14          {v8_float = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
   v4_double = {0x0, 0x0, 0x0, 0x0}, v32_int8 = {0x0 <repeats 32 times>},
   v16_int16 = {0x0 <repeats 16 times>}, v8_int32 = {0x0, 0x0, 0x0, 0x0,
 0x0,
     0x0, 0x0, 0x0}, v4_int64 = {0x0, 0x0, 0x0, 0x0}, v2_int128 = {
     0x00000000000000000000000000000000,
 0x00000000000000000000000000000000}}
 ymm15          {v8_float = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
   v4_double = {0x0, 0x0, 0x0, 0x0}, v32_int8 = {0x0 <repeats 32 times>},
   v16_int16 = {0x0 <repeats 16 times>}, v8_int32 = {0x0, 0x0, 0x0, 0x0,
 0x0,
     0x0, 0x0, 0x0}, v4_int64 = {0x0, 0x0, 0x0, 0x0}, v2_int128 = {
     0x00000000000000000000000000000000,
 0x00000000000000000000000000000000}}
 }}}

--
Ticket URL: <https://trac.ffmpeg.org/ticket/4877>
FFmpeg <https://ffmpeg.org>
FFmpeg issue tracker